Vertica does not want to use merge join during update operation
I am trying to update table using inner join with another one table. Both table have the same segmentation and same sorted column varchar(100). Who can tell me why Vertica uses hash join operation instead of merge join to get matching for update operation?
0
Comments
You may have other projections. Can you post the example? I did a simple test and seems to work as expected :
=> create table a(c1 varchar(100), c2 varchar(100)) order by c1 ;
=>create table b(c1 varchar(100), c2 varchar(100)) order by c1 ;
=> insert into a values ('aaa','column2');
=> insert into b values ('aaa','column2_b');
=>select analyze_statistics('a');
=>select analyze_statistics('b');
=> explain select a.*, b.* from a inner join b on a.c1 = b.c1;
Access Path:
+-JOIN MERGEJOIN(inputs presorted) [Cost: 25, Rows: 1] (PATH ID: 1) Inner (BROADCAST)
| Join Cond: (a.c1 = b.c1)
| Materialize at Output: a.c2
| Execute on: All Nodes
| +-- Outer -> STORAGE ACCESS for a [Cost: 6, Rows: 1] (PATH ID: 2)
| | Projection: public.a_b0
| | Materialize: a.c1
| | Execute on: All Nodes
| | Runtime Filter: (SIP1(MergeJoin): a.c1)
| +-- Inner -> STORAGE ACCESS for b [Cost: 12, Rows: 1] (PATH ID: 3)
| | Projection: public.b_b0
| | Materialize: b.c1, b.c2
| | Execute on: All Nodes
=> explain update a a set c2 = b.c2 from b b where a.c1 = b.c1;
Access Path:
+-DML UPDATE [Cost: 0, Rows: 0]
| Target Projection: public.a_b1 (NO DELETE)
| Target Projection: public.a_b0 (NO DELETE)
| Target Prep:
| Execute on: All Nodes
| +---> JOIN MERGEJOIN(inputs presorted) [Semi] [Cost: 26, Rows: 1] (PATH ID: 1
) Inner (BROADCAST)
| | Join Cond: (a.c1 = VAL(2))
| | Materialize at Output: a.c2, a.epoch
| | Execute on: All Nodes
| | +-- Outer -> STORAGE ACCESS for <No Alias> [Cost: 6, Rows: 1] (PATH ID: 2)
| | | Projection: public.a_b0
| | | Materialize: a.c1
| | | Execute on: All Nodes
| | | Runtime Filter: (SIP1(MergeJoin): a.c1)
| | +-- Inner -> STORAGE ACCESS for <No Alias> [Cost: 12, Rows: 1] (PATH ID: 3)
| | | Projection: public.b_b0
| | | Materialize: VAL(0), VAL(0)
| | | Execute on: All Nodes
+-Target Projection: public.a_b1 (DELETE ON CONTAINER)
| Target Prep:
| Execute on: All Nodes
| +---> JOIN MERGEJOIN(inputs presorted) [Semi] [Cost: 14, Rows: 1] (PATH ID: 1
) Inner (BROADCAST)
| | Join Cond: (a.c1 = b.c1)
| | Execute on: All Nodes
| | +-- Outer -> STORAGE ACCESS for a [Cost: 6, Rows: 1] (PATH ID: 2)
| | | Projection: public.a_b1
| | | Materialize: a.c1
| | | Execute on: All Nodes
| | | Runtime Filter: (SIP2(MergeJoin): a.c1)
| | +-- Inner -> STORAGE ACCESS for b [Cost: 6, Rows: 1] (PATH ID: 3)
| | | Projection: public.b_b0
| | | Materialize: b.c1
| | | Execute on: All Nodes
+-Target Projection: public.a_b0 (DELETE ON CONTAINER)
| Target Prep:
| Execute on: All Nodes
| +---> JOIN MERGEJOIN(inputs presorted) [Semi] [Cost: 14, Rows: 1] (PATH ID: 1
) Inner (BROADCAST)
| | Join Cond: (a.c1 = b.c1)
| | Execute on: All Nodes
| | +-- Outer -> STORAGE ACCESS for a [Cost: 6, Rows: 1] (PATH ID: 2)
| | | Projection: public.a_b0
| | | Materialize: a.c1
| | | Execute on: All Nodes
| | | Runtime Filter: (SIP3(MergeJoin): a.c1)
| | +-- Inner -> STORAGE ACCESS for b [Cost: 6, Rows: 1] (PATH ID: 3)
| | | Projection: public.b_b0
| | | Materialize: b.c1
| | | Execute on: All Nodes
I dont have other projections
--PARTY_CLS
CREATE TABLE PERS_DATA.PARTY_CLS
(
PARTY_ID int NOT NULL,
PARTY_TYPE_CODE varchar(100) NOT NULL,
MT_PROC_ID int NOT NULL,
MT_INS_DTTM timestamp NOT NULL,
MT_CHNG_DTTM timestamp NOT NULL,
MT_SOURCE_SYSTEM_CODE int NOT NULL,
MT_SSYS_CODE varchar(512) NOT NULL,
MT_HASH int,
MT_DEFAULT_SIGN boolean NOT NULL,
SERVICE_START_DATE date,
CDI_VALID_SIGN boolean NOT NULL,
HID_PARTY int NOT NULL,
mt_add_hash varchar(100)
);
CREATE PROJECTION PERS_DATA.PARTY_CLS
(
PARTY_ID ENCODING COMMONDELTA_COMP,
PARTY_TYPE_CODE ENCODING BLOCK_DICT,
MT_PROC_ID ENCODING DELTAVAL,
MT_INS_DTTM ENCODING DELTAVAL,
MT_CHNG_DTTM ENCODING DELTAVAL,
MT_SOURCE_SYSTEM_CODE ENCODING BLOCK_DICT,
MT_SSYS_CODE,
MT_HASH,
MT_DEFAULT_SIGN,
SERVICE_START_DATE,
CDI_VALID_SIGN,
HID_PARTY,
mt_add_hash
)
AS
SELECT PARTY_CLS.PARTY_ID,
PARTY_CLS.PARTY_TYPE_CODE,
PARTY_CLS.MT_PROC_ID,
PARTY_CLS.MT_INS_DTTM,
PARTY_CLS.MT_CHNG_DTTM,
PARTY_CLS.MT_SOURCE_SYSTEM_CODE,
PARTY_CLS.MT_SSYS_CODE,
PARTY_CLS.MT_HASH,
PARTY_CLS.MT_DEFAULT_SIGN,
PARTY_CLS.SERVICE_START_DATE,
PARTY_CLS.CDI_VALID_SIGN,
PARTY_CLS.HID_PARTY,
PARTY_CLS.mt_add_hash
FROM PERS_DATA.PARTY_CLS
ORDER BY PARTY_CLS.PARTY_ID
SEGMENTED BY hash(PARTY_CLS.PARTY_ID) ALL NODES KSAFE 1;
CREATE PROJECTION PERS_DATA.PARTY_CLS_ETL_CDI_UPD
(
MT_SOURCE_SYSTEM_CODE,
MT_SSYS_CODE,
MT_HASH,
mt_add_hash
)
AS
SELECT PARTY_CLS.MT_SOURCE_SYSTEM_CODE,
PARTY_CLS.MT_SSYS_CODE,
PARTY_CLS.MT_HASH,
PARTY_CLS.mt_add_hash
FROM PERS_DATA.PARTY_CLS
ORDER BY PARTY_CLS.mt_add_hash
SEGMENTED BY hash(PARTY_CLS.mt_add_hash) ALL NODES KSAFE 1;
---I_PARTY_STG
CREATE TABLE PERS_DATA_STG.I_PARTY_CLS
(
PARTY_ID int,
PARTY_TYPE_CODE varchar(100),
MT_PROC_ID int,
MT_INS_DTTM timestamp,
MT_CHNG_DTTM timestamp,
MT_SOURCE_SYSTEM_CODE int,
MT_SSYS_CODE varchar(512),
MT_HASH int,
MT_DEFAULT_SIGN boolean,
SERVICE_START_DATE date,
CDI_VALID_SIGN boolean,
HID_PARTY int,
IND_UPDATE varchar(1),
mt_add_hash varchar(100)
);
CREATE PROJECTION PERS_DATA_STG.I_PARTY_CLS
(
PARTY_ID,
PARTY_TYPE_CODE,
MT_PROC_ID,
MT_INS_DTTM,
MT_CHNG_DTTM,
MT_SOURCE_SYSTEM_CODE,
MT_SSYS_CODE,
MT_HASH,
MT_DEFAULT_SIGN,
SERVICE_START_DATE,
CDI_VALID_SIGN,
HID_PARTY,
IND_UPDATE,
mt_add_hash
)
AS
SELECT I_PARTY_CLS.PARTY_ID,
I_PARTY_CLS.PARTY_TYPE_CODE,
I_PARTY_CLS.MT_PROC_ID,
I_PARTY_CLS.MT_INS_DTTM,
I_PARTY_CLS.MT_CHNG_DTTM,
I_PARTY_CLS.MT_SOURCE_SYSTEM_CODE,
I_PARTY_CLS.MT_SSYS_CODE,
I_PARTY_CLS.MT_HASH,
I_PARTY_CLS.MT_DEFAULT_SIGN,
I_PARTY_CLS.SERVICE_START_DATE,
I_PARTY_CLS.CDI_VALID_SIGN,
I_PARTY_CLS.HID_PARTY,
I_PARTY_CLS.IND_UPDATE,
I_PARTY_CLS.mt_add_hash
FROM PERS_DATA_STG.I_PARTY_CLS
ORDER BY I_PARTY_CLS.mt_add_hash,
I_PARTY_CLS.IND_UPDATE
SEGMENTED BY hash(I_PARTY_CLS.mt_add_hash) ALL NODES KSAFE 1;
select analyze_statistics('pers_data_stg.i_party_cls');
select analyze_statistics('pers_data.party_cls');
explain update PERS_DATA_STG.I_PARTY_CLS S set IND_UPDATE = case when S.MT_HASH = T.MT_HASH then 'N' else 'U' end from PERS_DATA.PARTY_CLS T where IND_UPDATE = 'I' and S.MT_ADD_HASH = T.MT_ADD_HASH
Access Path:
+-DML UPDATE [Cost: 0, Rows: 0]
| Target Projection: PERS_DATA_STG.I_PARTY_CLS_b1 (NO DELETE)
| Target Projection: PERS_DATA_STG.I_PARTY_CLS_b0 (NO DELETE)
| Target Prep:
| Execute on: All Nodes
| +---> JOIN HASH [Semi] [Cost: 2M, Rows: 18M] (PATH ID: 1) Inner (BROADCAST)
| | Join Cond: (collation(I_PARTY_CLS.mt_add_hash, 'ru_RU') = collation(VAL(2), 'ru_RU'))
| | Materialize at Output: I_PARTY_CLS.PARTY_ID, I_PARTY_CLS.PARTY_TYPE_CODE, I_PARTY_CLS.MT_PROC_ID, I_PARTY_CLS.MT_INS_DTTM, I_PARTY_CLS.MT_CHNG_DTTM, I_PARTY_CLS.MT_SOURCE_SYSTEM_CODE, I_PARTY_CLS.MT_SSYS_CODE, I_PARTY_CLS.MT_HASH, I_PARTY_CLS.MT_DEFAULT_SIGN, I_PARTY_CLS.SERVICE_START_DATE, I_PARTY_CLS.CDI_VALID_SIGN, I_PARTY_CLS.HID_PARTY, I_PARTY_CLS.IND_UPDATE, I_PARTY_CLS.epoch
| | Execute on: All Nodes
| | +-- Outer -> STORAGE ACCESS for <No Alias> [Cost: 35K, Rows: 18M] (PATH ID: 2)
| | | Projection: PERS_DATA_STG.I_PARTY_CLS_b0
| | | Materialize: I_PARTY_CLS.mt_add_hash
| | | Filter: (I_PARTY_CLS.IND_UPDATE = 'I')
| | | Execute on: All Nodes
| | | Runtime Filter: (SIP1(HashJoin): collation(I_PARTY_CLS.mt_add_hash, 'ru_RU'))
| | +-- Inner -> STORAGE ACCESS for <No Alias> [Cost: 35K, Rows: 18M] (PATH ID: 3)
| | | Projection: PERS_DATA.PARTY_CLS_ETL_CDI_UPD_b0
| | | Materialize: VAL(0), VAL(0)
| | | Execute on: All Nodes
+-Target Projection: PERS_DATA_STG.I_PARTY_CLS_b1 (DELETE ON CONTAINER)
| Target Prep:
| Execute on: All Nodes
| +---> JOIN HASH [Semi] [Cost: 2M, Rows: 18M] (PATH ID: 1) Inner (BROADCAST)
| | Join Cond: (collation(S.mt_add_hash, 'ru_RU') = collation(T.mt_add_hash, 'ru_RU'))
| | Execute on: All Nodes
| | +-- Outer -> STORAGE ACCESS for S [Cost: 35K, Rows: 18M] (PATH ID: 2)
| | | Projection: PERS_DATA_STG.I_PARTY_CLS_b1
| | | Materialize: S.mt_add_hash
| | | Filter: (S.IND_UPDATE = 'I')
| | | Execute on: All Nodes
| | | Runtime Filter: (SIP2(HashJoin): collation(S.mt_add_hash, 'ru_RU'))
| | +-- Inner -> STORAGE ACCESS for T [Cost: 26K, Rows: 18M] (PATH ID: 3)
| | | Projection: PERS_DATA.PARTY_CLS_ETL_CDI_UPD_b0
| | | Materialize: T.mt_add_hash
| | | Execute on: All Nodes
+-Target Projection: PERS_DATA_STG.I_PARTY_CLS_b0 (DELETE ON CONTAINER)
| Target Prep:
| Execute on: All Nodes
| +---> JOIN HASH [Semi] [Cost: 2M, Rows: 18M] (PATH ID: 1) Inner (BROADCAST)
| | Join Cond: (collation(S.mt_add_hash, 'ru_RU') = collation(T.mt_add_hash, 'ru_RU'))
| | Execute on: All Nodes
| | +-- Outer -> STORAGE ACCESS for S [Cost: 35K, Rows: 18M] (PATH ID: 2)
| | | Projection: PERS_DATA_STG.I_PARTY_CLS_b0
| | | Materialize: S.mt_add_hash
| | | Filter: (S.IND_UPDATE = 'I')
| | | Execute on: All Nodes
| | | Runtime Filter: (SIP3(HashJoin): collation(S.mt_add_hash, 'ru_RU'))
| | +-- Inner -> STORAGE ACCESS for T [Cost: 26K, Rows: 18M] (PATH ID: 3)
| | | Projection: PERS_DATA.PARTY_CLS_ETL_CDI_UPD_b0
| | | Materialize: T.mt_add_hash
| | | Execute on: All Nodes
I gues that point is the collation function which Vertica is implement in join condition. Can I rid of collation in query execution plan?
I found sollution by mysefl. After changing local on databse the collation in execution plan gone and mege join works. Anyway I am still intersting why Vertica did it?