The Vertica Forum is getting a makeover! The site will be undergoing maintenance from Tuesday 8/13 to Friday 8/16. We appreciate your patience and cooperation during this time.

# How to investigate a delay of Copy statement.

After some environment changes, existing COPY statements became slower. If you have any knowledge or tips for finding the bottleneck, could you give me your advice?

• Hi Kaito,

To investigate such issues, we follow the procedure below to find any bottlenecks in COPY command performance.

Step 1: Calculate the delay from load_streams to see how large the delay is in the current system behavior:

-- Progress of in-flight loads: parse/sort completion percentages per statement.
-- The original snippet was missing its FROM clause and terminator.
SELECT
    table_name,
    transaction_id,
    statement_id,
    ROUND(input_file_size_bytes / 1024^3, 2) AS "INPUT_GB",  -- bytes -> GiB
    parse_complete_percent,
    sort_complete_percent
FROM load_streams;

Step 2: Extract the details from dc_execution_engine_profiles, dc_sips_statistics, and resource_acquisitions to see the bottleneck:

Just save the lines below into a SQL file, replace trxid and stmtid with the values from the SQL above, and run the script.

\timing
-- Set these to the transaction_id / statement_id found via load_streams above.
\set trxid 495xxxxxxxxxxxxx753
\set stmtid 1
\o prf_tun.out
\set QUIET on
\pset null '(null)'

-- Top 20 node/operator/path combinations by total execution time.
SELECT node_name,
       operator_name,
       path_id,
       SUM(counter_value) AS counter_sum,
       COUNT(DISTINCT operator_id) AS num_operators
FROM dc_execution_engine_profiles
WHERE transaction_id = :trxid
  AND statement_id = :stmtid        -- was :stmid, an undefined variable
  AND counter_name ILIKE 'execution%'
GROUP BY node_name, operator_name, path_id
ORDER BY counter_sum DESC
LIMIT 20;

-- Set pathid to the path_id you want to drill into (taken from the previous
-- query's output). The original left it unset, which breaks substitution.
\set pathid 1
-- Per-operator counter averages for one plan path on the local node.
SELECT operator_name,
       counter_name,
       path_id,
       COUNT(DISTINCT operator_id),
       AVG(counter_value)
FROM execution_engine_profiles
WHERE transaction_id = :trxid
  AND statement_id = :stmtid        -- was :stmid, an undefined variable
  AND path_id = :pathid
  AND node_name = (SELECT local_node_name())
GROUP BY operator_name, counter_name, path_id
HAVING SUM(counter_value) > 0
ORDER BY operator_name;

-- Pivot the key counters into one row per path/operator on the local node.
-- Aliases were single-quoted string literals ('bytes_sent'), which are not
-- valid identifiers; plain identifiers produce the same column names.
SELECT path_id,
       operator_name,
       COUNT(DISTINCT operator_id) AS num_operators,
       COUNT(DISTINCT node_name) AS num_nodes,
       SUM(DECODE(counter_name, 'bytes sent', counter_value, NULL)) AS bytes_sent,
       SUM(DECODE(counter_name, 'execution time (us)', counter_value, NULL)) AS execution_time_us,
       SUM(DECODE(counter_name, 'rle rows produced', counter_value, NULL)) AS rle_rows_produced,
       SUM(DECODE(counter_name, 'rows received', counter_value, NULL)) AS rows_prod,
       SUM(DECODE(counter_name, 'consumer stall(us)', counter_value, NULL)) AS consumer_stall_us,
       SUM(DECODE(counter_name, 'producer stall(us)', counter_value, NULL)) AS prod_stall_us,
       SUM(DECODE(counter_name, 'cumulative size of raw temp data (bytes)', counter_value, NULL)) AS temp_data
FROM execution_engine_profiles
WHERE transaction_id = :trxid
  AND statement_id = :stmtid        -- was :stmid, an undefined variable
  AND node_name = (SELECT local_node_name())
GROUP BY path_id, operator_name
ORDER BY path_id, operator_name;

-- Sideways Information Passing stats: how many rows/blocks were pruned.
SELECT node_name, time, sip_expr_id, sip_entries, rows_processed, rows_pruned,
       blocks_processed, blocks_pruned, block_pruned_by_valuelist
FROM dc_sips_statistics
WHERE transaction_id = :trxid
  AND statement_id = :stmtid        -- was :stmid, an undefined variable
  AND node_name = (SELECT local_node_name());

A few more SQL queries to help you see whether the bottleneck is due to resource usage:

-- Show load statistics with resource usage

-- Loads from the last hour, joined to per-minute system resource samples.
SELECT q.query_start,
       q.schema_name,
       q.table_name,
       (q.query_duration_us / 1000000)::int AS sec,
       q.processed_row_count AS rows,
       -- NULLIF guards against division by zero for sub-microsecond durations
       (q.processed_row_count / NULLIF(q.query_duration_us / 1000000, 0))::int AS rps,
       (q.query ILIKE '% direct %') AS is_direct,
       AVG(r.average_cpu_usage_percent)::numeric(18,2) AS avg_cpu,
       AVG(r.io_written_kbytes_per_second / 1024)::int AS avg_mbps_write
FROM query_profiles q
JOIN system_resource_usage r
     -- keep only resource samples taken during the query's execution window
     ON r.end_time BETWEEN timestamp_round(q.query_start::timestamp, 'mi') + '1 minute'::interval
                       AND timestamp_round((q.query_start::timestamp + (60 + q.query_duration_us / 1000000) * '1 second'::interval), 'mi')
-- row filter belongs in WHERE, not in the join's ON clause
WHERE q.query_start::timestamp > sysdate - 1/24   -- last hour only
GROUP BY 1, 2, 3, 4, 5, 6, 7
ORDER BY q.query_start DESC;

-- Show load statistics for 1 SQL

-- Same as the previous report, restricted to a single statement.
-- NOTE: replace the xxxx placeholders with real IDs before running; the
-- script will not parse as written.
SELECT q.query_start,
       q.schema_name,
       q.table_name,
       (q.query_duration_us / 1000000)::int AS sec,
       q.processed_row_count AS rows,
       -- NULLIF guards against division by zero for sub-microsecond durations
       (q.processed_row_count / NULLIF(q.query_duration_us / 1000000, 0))::int AS rps,
       (q.query ILIKE '% direct %') AS is_direct,
       AVG(r.average_cpu_usage_percent)::numeric(18,2) AS avg_cpu,
       AVG(r.io_written_kbytes_per_second / 1024)::int AS avg_mbps_write
FROM query_profiles q
JOIN system_resource_usage r
     -- keep only resource samples taken during the query's execution window
     ON r.end_time BETWEEN timestamp_round(q.query_start::timestamp, 'mi') + '1 minute'::interval
                       AND timestamp_round((q.query_start::timestamp + (60 + q.query_duration_us / 1000000) * '1 second'::interval), 'mi')
-- row filters belong in WHERE, not in the join's ON clause
WHERE q.query_start::timestamp > sysdate - 1/24   -- last hour only
  AND q.transaction_id = xxxxxxxxxxxxx            -- <-- replace
  AND q.statement_id = xxxx                       -- <-- replace
GROUP BY 1, 2, 3, 4, 5, 6, 7
ORDER BY q.query_start DESC;

-- ------------------------------------------------------------------------
-- Query execution profiling. Before running this script profile the Q.
-- Set trxid and stmtid as shown in the Profile's hint
-- ------------------------------------------------------------------------

-- vsql variables: set trxid / stmtid to the values shown in the PROFILE hint
\set trxid 49539595933799753
\set stmtid 1
-- redirect all query output to qprof.out
\o qprof.out
\set QUIET on
-- display NULLs as '(null)' instead of an empty string
\pset null '(null)'

-- ------------------------------------------------------------------------
-- Query text
-- ------------------------------------------------------------------------
\echo Step 0: Query text
\pset tuples_only
-- Original SQL text of the profiled statement.
SELECT request
FROM v_internal.dc_requests_issued
WHERE transaction_id = :trxid
  AND statement_id = :stmtid;
\pset tuples_only

-- ------------------------------------------------------------------------
-- Query duration
-- ------------------------------------------------------------------------
\echo Step 1: Query duration
-- Total wall-clock duration of the statement, in microseconds.
SELECT query_duration_us
FROM v_monitor.query_profiles
WHERE transaction_id = :trxid
  AND statement_id = :stmtid;

-- ------------------------------------------------------------------------
-- Query execution steps
-- ------------------------------------------------------------------------
\echo Step 2: Query execution steps
-- Time spent in each internal execution step.
SELECT execution_step,
       SUM(completion_time - time) AS elapsed
FROM v_internal.dc_query_executions
WHERE transaction_id = :trxid
  AND statement_id = :stmtid
GROUP BY execution_step;

-- ------------------------------------------------------------------------
-- Memory allocated
-- ------------------------------------------------------------------------
\echo Step 3: Memory allocated
-- Memory acquired per node, request type, and resource pool.
SELECT node_name,
       request_type,
       pool_name,
       SUM(memory_kb) AS memory_kb
FROM v_internal.dc_resource_acquisitions
WHERE transaction_id = :trxid
  AND statement_id = :stmtid
GROUP BY node_name, request_type, pool_name
ORDER BY request_type, node_name;
\qecho Allocated = memory used during the planning phase
\qecho Reserve = memory used during the execution phase

-- ------------------------------------------------------------------------
-- Query plan
-- ------------------------------------------------------------------------
\echo Step 4: Query execution plan
-- Profiled plan, one row per plan line, in display order.
SELECT path_line
FROM v_monitor.query_plan_profiles
WHERE transaction_id = :trxid
  AND statement_id = :stmtid
ORDER BY path_id, path_line_index;

-- ------------------------------------------------------------------------
-- Elapsed & memory allocated by path_id
-- ------------------------------------------------------------------------
\echo Step 5: Elapsed & memory allocated by path_id
-- One row per plan path: runtime, memory, and the path's first plan line.
SELECT path_id,
       running_time,
       memory_allocated_bytes,
       path_line
FROM v_monitor.query_plan_profiles
WHERE path_line_index = 1
  AND transaction_id = :trxid
  AND statement_id = :stmtid
ORDER BY path_id;

-- ------------------------------------------------------------------------
-- Query events
-- ------------------------------------------------------------------------
\pset expanded
\echo Step 6: Query events
-- Optimizer/executor events raised by the statement, most frequent first.
SELECT event_type,
       event_description,
       COUNT(*)
FROM v_monitor.query_events
WHERE transaction_id = :trxid
  AND statement_id = :stmtid
GROUP BY event_type, event_description
ORDER BY COUNT(*) DESC;
\pset expanded

-- ------------------------------------------------------------------------
-- Query execution report
-- ------------------------------------------------------------------------
\echo Step 7: Query execution report
-- Per node/operator/path: execution and clock time, row counts, and memory.
SELECT node_name,
       operator_name,
       path_id,
       (SUM(CASE counter_name WHEN 'execution time (us)' THEN counter_value ELSE NULL END) / 1000)::numeric(12,3) AS "exec time(ms)",
       (SUM(CASE counter_name WHEN 'clock time (us)' THEN counter_value ELSE NULL END) / 1000)::numeric(12,3) AS "clock time (ms)",
       SUM(CASE counter_name WHEN 'estimated rows produced' THEN counter_value ELSE NULL END) AS "est. rows",
       SUM(CASE counter_name WHEN 'rows processed' THEN counter_value ELSE NULL END) AS "rows processed",
       SUM(CASE counter_name WHEN 'rows produced' THEN counter_value ELSE NULL END) AS "rows produced",
       (SUM(CASE counter_name WHEN 'memory reserved (bytes)' THEN counter_value ELSE NULL END) / 1048576)::numeric(12,1) AS "memory reserved (MiB)",
       (SUM(CASE counter_name WHEN 'memory allocated (bytes)' THEN counter_value ELSE NULL END) / 1048576)::numeric(12,1) AS "memory allocated (MiB)"
FROM v_monitor.execution_engine_profiles
WHERE transaction_id = :trxid
  AND statement_id = :stmtid
  -- NOTE(review): this pre-filter drops counter rows whose value is under
  -- 1,000,000 (e.g. small row counts), so totals may be understated — confirm
  -- this is intentional before relying on the absolute numbers.
  AND counter_value / 1000000 > 0
  AND counter_name IN (
      'execution time (us)',
      'clock time (us)',
      'estimated rows produced',
      'rows processed',
      'rows produced',
      'memory reserved (bytes)',
      'memory allocated (bytes)')
GROUP BY node_name, operator_name, path_id
ORDER BY "clock time (ms)" DESC;

• RaghavA, Thanks a lot!