How to investigate a delay of Copy statement.

Kaito · November 2018

After some environments changed,existing Copy statements become slower. If you have any knowledge and tips to find the bottleneck, could you give me your advice?

RaghavA · November 2018

Hi Kaito,

To investigate such issues, we follow below procedure to find any bottlenecks in the COPY command performance.

Step 1: Calculate the delay from load_streams to see how much is the delay in current system behavior:

select 
  table_name, 
  transaction_id, 
  statement_id, 
  (sysdate - load_start::timestamp) as "DELAY",
  round(load_duration_ms/60000,2.0) "load_duration_mins",
  round(input_file_size_bytes/1024^3,2.0) as "INPUT_GB", 
  round(read_bytes/1024^3,2.0) as "READ_GB", 
  parse_complete_percent,
  sort_complete_percent 
from load_streams order by table_name;

Steps 2: Extact the details from dc_execution_engine_profiles, dc_sips_statistics, resource_acquisitions to see the bottleneck:

Just save the below lines into SQL file, replace the trxid & stmtid from above SQL and run this script.

\timing
\set trxid 495xxxxxxxxxxxxx753
\set stmtid 1
\o prf_tun.out
\set QUIET on
\pset null '(null)'

SELECT node_name, operator_name, path_id, sum(counter_value) counter_sum, count(DISTINCT operator_id) num_operators FROM dc_execution_engine_profiles WHERE transaction_id = :trxid AND statement_id = :stmid AND counter_name ilike 'execution%'
GROUP BY 1,2,3 ORDER BY 4 DESC LIMIT 20;

\set pathid
SELECT operator_name, counter_name, path_id, COUNT(DISTINCT operator_id), AVG(counter_value) FROM 
execution_engine_profiles where transaction_id = :trxid AND statement_id = :stmid AND path_id = :pathid AND node_name = (select local_node_name()) GROUP BY 1,2,3 HAVING SUM(COUNTER_VALUE) > 0 ORDER BY 1;

SELECT path_id, operator_name, COUNT(DISTINCT operator_id) num_operators, COUNT(DISTINCT node_name) num_nodes,
SUM(DECODE(counter_name, 'bytes received',counter_value, NULL)) AS 'bytes_received',
SUM(DECODE(counter_name, 'bytes sent',counter_value, NULL)) AS 'bytes_sent',
SUM(DECODE(counter_name, 'execution time (us)',counter_value, NULL)) AS 'execution_time_us',
SUM(DECODE(counter_name, 'rows received',counter_value, NULL)) AS 'rows_received',
SUM(DECODE(counter_name, 'rle rows produced',counter_value, NULL)) AS 'rle_rows_produced',
SUM(DECODE(counter_name, 'rows received',counter_value, NULL)) AS 'rows_prod',
SUM(DECODE(counter_name, 'consumer stall(us)',counter_value, NULL)) AS 'consumer_stall_us',
SUM(DECODE(counter_name, 'producer stall(us)',counter_value, NULL)) AS 'prod_stall_us',
SUM(DECODE(counter_name, 'cumulative size of raw temp data (bytes)',counter_value, NULL)) AS 'temp_data'
FROM execution_engine_profiles
WHERE transaction_id = :trxid AND statement_id = :stmid AND node_name = (select local_node_name())
GROUP BY 1,2 ORDER BY 1,2;

SELECT node_name, time, sip_expr_id, sip_entries, rows_processed, rows_pruned, blocks_processed, blocks_pruned, block_pruned_by_valuelist
FROM dc_sips_statistics
WHERE transaction_id = :trxid AND statement_id = :stmid AND node_name = (select local_node_name());

Few more SQL to help you to see if bottleneck is due to resource usage:

-- Show load statistics with resource usage

select q.query_start,
       q.schema_name,
       q.table_name,
       (q.query_duration_us/1000000)::int as sec,
       q.processed_row_count as rows,
       (q.processed_row_count/(q.query_duration_us/1000000))::int as rps,
       (q.query ilike '% direct %') as is_direct,
       avg(r.average_cpu_usage_percent)::numeric(18,2) avg_cpu,
       avg(r.io_read_kbytes_per_second/1024)::int as avg_mbps_read,
       avg(r.io_written_kbytes_per_second/1024)::int as avg_mbps_write
  from QUERY_PROFILES q join
       SYSTEM_RESOURCE_USAGE r
    on r.end_time between timestamp_round(q.query_start::timestamp,'mi') + '1 minute'::interval 
                      and timestamp_round((q.query_start::timestamp + (60+q.query_duration_us/1000000) * '1 second'::interval),'mi')
 where q.query_type = 'LOAD'
   and q.query_start::timestamp > sysdate-1/24
 group by 1, 2, 3, 4, 5, 6, 7
 order by q.query_start desc;


-- Show load statistics for 1 SQL 

 select q.query_start,
       q.schema_name,
       q.table_name,
       (q.query_duration_us/1000000)::int as sec,
       q.processed_row_count as rows,
       (q.processed_row_count/(q.query_duration_us/1000000))::int as rps,
       (q.query ilike '% direct %') as is_direct,
       avg(r.average_cpu_usage_percent)::numeric(18,2) avg_cpu,
       avg(r.io_read_kbytes_per_second/1024)::int as avg_mbps_read,
       avg(r.io_written_kbytes_per_second/1024)::int as avg_mbps_write
  from QUERY_PROFILES q join
       SYSTEM_RESOURCE_USAGE r
    on r.end_time between timestamp_round(q.query_start::timestamp,'mi') + '1 minute'::interval 
                      and timestamp_round((q.query_start::timestamp + (60+q.query_duration_us/1000000) * '1 second'::interval),'mi')
 where q.query_type = 'LOAD'
   and q.query_start::timestamp > sysdate-1/24
   and q.transaction_id=xxxxxxxxxxxxx
   and q.statement_id=xxxx
 group by 1, 2, 3, 4, 5, 6, 7
 order by q.query_start desc;

RaghavA · November 2018

This SQL script may also help you in debugging:

-- ------------------------------------------------------------------------
-- Query execution profiling. Before running this script profile the Q.
-- Set trxid and stmtid as shown in the Profile's hint
-- ------------------------------------------------------------------------

\set trxid 49539595933799753
\set stmtid 1
\o qprof.out
\set QUIET on
\pset null '(null)'

-- ------------------------------------------------------------------------
-- Query text
-- ------------------------------------------------------------------------
\echo Step 0: Query text
\pset tuples_only
select
   request 
from 
   v_internal.dc_requests_issued 
where  
   transaction_id=:trxid and 
   statement_id=:stmtid 
;
\pset tuples_only

-- ------------------------------------------------------------------------
-- Query duration
-- ------------------------------------------------------------------------
\echo Step 1: Query duration
select
   query_duration_us 
from 
   v_monitor.query_profiles 
where 
   transaction_id=:trxid and 
   statement_id=:stmtid
;

-- ------------------------------------------------------------------------
-- Query execution steps
-- ------------------------------------------------------------------------
\echo Step 2: Query execution steps
select
   execution_step, 
   sum(completion_time - time) as elapsed
from 
   v_internal.dc_query_executions 
where 
   transaction_id=:trxid and 
   statement_id=:stmtid 
group by 
   1
;

-- ------------------------------------------------------------------------
-- Memory allocated
-- ------------------------------------------------------------------------
\echo Step 3: Memory allocated
select
   node_name,
   request_type,
   pool_name, 
   sum(memory_kb) as memory_kb
from 
   v_internal.dc_resource_acquisitions 
where 
   transaction_id=:trxid and 
   statement_id=:stmtid 
group by 
   1,
   2,
   3
order by
   2,
   1
;
\qecho Please note:
\qecho Allocated = memory used during the planning phase
\qecho Reserve = memory used during the execution phase

-- ------------------------------------------------------------------------
-- Query plan
-- ------------------------------------------------------------------------
\echo Step 4: Query execution plan
select
   path_line 
from 
   v_monitor.query_plan_profiles 
where 
   transaction_id=:trxid and 
   statement_id=:stmtid 
order by 
   path_id,
   path_line_index
;

-- ------------------------------------------------------------------------
-- Elapsed & memory allocated by path_id
-- ------------------------------------------------------------------------
\echo Step 5: Elapsed & memory allocated by path_id
select
   path_id, 
   running_time, 
   memory_allocated_bytes, 
   path_line 
from 
   v_monitor.query_plan_profiles 
where 
   path_line_index = 1 and 
   transaction_id=:trxid and 
   statement_id=:stmtid 
order by 
   path_id
;

-- ------------------------------------------------------------------------
-- Query events
-- ------------------------------------------------------------------------
\pset expanded
\echo Step 6: Query events
select 
   event_type, event_description, count(*) 
from
   v_monitor.query_events 
where 
   transaction_id=:trxid and 
   statement_id=:stmtid 
group by event_type, event_description
order by 
   3 desc
;
\pset expanded

-- ------------------------------------------------------------------------
-- Query execution report
-- ------------------------------------------------------------------------
\echo Step 7: Query execution report
select
       node_name ,
       operator_name,
       path_id,
       ( sum ( case counter_name when 'execution time (us)' then counter_value else null end ) / 1000 )::numeric(12,3) as "exec time(ms)",
       ( sum ( case counter_name when 'clock time (us)' then counter_value else null end ) / 1000 )::numeric(12,3) as "clock time (ms)",
       sum ( case counter_name when 'estimated rows produced' then counter_value else null end ) as "est. rows",
       sum ( case counter_name when 'rows processed' then counter_value else null end ) as "rows processed",
       sum ( case counter_name when 'rows produced' then counter_value else null end ) as "rows produced",
       ( sum ( case counter_name when 'memory reserved (bytes)' then counter_value else null end ) / 1048576 )::numeric(12,1) as "memory reserved (MiB)",
       ( sum ( case counter_name when 'memory allocated (bytes)' then counter_value else null end ) / 1048576 )::numeric(12,1) as "memory allocated (MiB)"
from
       v_monitor.execution_engine_profiles
where
       transaction_id = :trxid and
       statement_id = :stmtid and
       counter_value/1000000 > 0 and
       counter_name in (
               'execution time (us)',
               'clock time (us)',
               'estimated rows produced',
               'rows processed',
               'rows produced',
               'memory reserved (bytes)',
               'memory allocated (bytes)')
group by
       1,
       2,
       3
order by
       5 desc
;

Kaito · December 2018

RaghavA, Thanks alot!

How to investigate a delay of Copy statement.

Comments

Leave a Comment