Here is some engineering perspective:
The load_streams table (now) contains data on both ongoing loads and historical loads -- but the underlying mechanism for the historical data is not particularly optimized and can be big.
on
6.0, If you want a faster way to get access to the information in load_streams on
currently running load streams (no loads that have finished), you can try the following query (which is the definition of v_monitor.load_streams with all references to v_monitor.execution_engine_profiles changed to vs_execution_engine_profiles)
Your mileage will vary -- this query will not be officially supported, etc.
Code: Select all
-- Load metrics for each load stream on each node.
-- input_size_file_bytes & parse_complete should be NULL if loading from STDIN or FIFO.
-- However, sum() returns an int if load is from say files and some FIFOs. To get around this problem, the sub-query returns total_rows & not_null rows.
-- If they arent equal, then one of the sources is a FIFO and NULL is chosen instead of sum(input_file_size_bytes)
select session_id,
table_accepted_row_count.transaction_id,
table_accepted_row_count.statement_id,
stream_name, schema_name,
table_name,
load_start,
load_duration_ms,
is_executing,
accepted_row_count,
rejected_row_count,
read_bytes,
case when total_rows = num_not_nulls then input_size
else null end as input_file_size_bytes,
case when total_rows = num_not_nulls and input_size > 0 then read_bytes * 100 // input_size
else null end as parse_complete_percent,
unsorted_row_count,
sorted_row_count,
(case when unsorted_row_count > 0 then sorted_row_count * 100 // unsorted_row_count else null end) as sort_complete_percent
from
(select transaction_id, statement_id, session_id, identifier as stream_name,
query_start as load_start,
query_duration_us // 1000 as load_duration_ms,
is_executing
from v_monitor.query_profiles
where query_type = 'LOAD' and (error_code = 0 or error_code is null)) as query_profiles
natural join
(select transaction_id, statement_id, sum(counter_value) as accepted_row_count
from vs_execution_engine_profiles
where operator_name = 'Load' and counter_name = 'rows produced'
group by transaction_id, statement_id) as table_accepted_row_count
natural join
(select distinct transaction_id, statement_id, table_schema as schema_name, table_name
from v_internal.dc_projections_used) as table_schema_name
natural join
(select transaction_id, statement_id, sum(counter_value) as rejected_row_count
from vs_execution_engine_profiles
where operator_name = 'Load' and counter_name = 'rows rejected'
group by transaction_id, statement_id) as table_rejected_row_count
natural join
(select transaction_id, statement_id, count(*) as total_rows,
count(counter_value) as num_not_nulls, sum(counter_value) as input_size
from vs_execution_engine_profiles
where (operator_name = 'Load' or operator_name = 'LoadUnion') and counter_tag = 'main' and counter_name like 'input size%'
group by transaction_id, statement_id) as table_input_file_size
natural join
(select transaction_id, statement_id, sum(counter_value) as read_bytes
from vs_execution_engine_profiles
where operator_name = 'Load' and counter_name like 'read %'
group by 1,2) as table_read_bytes
natural join
(select transaction_id, statement_id, sum(counter_value) as unsorted_row_count
from vs_execution_engine_profiles
where operator_name = 'DataTarget' and counter_name = 'input rows'
group by 1,2) as table_unsorted_row_count
natural join
(select transaction_id, statement_id, sum(counter_value) as sorted_row_count
from vs_execution_engine_profiles
where operator_name = 'DataTarget' and counter_name = 'written rows'
group by 1,2) as table_sorted_row_count
order by is_executing desc, load_start desc
;