Wednesday, August 1, 2018

Update bulk records (billions of records) table using DBMS_PARALLEL_EXECUTE

The DBMS_PARALLEL_EXECUTE package allows a workload associated with a base table to be broken down into smaller chunks which can be run in parallel. This can be used instead of bulk collect update if you are executing update or delete or insert operations on a single table.

--    Beginning of the code

DECLARE
   l_task       VARCHAR2 (30) := 'test_task';
   l_sql_stmt   VARCHAR2 (32767);
   l_try        NUMBER;
   l_status     NUMBER;
BEGIN
   -- Create the TASK
   DBMS_PARALLEL_EXECUTE.CREATE_TASK (l_task);

   -- Chunk the table by ROWID
   DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_ROWID (l_task
                                               , 'AR'
                                               , 'HZ_PARTIES'
                                               , TRUE
                                               , 10000);

   -- Execute the DML in parallel
   l_sql_stmt :=
      'UPDATE /*+ rowid (tbl) */ AR.HZ_PARTIES tbl
          SET PARTY_NAME=PARTY_NAME
        WHERE rowid BETWEEN :start_id AND :end_id';

   DBMS_PARALLEL_EXECUTE.RUN_TASK (l_task
                                 , l_sql_stmt
                                 , DBMS_SQL.NATIVE
                                 , parallel_level   => 32);

   -- If there is an error, RESUME it for at most 2 times.
   L_try := 0;
   L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS (l_task);

   WHILE (l_try < 2 AND L_status != DBMS_PARALLEL_EXECUTE.FINISHED)
   LOOP
      L_try := l_try + 1;

      DBMS_PARALLEL_EXECUTE.RESUME_TASK (l_task);

      L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS (l_task);
   END LOOP;
   
   -- Done with processing; drop the task

   DBMS_PARALLEL_EXECUTE.DROP_TASK (l_task);
END;
/

--    End of the code

-- Check the task created using the DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_ROWID
SELECT   task_name, status
  FROM   user_parallel_execute_tasks
 WHERE   task_name = 'test_task';

--The USER_PARALLEL_EXECUTE_CHUNKS views display information about the individual chunks.
  SELECT   chunk_id
         , status
         , start_rowid
         , end_rowid
    FROM   user_parallel_execute_chunks
   WHERE   task_name = 'test_task'
ORDER BY   chunk_id;

-- If there were errors, the chunks can be queried to identify the problems.
  SELECT   status, COUNT (1) status_count
    FROM   user_parallel_execute_chunks
   WHERE   task_name = 'test_task'
GROUP BY   status;

-- Check the status of individual tasks created using the DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_ROWID
SELECT   job_name, status
  FROM   user_scheduler_job_run_details
 WHERE   job_name LIKE
            (SELECT   job_prefix || '%'
               FROM   user_parallel_execute_tasks
              WHERE   task_name = 'test_task');

-- You can see the activity ran in parallel by checking the SESSION_ID column that was set during the update.
  SELECT   session_id, COUNT ( * )
    FROM   test_tab
GROUP BY   session_id
ORDER BY   session_id;


/* Instead of using the below script you can use above script */
set verify on
set show on
set echo on
set feedback on
set timing on
spool bulk_collect_hz_parties_data_masking.log

SELECT   name FROM v$database;

SELECT   TO_CHAR (SYSDATE, 'mm/dd/yyyy hh24:mi') FROM DUAL;

DECLARE
   CURSOR rec_cur
   IS
      SELECT                                             /*+ PARALLEL(hp,6) */
            party_id
             , TRANSLATE (
                  party_name
                , 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()~`?<>''''|:;-+[]{},./'
                , 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
               )
                  party_name
             , TRANSLATE (
                  person_first_name
                , 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()~`?<>''''|:;-+[]{},./'
                , 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
               )
                  person_first_name
             , TRANSLATE (
                  person_middle_name
                , 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()~`?<>''''|:;-+[]{},./'
                , 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
               )
                  person_middle_name
             , TRANSLATE (
                  person_last_name
                , 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()~`?<>''''|:;-+[]{},./'
                , 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
               )
                  person_last_name
             , NVL2 (email_address, 'user@usps.gov', NULL)
             , TRANSLATE (primary_phone_number, '012345678', '999999999')
                  primary_phone_number
             , TRANSLATE (primary_phone_area_code, '012345678', '999999999')
                  primary_phone_area_code
             , TRANSLATE (primary_phone_country_code
                        , '012345678'
                        , '999999999')
                  primary_phone_country_code
        FROM   apps.hz_parties hp;

   TYPE vc1_party_id_t IS TABLE OF NUMBER;

   TYPE vc2_party_name_t IS TABLE OF VARCHAR2 (1000);

   TYPE vc2_person_first_name_t IS TABLE OF VARCHAR2 (1000);

   TYPE vc2_person_middle_name_t IS TABLE OF VARCHAR2 (1000);

   TYPE vc2_person_last_name_t IS TABLE OF VARCHAR2 (1000);

   TYPE vc2_email_address_t IS TABLE OF VARCHAR2 (1000);

   TYPE vc2_phone_number_t IS TABLE OF VARCHAR2 (256);

   TYPE vc2_phone_area_code_t IS TABLE OF VARCHAR2 (256);

   TYPE vc2_phone_country_code_t IS TABLE OF VARCHAR2 (256);

   vc1_party_id             vc1_party_id_t;
   vc2_party_name           vc2_party_name_t;
   vc2_person_first_name    vc2_person_first_name_t;
   vc2_person_middle_name   vc2_person_middle_name_t;
   vc2_person_last_name     vc2_person_last_name_t;
   vc2_email_address        vc2_email_address_t;
   vc2_phone_number         vc2_phone_number_t;
   vc2_phone_area_code      vc2_phone_area_code_t;
   vc2_phone_country_code   vc2_phone_country_code_t;
BEGIN
   OPEN rec_cur;

   LOOP
      FETCH rec_cur
         BULK COLLECT INTO
                                vc1_party_id, vc2_party_name, vc2_person_first_name, vc2_person_middle_name
                                , vc2_person_last_name, vc2_email_address, vc2_phone_number, vc2_phone_area_code
                                , vc2_phone_country_code
         LIMIT 10000;

      EXIT WHEN vc1_party_id.COUNT () = 0;

      FORALL i IN vc1_party_id.FIRST .. vc1_party_id.LAST
         UPDATE                                          /*+ PARALLEL(hp,6) */
               apps.hz_parties hp
            SET   party_name = vc2_party_name (i)
                , person_first_name = vc2_person_first_name (i)
                , person_middle_name = vc2_person_middle_name (i)
                , person_last_name = vc2_person_last_name (i)
                , email_address = vc2_email_address (i)
                , primary_phone_number = vc2_phone_number (i)
                , primary_phone_area_code = vc2_phone_area_code (i)
                , primary_phone_country_code = vc2_phone_country_code (i)
          WHERE   party_id = vc1_party_id (i);

      COMMIT;
   END LOOP;

   CLOSE rec_cur;

   COMMIT;
END;
/

SELECT   TO_CHAR (SYSDATE, 'mm/dd/yyyy hh24:mi') FROM DUAL;

spool off
set verify off
set show off
set feedback off
set timing off
 

No comments:

Post a Comment