Data Warehouse 大資料匯入的優化手段
-- External table: load CSV (and DMP, below) without a conventional load step.
-- Fixed: the original used "TYPE CSV", which is not a valid access driver —
-- Oracle external tables over delimited text use ORACLE_LOADER, with the
-- field layout given in ACCESS PARAMETERS.
CREATE TABLE emp_ext (
    id    NUMBER(4),
    addr  VARCHAR2(10),
    ctime DATE
)
ORGANIZATION EXTERNAL (
    TYPE ORACLE_LOADER
    DEFAULT DIRECTORY data_dir
    ACCESS PARAMETERS (
        RECORDS DELIMITED BY NEWLINE
        FIELDS TERMINATED BY ','
    )
    LOCATION ('test.csv')
)
REJECT LIMIT UNLIMITED;
-- Export: unload a heap table into a Data Pump dump file via an
-- ORACLE_DATAPUMP external table. Fixed: such a table can only be populated
-- at creation time (CTAS); the original's separate
-- "INSERT INTO emptest_ext SELECT * FROM emptest" raises ORA-30657
-- (operation not supported on external organized table).
CREATE TABLE emptest_ext
ORGANIZATION EXTERNAL (
    TYPE ORACLE_DATAPUMP
    DEFAULT DIRECTORY dump_dir
    LOCATION ('emptest.dmp')
)
PARALLEL 4
REJECT LIMIT UNLIMITED
AS SELECT * FROM emptest;

-- Import side: declare a read-only external table over the generated dump
-- file; column list must match the unloaded data.
CREATE TABLE emp_ext (
    id    NUMBER(4),
    addr  VARCHAR2(10),
    ctime DATE
)
ORGANIZATION EXTERNAL (
    TYPE ORACLE_DATAPUMP
    DEFAULT DIRECTORY data_dir
    LOCATION ('emptest.dmp')
)
REJECT LIMIT UNLIMITED;
適合資料量很大,並且都是 Full Table Scan 處理的情境;要留意的是 external table 無法建立 index
-- DBMS_DATAPUMP: run an export job entirely inside the database — no
-- OS-level expdp invocation required.
-- Fixed: "= >" is invalid PL/SQL (must be "=>"); filename must not embed a
-- "DIR:" prefix when the directory parameter is also supplied; the
-- "END ;/" terminator was broken.
DECLARE
    v_handle NUMBER;
    v_status VARCHAR2(20);
BEGIN
    -- remote_link pulls source data over a database link;
    -- remove the parameter for a purely local export job.
    v_handle := DBMS_DATAPUMP.OPEN(
        operation   => 'EXPORT',
        job_mode    => 'TABLE',
        remote_link => 'DBLINK_NAME');

    -- Dump file: name only; location comes from the directory object.
    DBMS_DATAPUMP.ADD_FILE(
        handle    => v_handle,
        filename  => 'table_export.dmp',
        directory => 'EXPORT_DIR',
        filetype  => DBMS_DATAPUMP.KU$_FILE_TYPE_DUMP_FILE);

    -- Log file for job diagnostics.
    DBMS_DATAPUMP.ADD_FILE(
        handle    => v_handle,
        filename  => 'table_export.log',
        directory => 'EXPORT_DIR',
        filetype  => DBMS_DATAPUMP.KU$_FILE_TYPE_LOG_FILE);

    -- Restrict the job to the wanted schema(s).
    DBMS_DATAPUMP.METADATA_FILTER(
        handle => v_handle,
        name   => 'SCHEMA_EXPR',
        value  => 'IN(''SCHEMA_NAME'')');

    DBMS_DATAPUMP.START_JOB(handle => v_handle);
    -- Block until the job finishes; v_status receives the final job state.
    DBMS_DATAPUMP.WAIT_FOR_JOB(handle => v_handle, job_state => v_status);
    DBMS_DATAPUMP.DETACH(handle => v_handle);
END;
/
-- DBMS_DATAPUMP: matching import job, also run entirely inside the database.
-- Fixed: "= >" is invalid PL/SQL (must be "=>"); filename must not embed a
-- "DIR:" prefix when the directory parameter is also supplied; the
-- "END ;/" terminator was broken.
DECLARE
    v_handle NUMBER;
    v_status VARCHAR2(20);
BEGIN
    -- remote_link imports straight over a database link;
    -- remove the parameter to import from the dump file only.
    v_handle := DBMS_DATAPUMP.OPEN(
        operation   => 'IMPORT',
        job_mode    => 'TABLE',
        remote_link => 'DBLINK_NAME');

    -- Dump file produced by the export job.
    DBMS_DATAPUMP.ADD_FILE(
        handle    => v_handle,
        filename  => 'table_export.dmp',
        directory => 'IMPORT_DIR',
        filetype  => DBMS_DATAPUMP.KU$_FILE_TYPE_DUMP_FILE);

    -- Log file for job diagnostics.
    DBMS_DATAPUMP.ADD_FILE(
        handle    => v_handle,
        filename  => 'table_export.log',
        directory => 'IMPORT_DIR',
        filetype  => DBMS_DATAPUMP.KU$_FILE_TYPE_LOG_FILE);

    -- Land the objects in a different schema than they were exported from.
    DBMS_DATAPUMP.METADATA_REMAP(
        handle    => v_handle,
        name      => 'REMAP_SCHEMA',
        old_value => 'OLD_SCHEMA_NAME',
        value     => 'NEW_SCHEMA_NAME');

    DBMS_DATAPUMP.START_JOB(handle => v_handle);
    -- Block until the job finishes; v_status receives the final job state.
    DBMS_DATAPUMP.WAIT_FOR_JOB(handle => v_handle, job_state => v_status);
    DBMS_DATAPUMP.DETACH(handle => v_handle);
END;
/
使用DBMS的方式不落地用expdp和impdp,一直在db裏完成,不需要進行os操作
Hint – insert direct path
INSERT /*+ APPEND */ 避開 Buffer Cache,直接寫入 DataFile;但要留意可能導致 index 失效,需要重建
-- Partition Table: range partitioning on t_date with a monthly INTERVAL —
-- Oracle materializes new partitions automatically as data past the last
-- declared boundary arrives.
CREATE TABLE emp (
    id     NUMBER,
    t_date DATE,
    amount NUMBER
)
PARTITION BY RANGE (t_date)
INTERVAL (NUMTOYMINTERVAL(1, 'MONTH'))
(
    -- Anchor partition: everything before 2022-01-01.
    PARTITION p1 VALUES LESS THAN (TO_DATE('2022-01-01', 'YYYY-MM-DD'))
);
自動增加Partition
-- Row movement is mandatory so updated rows may migrate between partitions.
ALTER TABLE emp ENABLE ROW MOVEMENT;

-- NOTE(review): DBMS_PART_ADMIN is not found in the standard Oracle package
-- documentation — confirm it exists in the target release before relying on
-- it. Native INTERVAL partitioning already auto-creates partitions.
EXEC DBMS_PART_ADMIN.ENABLE_AUTO_PARTITIONING(
    table_name => 'emp',
    interval   => 'INTERVAL(NUMTOYMINTERVAL(1, ''MONTH''))',
    start_date => '2022-01-01',
    retention  => 'INTERVAL ''1'' MONTH'  -- retention window: one month
);
自動刪除舊的Partition (19c可以用,較舊的版本要測一下)
-- Automatically drop partitions older than the retention window
-- (stated as 19c; test on earlier releases).
-- NOTE(review): DBMS_PART_ADMIN.ALTER_RETENTION is not in the standard
-- Oracle documentation — verify availability in the target version.
BEGIN
    DBMS_PART_ADMIN.ALTER_RETENTION(
        table_name => 'emp',
        retention  => 'INTERVAL ''1'' MONTH'
    );
END;
/
針對Index再優化 Partial Indexes
-- Partial index (Oracle 12c+): index only the table partitions flagged
-- INDEXING ON, skipping cold partitions entirely.
-- Fixed: "CREATE INDEX ... WHERE ..." is PostgreSQL partial-index syntax;
-- Oracle rejects a WHERE clause on CREATE INDEX. The Oracle form is
-- INDEXING PARTIAL, combined with per-partition INDEXING ON/OFF, e.g.:
--   ALTER TABLE emp MODIFY PARTITION p1 INDEXING OFF;
CREATE INDEX idx_emp_partial ON emp (t_date) INDEXING PARTIAL;
index生效的方式
-- Range predicate on the partition key enables partition pruning and use of
-- the partial index.
-- Fixed: the original read "FROM t_date" (the column name) instead of the
-- emp table, and compared the DATE column against bare strings, relying on
-- implicit conversion via NLS_DATE_FORMAT — use ANSI DATE literals instead.
SELECT *
FROM emp
WHERE t_date >= DATE '2022-02-01'
  AND t_date <= DATE '2022-04-01';
刪除和新增資料佔用I/O的時間錯開
換別的OLAP DB
考量用apache doris、clickhouse,要考量是否能維運
結論
1.首選先考慮升級成SSD
2.了解AP邏輯運作後再看有多少空檔時間可做維護,再選擇方案