客服微信

本文为云贝教育 刘峰 原创,请尊重知识产权,转发请注明出处,不接受任何抄袭、演绎和未经注明出处的转载。
外部表允许 Oracle 查询存储在数据库外部平面文件中的数据。 ORACLE_LOADER 驱动程序可用于访问以任何格式存储,主要由 SQL*Loader 和DUMP加载的数据。不能对外部表执行 DML,但它们可以用于查询、联接和排序操作。可以针对外部表创建视图和同义词。它们在数据仓库的 ETL 过程中非常有用,因为数据不需要暂存并且可以并行查询。它们不应该用于频繁查询的表。
创建一个指向文件位置的目录对象。
mkdir -p /home/oracle/loader_dir CREATE OR REPLACE DIRECTORY ext_tab_data AS '/home/oracle/loader_dir'; grant read,write on directory ext_tab_data to public;
目标数据如下
#Countries1.txt ENG,England,English SCO,Scotland,English IRE,Ireland,English WAL,Wales,Welsh #Countries2.txt FRA,France,French GER,Germany,German USA,Unites States of America,English
使用 CREATE TABLE..ORGANIZATION EXTERNAL 语法创建外部表。这定义了表的元数据,描述它应该如何显示以及如何加载数据。
CREATE TABLE countries_ext (
country_code VARCHAR2(5),
country_name VARCHAR2(50),
country_language VARCHAR2(50)
)
ORGANIZATION EXTERNAL (
TYPE ORACLE_LOADER
DEFAULT DIRECTORY ext_tab_data
ACCESS PARAMETERS (
RECORDS DELIMITED BY NEWLINE
FIELDS TERMINATED BY ','
MISSING FIELD VALUES ARE NULL
(
country_code CHAR(5),
country_name CHAR(50),
country_language CHAR(50)
)
)
LOCATION ('Countries1.txt','Countries2.txt')
)
PARALLEL 4
REJECT LIMIT UNLIMITED;
外部表创建后,就可以像普通表一样进行查询。
SELECT *
FROM countries_ext
ORDER BY country_name;
COUNT COUNTRY_NAME COUNTRY_LANGUAGE
----- ---------------------------- -----------------------------
ENG England English
FRA France French
GER Germany German
IRE Ireland English
SCO Scotland English
USA Unites States of America English
WAL Wales Welsh
7 rows selected.
如果加载文件尚未保存在适当的目录中,将显示以下结果。
SQL> SELECT * 2 FROM countries_ext 3 ORDER BY country_name; SELECT * * ERROR at line 1: ORA-29913: error in executing ODCIEXTTABLEOPEN callout ORA-29400: data cartridge error KUP-04040: file Countries1.txt in EXT_TAB_DATA not found ORA-06512: at "SYS.ORACLE_LOADER", line 14 ORA-06512: at line 1
一旦表正常运行,我们就可以针对它创建视图。
CREATE OR REPLACE VIEW english_speaking_countries AS SELECT * FROM countries_ext WHERE country_language = 'English' ORDER BY country_name; SELECT * FROM english_speaking_countries; COUNT COUNTRY_NAME LANGUAGE ----- ---------------------------- ---------------------------- ENG England English IRE Ireland English SCO Scotland English USA Unites States of America English 4 rows selected.
默认情况下,加载操作日志创建在与加载文件相同的目录中,但可以使用 LOGFILE 参数更改此设置。
以下示例展示了如何使用外部表来查询警报日志的内容。
CREATE OR REPLACE DIRECTORY bdump AS '/opt/oracle/diag/rdbms/orclcdb/ORCLCDB/trace/';
DROP TABLE alert_log;
CREATE TABLE alert_log (
line VARCHAR2(4000)
)
ORGANIZATION EXTERNAL
(
TYPE ORACLE_LOADER
DEFAULT DIRECTORY bdump
ACCESS PARAMETERS
(
RECORDS DELIMITED BY NEWLINE
BADFILE bdump:'read_alert_%a_%p.bad'
LOGFILE bdump:'read_alert_%a_%p.log'
FIELDS TERMINATED BY '~'
MISSING FIELD VALUES ARE NULL
(
line CHAR(4000)
)
)
LOCATION ('alert_orcl.log ')
)
PARALLEL 1
REJECT LIMIT UNLIMITED
/
SET LINESIZE 1000
SELECT * FROM alert_log;
1.3、PREPROCESSOR
PREPROCESSOR 子句来标识用于在外部表读取文件之前处理文件的目录对象和脚本。 PREPROCESSOR 子句对于读取压缩文件特别有用,因为它们被解压缩并直接通过管道传输到外部表进程中,而无需在文件系统上解压缩。
CREATE OR REPLACE DIRECTORY exec_dir AS '/usr/bin';
drop table countries_ext;
CREATE TABLE countries_ext (
country_code VARCHAR2(5),
country_name VARCHAR2(50),
country_language VARCHAR2(50)
)
ORGANIZATION EXTERNAL (
TYPE ORACLE_LOADER
DEFAULT DIRECTORY ext_tab_data
ACCESS PARAMETERS (
RECORDS DELIMITED BY NEWLINE
PREPROCESSOR exec_dir:'zcat'
FIELDS TERMINATED BY ','
MISSING FIELD VALUES ARE NULL
(
country_code CHAR(5),
country_name CHAR(50),
country_language CHAR(50)
)
)
LOCATION ('Countries1.txt.gz','Countries2.txt.gz')
)
PARALLEL 5
REJECT LIMIT UNLIMITED;
select * from countries_ext;
必须对包含可执行文件或脚本的目录授予 EXECUTE 权限。
如果您需要指定命令行参数,您应该创建一个脚本来执行该操作,然后在 PREPROCESSOR 子句中调用该脚本。例如,假设我们在可执行目录中有一个名为“my_unzip.sh”的可执行文件,其中包含以下内容。
#!/bin/bash /bin/gunzip -c $1
我们可以在 PREPROCESSOR 子句中使用以下内容。
PREPROCESSOR exec_dir:'my_unzip.sh'
1.4、相关视图
SELECT COUNT(*) FROM dba_external_tables;
COUNT(*)
----------
194
CONN scott/tiger@pdb1
-- Delete if it already exists.
DROP TABLE emp_ext;
HOST rm /tmp/emp_ext.dmp
-- Unload EMP table into dump file using compression.
CREATE TABLE emp_ext
ORGANIZATION EXTERNAL
(
TYPE ORACLE_DATAPUMP
DEFAULT DIRECTORY TEST_DIR
ACCESS PARAMETERS (COMPRESSION ENABLED MEDIUM)
LOCATION ('emp_ext.dmp')
)
AS SELECT * FROM emp;
-- Create a new external table using the compressed dump file.
DROP TABLE emp2_ext;
CREATE TABLE emp2_ext (
EMPNO NUMBER(4),
ENAME VARCHAR2(10),
JOB VARCHAR2(9),
MGR NUMBER(4),
HIREDATE DATE,
SAL NUMBER(7,2),
COMM NUMBER(7,2),
DEPTNO NUMBER(2))
ORGANIZATION EXTERNAL (
TYPE ORACLE_DATAPUMP
DEFAULT DIRECTORY TEST_DIR
LOCATION ('emp_ext.dmp')
);
SELECT * FROM emp2_ext;
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- --------- ---------- ---------- ----------
7369 SMITH CLERK 7902 17-DEC-80 800 20
7499 ALLEN SALESMAN 7698 20-FEB-81 1600 300 30
7521 WARD SALESMAN 7698 22-FEB-81 1250 500 30
7566 JONES MANAGER 7839 02-APR-81 2975 20
7654 MARTIN SALESMAN 7698 28-SEP-81 1250 1400 30
7698 BLAKE MANAGER 7839 01-MAY-81 2850 30
7782 CLARK MANAGER 7839 09-JUN-81 2450 10
7788 SCOTT ANALYST 7566 19-APR-87 3000 20
7839 KING PRESIDENT 17-NOV-81 5000 10
7844 TURNER SALESMAN 7698 08-SEP-81 1500 0 30
7876 ADAMS CLERK 7788 23-MAY-87 1100 20
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- --------- ---------- ---------- ----------
7900 JAMES CLERK 7698 03-DEC-81 950 30
7902 FORD ANALYST 7566 03-DEC-81 3000 20
7934 MILLER CLERK 7782 23-JAN-82 1300 10
14 rows selected

为了演示外部表,我们需要一些平面文件中的数据。以下代码假脱机输出四个 CSV 文件,每个文件有 1000 行。
CONN / AS SYSDBA
ALTER SESSION SET CONTAINER = pdb1;
SET MARKUP CSV ON QUOTE ON
SET TRIMSPOOL ON LINESIZE 1000 FEEDBACK OFF PAGESIZE 0
SPOOL /tmp/gbr1.txt
SELECT 'GBR',
object_id,
owner,
object_name
FROM dba_objects
WHERE object_id <= 2000 AND rownum <= 1000; SPOOL OFF SPOOL /tmp/gbr2.txt SELECT 'GBR', object_id, owner, object_name FROM dba_objects WHERE object_id BETWEEN 2000 AND 3999 AND rownum <= 1000; SPOOL OFF SPOOL /tmp/ire1.txt SELECT 'IRE', object_id, owner, object_name FROM dba_objects WHERE object_id <= 2000 AND rownum <= 1000; SPOOL OFF SPOOL /tmp/ire2.txt SELECT 'IRE', object_id, owner, object_name FROM dba_objects WHERE object_id BETWEEN 2000 AND 3999 AND rownum <= 1000; SPOOL OFF SET MARKUP CSV OFF SET TRIMSPOOL ON LINESIZE 1000 FEEDBACK OFF PAGESIZE 14
创建两个目录对象来访问这些文件。在这种情况下,两者都指向同一个目录,但它仍然允许我们看到语法。
CONN / AS SYSDBA
ALTER SESSION SET CONTAINER = pdb1;
SET MARKUP CSV ON QUOTE ON
SET TRIMSPOOL ON LINESIZE 1000 FEEDBACK OFF PAGESIZE 0
SPOOL /tmp/gbr1.txt
SELECT 'GBR',
object_id,
owner,
object_name
FROM dba_objects
WHERE object_id <= 2000 AND rownum <= 1000; SPOOL OFF SPOOL /tmp/gbr2.txt SELECT 'GBR', object_id, owner, object_name FROM dba_objects WHERE object_id BETWEEN 2000 AND 3999 AND rownum <= 1000; SPOOL OFF SPOOL /tmp/ire1.txt SELECT 'IRE', object_id, owner, object_name FROM dba_objects WHERE object_id <= 2000 AND rownum <= 1000; SPOOL OFF SPOOL /tmp/ire2.txt SELECT 'IRE', object_id, owner, object_name FROM dba_objects WHERE object_id BETWEEN 2000 AND 3999 AND rownum <= 1000; SPOOL OFF SET MARKUP CSV OFF SET TRIMSPOOL ON LINESIZE 1000 FEEDBACK OFF PAGESIZE 14
3.2、创建分区外部表
以下示例根据我们之前创建的 CSV 文件创建一个列表分区外部表。每个分区可以有一个单独的位置定义,其中可以选择包含目录定义。如果未定义位置,则分区将被视为空。
CONN test/oracle@pdb1
DROP TABLE part_tab_ext;
CREATE TABLE part_tab_ext (
country_code VARCHAR2(3),
object_id NUMBER,
owner VARCHAR2(128),
object_name VARCHAR2(128)
)
ORGANIZATION EXTERNAL (
TYPE oracle_loader
DEFAULT DIRECTORY tmp_dir1
ACCESS PARAMETERS (
FIELDS CSV WITH EMBEDDED TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
MISSING FIELD VALUES ARE NULL (
country_code,
object_id,
owner,
object_name
)
)
)
REJECT LIMIT UNLIMITED
PARTITION BY LIST (country_code) (
PARTITION part_gbr VALUES ('GBR') LOCATION ('gbr1.txt', 'gbr2.txt'),
PARTITION part_usa VALUES ('IRE') DEFAULT DIRECTORY tmp_dir2 LOCATION ('ire1.txt', 'ire2.txt'),
PARTITION part_others VALUES ('XXX')
);
查询外部表显示文件正在被正确读取。
SELECT country_code, COUNT(*) FROM part_tab_ext GROUP BY country_code ORDER BY country_code; COU COUNT(*) --- ---------- GBR 2000 IRE 2000
如果我们收集统计数据,我们可以看到行是如何在分区之间分布的。
EXEC DBMS_STATS.gather_table_stats(USER, 'part_tab_ext');
SET LINESIZE 120
COLUMN table_name FORMAT A20
COLUMN partition_name FORMAT A20
COLUMN high_value FORMAT A20
SELECT table_name,
partition_name,
high_value,
num_rows
FROM user_tab_partitions
WHERE table_name = 'PART_TAB_EXT'
ORDER BY 1, 2;
TABLE_NAME PARTITION_NAME HIGH_VALUE NUM_ROWS
-------------------- -------------------- -------------------- ----------
PART_TAB_EXT PART_GBR 'GBR' 2000
PART_TAB_EXT PART_OTHERS 'XXX' 0
PART_TAB_EXT PART_USA 'IRE' 2000
3.3、子分区外部表
以下示例根据我们之前创建的 CSV 文件创建一个列表范围子分区外部表。每个子分区可以有一个单独的位置定义,其中可以选择包含分区或子分区级别的目录定义。
CONN test/test@pdb1
DROP TABLE subpart_tab_ext;
CREATE TABLE subpart_tab_ext (
country_code VARCHAR2(3),
object_id NUMBER,
owner VARCHAR2(128),
object_name VARCHAR2(128)
)
ORGANIZATION EXTERNAL (
TYPE oracle_loader
DEFAULT DIRECTORY tmp_dir1
ACCESS PARAMETERS (
FIELDS CSV WITH EMBEDDED TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
MISSING FIELD VALUES ARE NULL (
country_code,
object_id,
owner,
object_name
)
)
)
REJECT LIMIT UNLIMITED
PARTITION BY LIST (country_code)
SUBPARTITION BY RANGE (object_id) (
PARTITION part_gbr VALUES ('GBR') (
SUBPARTITION subpart_gbr_le_2000 VALUES LESS THAN (2000) LOCATION ('gbr1.txt'),
SUBPARTITION subpart_gbr_gt_2000 VALUES LESS THAN (MAXVALUE) DEFAULT DIRECTORY tmp_dir2 LOCATION ('gbr2.txt')
),
PARTITION part_ire VALUES ('IRE') DEFAULT DIRECTORY tmp_dir2 (
SUBPARTITION subpart_ire_le_2000 VALUES LESS THAN (2000) LOCATION ('ire1.txt'),
SUBPARTITION subpart_ire_gt_2000 VALUES LESS THAN (MAXVALUE) LOCATION ('ire2.txt')
)
);
查询外部表显示文件正在被正确读取。
SELECT country_code, COUNT(*) FROM subpart_tab_ext GROUP BY country_code ORDER BY country_code; COU COUNT(*) --- ---------- GBR 2000 IRE 2000
如果我们收集统计数据,我们可以看到行是如何在分区之间分布的。
EXEC DBMS_STATS.gather_table_stats(USER, 'subpart_tab_ext');
SET LINESIZE 120
COLUMN table_name FORMAT A20
COLUMN partition_name FORMAT A20
COLUMN subpartition_name FORMAT A20
COLUMN high_value FORMAT A20
SELECT table_name,
partition_name,
subpartition_name,
high_value,
num_rows
FROM user_tab_subpartitions
WHERE table_name = 'SUBPART_TAB_EXT'
ORDER BY 1, 2;
TABLE_NAME PARTITION_NAME SUBPARTITION_NAME HIGH_VALUE NUM_ROWS
-------------------- -------------------- -------------------- -------------------- ----------
SUBPART_TAB_EXT PART_GBR SUBPART_GBR_LE_2000 2000 1000
SUBPART_TAB_EXT PART_GBR SUBPART_GBR_GT_2000 MAXVALUE 1000
SUBPART_TAB_EXT PART_IRE SUBPART_IRE_LE_2000 2000 1000
SUBPART_TAB_EXT PART_IRE SUBPART_IRE_GT_2000 MAXVALUE 1000