[20190827]函数索引与选择率.txt
--//一般情况下查询条件使用函数,选择率1%.如果建立虚拟列并且分析的情况下,能够获得比较准确的选择率。
--//如果建立对应的函数索引呢,情况会怎么呢?近遇到的问题,让我注意其中一些细节。
1.环境:
SCOTT@test01p> @ ver1
PORT_STRING VERSION BANNER CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0 12.2.0.1.0 Oracle Database 12c Enterprise Edition Release 12.2.0.1.0 - 64bit Production 0
SCOTT@test01p> create table t1 as select rownum id,dbms_random.string('U',10) v1 from dual connect by level<=1e4;
Table created.
--//分析略.Method_Opt => 'FOR ALL COLUMNS SIZE 1 '
SCOTT@test01p> select * from t1 where rownum=1;
ID V1
---------- --------------------
1 KCIXTFQWHZ
2.测试:
SCOTT@test01p> alter session set statistics_level = all;
Session altered.
SCOTT@test01p> select * from t1 where lower(v1)='kcixtfqwhz';
ID V1
---------- --------------------
1 KCIXTFQWHZ
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID ds718dy9422mr, child number 1
-------------------------------------
select * from t1 where lower(v1)='kcixtfqwhz'
Plan hash value: 3617692013
--------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 10 (100)| | 1 |00:00:00.01 | 41 |
|* 1 | TABLE ACCESS FULL| T1 | 1 | 100 | 1500 | 10 (0)| 00:00:01 | 1 |00:00:00.01 | 41 |
--------------------------------------------------------------------------------------------------------------------
--//E-Rows=100,
--//选择率100/10000 = .01.
3.如果建立对应的函数索引呢?
SCOTT@test01p> create index if_t1_v1 on t1( lower(v1));
Index created.
SCOTT@test01p> select * from t1 where lower(v1)='kcixtfqwhz';
ID V1
---------- --------------------
1 KCIXTFQWHZ
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID ds718dy9422mr, child number 1
-------------------------------------
select * from t1 where lower(v1)='kcixtfqwhz'
Plan hash value: 544604454
---------------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers | Reads |
---------------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 9 (100)| | 1 |00:00:00.01 | 4 | 1 |
| 1 | TABLE ACCESS BY INDEX ROWID BATCHED| T1 | 1 | 100 | 1500 | 9 (0)| 00:00:01 | 1 |00:00:00.01 | 4 | 1 |
|* 2 | INDEX RANGE SCAN | IF_T1_V1 | 1 | 40 | | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 3 | 1 |
---------------------------------------------------------------------------------------------------------------------------------------------------
--//E-Rows=40.
--//选择率40/10000 = .004,这个是我以前没有注意到的,我一直以为选择率还是1%.不知道这个缺省的选择率如何确定的.
4.继续探究:
--//实际上建立函数索引时有1个细节不容忽视,就是该隐含字段缺乏统计信息,我曾经在这里栽过跟头.
SCOTT@test01p> SCOTT@test01p> @ tab_lh scott t1 ''
DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
INPUT OWNER TABLE_NAME COLUMN
SAMPLE : @ TAB_LH TABLE_NAME [COLUMN_NAME]
IF NOT INPUT COLUMN_NAME ,USE "" .
COLUMN_NAME DATA_TYPE DATA_LENGTH N NUM_DISTINCT DENSITY SAMPLE_SIZE TRANS_LOW TRANS_HIGH NUM_NULLS NUM_BUCKETS LAST_ANALYZED HISTOGRAM DATA_DEFAULT
------------- --------- ----------- - ------------ ---------- ----------- ---------- ---------- --------- ----------- ------------------- --------- -------------
ID NUMBER 22 Y 10000 .0001 10000 1 10000 0 1 2019-08-27 21:35:55 NONE
V1 VARCHAR2 4000 Y 10000 .0001 10000 AAAXTDLLNT ZZZBEIPJYY 0 1 2019-08-27 21:35:55 NONE
SYS_NC00003$ VARCHAR2 4000 Y NONE LOWER("V1")
--//必须要重新分析表.
execute sys.dbms_stats.gather_table_stats ( OwnName => nvl('',user),TabName => 't1',Estimate_Percent => NULL,Method_Opt => 'FOR ALL COLUMNS SIZE 1 ',Cascade => True ,No_Invalidate => false)
SCOTT@test01p> @ tab_lh scott t1 SYS_NC00003$
DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
INPUT OWNER TABLE_NAME COLUMN
SAMPLE : @ TAB_LH TABLE_NAME [COLUMN_NAME]
IF NOT INPUT COLUMN_NAME ,USE "" .
COLUMN_NAME DATA_TYPE DATA_LENGTH N NUM_DISTINCT DENSITY SAMPLE_SIZE TRANS_LOW TRANS_HIGH NUM_NULLS NUM_BUCKETS LAST_ANALYZED HISTOGRAM DATA_DEFAULT
------------ --------- ----------- - ------------ ---------- ----------- ---------- ---------- --------- ----------- ------------------- --------- ------------
SYS_NC00003$ VARCHAR2 4000 Y 10000 .0001 10000 aaaxtdllnt zzzbeipjyy 0 1 2019-08-27 21:46:06 NONE LOWER("V1")
SCOTT@test01p> select * from t1 where lower(v1)='kcixtfqwhz';
ID V1
---------- --------------------
1 KCIXTFQWHZ
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID ds718dy9422mr, child number 1
-------------------------------------
select * from t1 where lower(v1)='kcixtfqwhz'
Plan hash value: 544604454
------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 2 (100)| | 1 |00:00:00.01 | 4 |
| 1 | TABLE ACCESS BY INDEX ROWID BATCHED| T1 | 1 | 1 | 26 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 4 |
|* 2 | INDEX RANGE SCAN | IF_T1_V1 | 1 | 1 | | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 3 |
------------------------------------------------------------------------------------------------------------------------------------------
--//这样E-Rows估算就比较准确了.
5.附上tab_lh.sql脚本:
/* Formatted on 2014/10/19 20:53:41 (QP5 v5.227.12220.39754) */
PROMPT
PROMPT DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
PROMPT INPUT OWNER TABLE_NAME COLUMN
PROMPT SAMPLE : @ TAB_LH TABLE_NAME [COLUMN_NAME]
PROMPT IF NOT INPUT COLUMN_NAME ,USE "" .
PROMPT
column trans_low format a32
column trans_high format a32
column data_default format a20
column column_name format a24
SELECT
--owner,
-- table_name,
column_name,
data_type,
data_length,
nullable,
num_distinct,
density,
sample_size,
CASE
WHEN data_type IN ('CHAR', 'VARCHAR2')
THEN
UTL_RAW.cast_to_varchar2 (low_value)
WHEN data_type = 'NUMBER'
THEN
TO_CHAR (UTL_RAW.cast_to_number (low_value))
WHEN data_type = 'DATE'
THEN
RTRIM (
LTRIM (
TO_CHAR (
100
* ( TO_NUMBER (SUBSTR (low_value, 1, 2), 'XX')
- 100)
+ (TO_NUMBER (SUBSTR (low_value, 3, 2), 'XX') - 100),
'0000'))
|| '-'
|| LTRIM (
TO_CHAR (TO_NUMBER (SUBSTR (low_value, 5, 2), 'XX'),
'00'))
|| '-'
|| LTRIM (
TO_CHAR (TO_NUMBER (SUBSTR (low_value, 7, 2), 'XX'),
'00'))
|| ' '
|| LTRIM (
TO_CHAR (
TO_NUMBER (SUBSTR (low_value, 9, 2), 'XX') - 1,
'00'))
|| ':'
|| LTRIM (
TO_CHAR (
TO_NUMBER (SUBSTR (low_value, 11, 2), 'XX') - 1,
'00'))
|| ':'
|| LTRIM (
TO_CHAR (
TO_NUMBER (SUBSTR (low_value, 13, 2), 'XX') - 1,
'00')))
END
trans_low,
CASE
WHEN data_type IN ('CHAR', 'VARCHAR2')
THEN
UTL_RAW.cast_to_varchar2 (high_value)
WHEN data_type = 'NUMBER'
THEN
TO_CHAR (UTL_RAW.cast_to_number (high_value))
WHEN data_type = 'DATE'
THEN
RTRIM (
LTRIM (
TO_CHAR (
100
* ( TO_NUMBER (SUBSTR (high_value, 1, 2), 'XX')
- 100)
+ (TO_NUMBER (SUBSTR (high_value, 3, 2), 'XX') - 100),
'0000'))
|| '-'
|| LTRIM (
TO_CHAR (TO_NUMBER (SUBSTR (high_value, 5, 2), 'XX'),
'00'))
|| '-'
|| LTRIM (
TO_CHAR (TO_NUMBER (SUBSTR (high_value, 7, 2), 'XX'),
'00'))
|| ' '
|| LTRIM (
TO_CHAR (
TO_NUMBER (SUBSTR (high_value, 9, 2), 'XX') - 1,
'00'))
|| ':'
|| LTRIM (
TO_CHAR (
TO_NUMBER (SUBSTR (high_value, 11, 2), 'XX') - 1,
'00'))
|| ':'
|| LTRIM (
TO_CHAR (
TO_NUMBER (SUBSTR (high_value, 13, 2), 'XX') - 1,
'00')))
END
trans_high,
num_nulls,
num_buckets,
last_analyzed,
histogram,
data_default
FROM dba_tab_cols
WHERE owner = decode('&1','',user,upper('&1'))
AND table_name = upper('&2')
AND column_name = decode('&&3','',column_name,upper('&&3'))
ORDER BY column_id
/
[20190827]函数索引与选择率.txt
分享好友
分享这个小栈给你的朋友们,一起进步吧。
订阅须知
• 所有用户可根据关注领域订阅专区或所有专区
• 付费订阅:虚拟交易,一经交易不退款;若特殊情况,可3日内客服咨询
• 专区发布评论属默认订阅所评论专区(除付费小栈外)
技术专家
查看更多- 栈栈专家