Oracle学习日记——时间日期范围处理

1.定位连续值的范围

-- Sample data for the examples below: 14 one-day projects keyed by proj_id.
-- Rows are grouped here by run: a project belongs to the same run as the
-- previous one when its proj_start equals the previous proj_end.
CREATE OR REPLACE VIEW v (proj_id, proj_start, proj_end) AS
    -- projects 1-4: back-to-back, 2005-01-01 .. 2005-01-05
    SELECT  1, DATE '2005-01-01', DATE '2005-01-02' FROM dual UNION ALL
    SELECT  2, DATE '2005-01-02', DATE '2005-01-03' FROM dual UNION ALL
    SELECT  3, DATE '2005-01-03', DATE '2005-01-04' FROM dual UNION ALL
    SELECT  4, DATE '2005-01-04', DATE '2005-01-05' FROM dual UNION ALL
    -- project 5: isolated
    SELECT  5, DATE '2005-01-06', DATE '2005-01-07' FROM dual UNION ALL
    -- projects 6-9: back-to-back, 2005-01-16 .. 2005-01-20
    SELECT  6, DATE '2005-01-16', DATE '2005-01-17' FROM dual UNION ALL
    SELECT  7, DATE '2005-01-17', DATE '2005-01-18' FROM dual UNION ALL
    SELECT  8, DATE '2005-01-18', DATE '2005-01-19' FROM dual UNION ALL
    SELECT  9, DATE '2005-01-19', DATE '2005-01-20' FROM dual UNION ALL
    -- project 10: isolated
    SELECT 10, DATE '2005-01-21', DATE '2005-01-22' FROM dual UNION ALL
    -- projects 11-14: back-to-back, 2005-01-26 .. 2005-01-30
    SELECT 11, DATE '2005-01-26', DATE '2005-01-27' FROM dual UNION ALL
    SELECT 12, DATE '2005-01-27', DATE '2005-01-28' FROM dual UNION ALL
    SELECT 13, DATE '2005-01-28', DATE '2005-01-29' FROM dual UNION ALL
    SELECT 14, DATE '2005-01-29', DATE '2005-01-30' FROM dual;

需求:查询出首尾相连的工程,即本期工程的结束日期等于下一期工程开始日期的记录。

方案1:自关联

-- Approach 1 (self-join): list every project that is directly followed by
-- another one, i.e. whose end date equals the start date of some project.
-- cur is the reported row and nxt is its successor. Joining on
-- nxt.proj_start = cur.proj_end returns the earlier project of each touching
-- pair, which is the same row the LEAD()-based approach reports. The reversed
-- predicate would return the later project of each pair instead.
SELECT
    cur.proj_id    AS 工程号,
    cur.proj_start AS 开始时间,
    cur.proj_end   AS 结束时间
FROM v cur
INNER JOIN v nxt
    ON nxt.proj_start = cur.proj_end
ORDER BY cur.proj_id;

方案2:使用lead() over() 进行过滤

-- Approach 2 (analytic function): read the next project's start date with
-- LEAD(), ordered by proj_id, then keep the rows whose end date equals it.
-- The view v is referenced only once in this statement.
WITH proj_with_next AS (
    SELECT
        p.proj_id    AS 工程号,
        p.proj_start AS 开始时间,
        p.proj_end   AS 结束时间,
        LEAD(p.proj_start) OVER (ORDER BY p.proj_id) AS 下一期工程开始时间
    FROM v p
)
SELECT
    工程号,
    开始时间,
    结束时间,
    下一期工程开始时间
FROM proj_with_next
WHERE 下一期工程开始时间 = 结束时间;

在上面的两种写法中,自关联需要扫描两次视图 v,而分析函数只需要扫描一次。根据这个特性,大部分情况下都可以用分析函数来优化查询性能。

2.定位连续值范围的开始点和结束点

需求:现在要求把连续的项目合并,返回合并后的起止时间。例如前四个项目合并后,起止时间就是 2005-01-01 到 2005-01-05。

如果是取最小开始时间和最大结束时间,则比较容易操作

-- Overall span of the whole view: earliest start and latest end, as one row.
SELECT
    MIN(p.proj_start) AS 开始,
    MAX(p.proj_end)   AS 结束
FROM v p;

但是远远不能满足我们的需求。

分析:

(1)提取上一工程的结束日期

-- Step 1: expose each project together with the end date of the project
-- that precedes it in proj_id order.
-- LAG() has no earlier row to read for the first project, so
-- 上一工程结束日期 is NULL there.
CREATE OR REPLACE VIEW x0 AS
SELECT
    proj_id    AS 编号,
    proj_start AS 开始日期,
    proj_end   AS 结束日期,
    LAG(proj_end) OVER (ORDER BY proj_id) AS 上一工程结束日期
FROM v;

-- Inspect the intermediate result. This is a separate statement, so the
-- CREATE VIEW above must be terminated first.
SELECT * FROM x0;

(2)标定工程的连续状态

-- Step 2: flag the first project of every run.
-- 连续状态 is 0 when the project starts on the date the previous one ended,
-- and 1 otherwise. A row whose 上一工程结束日期 is NULL fails the comparison
-- and therefore also gets 1.
CREATE OR REPLACE VIEW x1 AS
SELECT
    prev.编号,
    prev.开始日期,
    prev.结束日期,
    prev.上一工程结束日期,
    CASE
        WHEN prev.上一工程结束日期 = prev.开始日期 THEN 0
        ELSE 1
    END AS 连续状态
FROM x0 prev;

-- Inspect the intermediate result.
SELECT * FROM x1;

可以看到,在每一个连续分组的开始位置,我们都生成了一个“1”作为标识。

(3)对这个位置状态进行累加,得到分组依据

-- Step 3: turn the start-of-run flags into a group number.
-- A running total of 连续状态 in 编号 order increases by one at every row
-- flagged 1, so all rows of one run share the same 分组依据.
-- The window frame written out below is the default frame that applies when
-- only ORDER BY is given.
CREATE OR REPLACE VIEW x2 AS
SELECT
    flagged.编号,
    flagged.开始日期,
    flagged.结束日期,
    flagged.上一工程结束日期,
    flagged.连续状态,
    SUM(flagged.连续状态) OVER (
        ORDER BY flagged.编号
        RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS 分组依据
FROM x1 flagged;

-- Inspect the intermediate result.
SELECT * FROM x2;

可以看到,通过提取数据(上一行日期)、生成标识、累加标识这些操作后,得到了5个连续分组,有分组依据后就容易完成下面的操作。

-- Step 4: collapse each run into a single row with its earliest start and
-- latest end. Output is ordered by the group number.
SELECT
    grp.分组依据,
    MIN(grp.开始日期) AS 开始日期,
    MAX(grp.结束日期) AS 结束日期
FROM x2 grp
GROUP BY grp.分组依据
ORDER BY grp.分组依据;

把上面各步骤整理在一起的语句如下:

-- All steps in one statement, with no helper views.
WITH
-- flag each project: 0 if it starts where the previous one (by proj_id) ended, else 1
flagged AS (
    SELECT
        proj_id    AS 编号,
        proj_start AS 开始日期,
        proj_end   AS 结束日期,
        CASE
            WHEN LAG(proj_end) OVER (ORDER BY proj_id) = proj_start THEN 0
            ELSE 1
        END AS 连续状态
    FROM v
),
-- running total of the flags gives one group number per run
grouped AS (
    SELECT
        编号,
        开始日期,
        结束日期,
        SUM(连续状态) OVER (ORDER BY 编号) AS 分组依据
    FROM flagged
)
-- one row per run: earliest start and latest end
SELECT
    分组依据,
    MIN(开始日期) AS 开始日期,
    MAX(结束日期) AS 结束日期
FROM grouped
GROUP BY 分组依据
ORDER BY 分组依据;

3.合并时间段

-- Sample data for merging time periods: 10 tasks whose date ranges may
-- overlap, touch, or be nested inside an earlier task's range.
-- NOTE(review): the column name tast_id looks like a typo for task_id. It is
-- kept unchanged so that anything already referencing the view keeps working.
CREATE OR REPLACE VIEW Timesheets (tast_id, start_date, end_date) AS
    SELECT  1, DATE '1997-01-01', DATE '1997-01-03' FROM dual UNION ALL
    SELECT  2, DATE '1997-01-02', DATE '1997-01-04' FROM dual UNION ALL
    SELECT  3, DATE '1997-01-04', DATE '1997-01-05' FROM dual UNION ALL
    SELECT  4, DATE '1997-01-06', DATE '1997-01-09' FROM dual UNION ALL
    SELECT  5, DATE '1997-01-09', DATE '1997-01-09' FROM dual UNION ALL
    SELECT  6, DATE '1997-01-09', DATE '1997-01-09' FROM dual UNION ALL
    -- task 8 lies inside task 7, and task 9 starts on the day task 7 ends
    SELECT  7, DATE '1997-01-12', DATE '1997-01-15' FROM dual UNION ALL
    SELECT  8, DATE '1997-01-13', DATE '1997-01-13' FROM dual UNION ALL
    SELECT  9, DATE '1997-01-15', DATE '1997-01-15' FROM dual UNION ALL
    SELECT 10, DATE '1997-01-17', DATE '1997-01-17' FROM dual;

-- Inspect the sample data.
SELECT * FROM Timesheets;

id7(01-12~01-15)与id9(01-15~01-15)是连续的,但按开始日期排序后夹在中间的id8(01-13~01-13)与id9并不首尾相接,所以只用lag取上一行的end_date来判断肯定不对。

(1)这时可以用另一个开窗方式来处理:获取当前行之前的最大“end_date”

-- For every task, compute the latest end_date among all rows that come
-- before it in start_date order. The frame stops at "1 PRECEDING", so the
-- current row is excluded. The first row has no preceding rows, so its
-- max_end_date is NULL.
SELECT
    t.start_date,
    t.end_date,
    MAX(t.end_date) OVER (
        ORDER BY t.start_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS max_end_date
FROM timesheets t;

rows between unbounded preceding and 1 preceding:这是开窗子句中的 rows between ... and ... 部分,表示窗口范围为“从第一行到上一行”(不包含当前行)。

该分析函数就是order by start_date后“第一行到上一行”范围内的“max(end_date)”

有了这个数据后再来判断:只要当前行的start_date不晚于max_end_date,就说明它与前面的时间段相连或重叠,这样就可以把id(7、8、9)判断为同一个连续范围了。