regexp already merges adjacent overlapping ranges. this patch
also merges adjacent, non-overlapping ranges; for example, [ab] -> [a-b].
this patch is built on the changes in regexpclass. the actual change
is 2 lines plus commented-out debugging.
this patch is a significant performance win for some cases.
/tmp/sed is the current sources version; sed (/bin/sed) is built with this patch:
; time /tmp/sed -n '/[α-ω]/p' </tmp/gs>/dev/null
1.10u 0.02s 1.17r /tmp/sed -n /[α-ω]/p
; time sed -n '/[α-ω]/p' </tmp/gs>/dev/null
1.09u 0.02s 1.17r sed -n /[α-ω]/p
; time /tmp/sed -n '/[αβγδεζηθικλμνξοπρςστυφχψω]/p' </tmp/gs>/dev/null
1.72u 0.02s 1.81r /tmp/sed -n /[αβγδεζηθικλμνξοπρςστυφχψω]/p
; time /bin/sed -n '/[αβγδεζηθικλμνξοπρςστυφχψω]/p' </tmp/gs>/dev/null
1.10u 0.02s 1.17r /bin/sed -n /[αβγδεζηθικλμνξοπρςστυφχψω]/p
debugging output (in the code, commented out)
# before change
; echo x | 8.xsed -n '/[abcdefghijklmnopqrstuvwxyz]/p'
nspan = 26
a a 0061 0061
b b 0062 0062
c c 0063 0063
d d 0064 0064
e e 0065 0065
f f 0066 0066
g g 0067 0067
h h 0068 0068
i i 0069 0069
j j 006a 006a
k k 006b 006b
l l 006c 006c
m m 006d 006d
n n 006e 006e
o o 006f 006f
p p 0070 0070
q q 0071 0071
r r 0072 0072
s s 0073 0073
t t 0074 0074
u u 0075 0075
v v 0076 0076
w w 0077 0077
x x 0078 0078
y y 0079 0079
z z 007a 007a
x
# after change
; echo x | 8.xsed -n '/[abcdefghijklmnopqrstuvwxyz]/p'
nspan = 1
a z 0061 007a
x
|