//http://groovy.codehaus.org/Regular+Expressions
// The find operator (=~) yields a java.util.regex.Matcher; every run of one or
// more 'e' characters in the input counts as one match.
def r = /e+/
def str = 'me cheese please'
def m = (str =~ r)
assert m.count == 5
println m.collect { it.toString() } //[e, ee, e, e, e]
/////////////////////////////
// Capture groups: when the pattern has groups, each match is a list of
// [wholeMatch, group1, ...], so it[1] is the text captured by (This boy).
// r, str and m are assigned without `def` because the first example already
// declared them; declaring the same name twice in one script scope does not compile.
r = /(This boy) is/
str = 'This boy is 10. This boy wants chocolate. This boy is tall.'
m = str =~ r
assert m.size() == 2
assert m.collect { it[1] } == ['This boy','This boy']
/////////////////////////////
// Back references: $1 and $3 in the replacement stand for the first and third
// capture groups. A single-quoted string needs no escaping of '$'.
assert 'abc'.replaceAll(/(a)(b)(c)/, '$1$3') == 'ac'
/////////////////////////////
//http://www.regular-expressions.info/captureall.html
// Wrapping the repeated non-capturing group in an outer capturing group stores
// the whole repeated run (e.g. "123abc123"), not just its last iteration.
r = /((?:abc|123)+).*?/
str = '123abc 123abc123'
m = (str =~ r)
assert m.count == 2
println m.collect { it.toString() }
// Each match is [wholeMatch, group1]; tail() drops the whole match and keeps the groups.
print m[0].tail()
println m[1].tail()
/////////////////////////////
// Password rule: at least 8 characters, at least two of which are non-letters.
// The trailing look-behind (?<=.{8}) only succeeds at a position preceded by 8
// characters. Because ==~ must match the entire string, that position is the end
// of the input, so the check amounts to "length >= 8". (A look-behind of .{7}
// would wrongly accept 7-character passwords such as 'abcde12'.)
def r1 = /.*[^a-zA-Z].*[^a-zA-Z].*(?<=.{8})/ //look-behind ?<=
// Groovy has no `!=~` operator: `a !=~ b` parses as `a != (~b)`, i.e. a String
// compared with a Pattern, which is always true. Negate ==~ explicitly instead.
assert !('abc' ==~ r1)
assert !('abcde12' ==~ r1)
assert 'abcdef12' ==~ r1
assert 'abc1defgggg2' ==~ r1
assert !('abc1defgggg' ==~ r1)
// A word of the form foo<wrd><more word characters>; capture everything after "foo".
def wrd = 'bar'
// $wrd is interpolated into the slashy string, so the pattern is foo((?=bar)[\w]+).
def r2 = /foo((?=$wrd)[\w]+)/ //look-ahead ?=
def foo = "foo${wrd}hellow"
assert foo ==~ r2
// m was declared by the first example; a second `def m` in the same scope does not compile.
m = foo =~ r2
assert m[0][1] == "${wrd}hellow" // note: $wrd not consumed by check, is stored in result
foo = 'foohellow' // no $wrd
// `foo !=~ r2` would parse as `foo != (~r2)` (String vs Pattern, always true),
// so the negative case is written as an explicit negation of ==~.
assert !(foo ==~ r2)
//ADVANCED/////////////////////////////
/**
 * Rewrites compass-direction words in {@code text} to their abbreviations,
 * printing the intermediate text after every rule.
 *
 * Rules are applied in list order, each one to the output of the previous one,
 * so compound directions (NE, NW, SE, SW) are replaced before the simple ones.
 * The atomic groups {@code (?>...)} stop the engine from backtracking into a
 * shorter match when the negative look-ahead that follows rejects the position
 * (e.g. "NorthWestern Territories" is not turned into "NW" by the NW rule).
 *
 * @param text the text to abbreviate
 * @return the rule list, since {@code each} returns the collection it iterated
 *         and is the last expression; the rewritten text is only printed
 */
def churnText(String text) {
    def points = [
        [k: ~/(N|n)orth(E|e)ast(ern)?/, v: 'NE'],
        [k: ~/(?>(N|n)orth(W|w)est(ern|:)?)(?! Territories)/, v: 'NW'],
        [k: ~/(S|s)outheast(ern)?/, v: 'SE'],
        [k: ~/(?>(S|s)outh(\s)?(W|w)est(ern)?)(?! Hill| Bend)/, v: 'SW'],
        [k: ~/(?>(N|n)orth(ern)?|Upstate)(?! Carolina| Dakota| Platte| Neck| Mariana Islands| Bay|ridge)/, v: 'N'],
        [k: ~/(E|e)ast(ern)?/, v: 'E'],
        [k: ~/(?>(S|s)outh(ern|side)?)(?! Carolina| Dakota)/, v: 'S'],
        // Negative look-ahead wrapping a look-behind: skip a "West" that directly follows "George ".
        [k: ~/(?!(?<=George ))(?>(W|w)est(ern| of the)?)(?! Virginia| Palm Beach)/, v: 'W'],
        [k: ~/(?>(C|c)entral|Center|Middle|the middle section of the)(?!town| Peninsula| Tennessee|ia)/, v: 'C']
    ]
    points.each { rule ->
        // GDK replaceAll(Pattern, replacement) is pattern.matcher(text).replaceAll(replacement).
        text = text.replaceAll(rule.k, rule.v)
        println "p.v: ${rule.v} text: $text"
    }
}
// Run churnText over three sample inputs, printing a 40-character rule of '='
// between consecutive runs (none after the last).
def samples = ['Northwest Virginia', 'NorthWestern Territories', 'East George West']
samples.eachWithIndex { sample, idx ->
    if (idx > 0) {
        println '=' * 40
    }
    churnText(sample)
}
return
// Monday, February 27, 2012
// Regex look-ahead/behind syntax
// Labels:
// regex
// Subscribe to:
// Post Comments (Atom)
// No comments:
// Post a Comment