simpleparse.stt.TextTools.TextTools (version 2.1.0) | index /home/mcfletch/pylive/simpleparse/stt/TextTools/TextTools.py |
mxTextTools - A tools package for fast text processing.
Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
See the documentation for further information on copyrights,
or contact the author. All Rights Reserved.
Modules | ||||||
|
Classes | ||||||||||
|
Functions | ||
|
Data | ||
A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' A2Z_charset = <Character Set object for 'A-Z'> A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' AllIn = 11 AllInCharSet = 41 AllInSet = 31 AllNotIn = 12 AppendMatch = 2048 AppendTagobj = 1024 AppendToTagobj = 512 BOYERMOORE = 0 Break = 0 Call = 201 CallArg = 202 CallTag = 256 EOF = 101 FASTSEARCH = 1 Fail = 100 Here = 1 Is = 13 IsIn = 14 IsInCharSet = 42 IsInSet = 32 IsNot = 15 IsNotIn = 15 Jump = 100 JumpTarget = 104 LookAhead = 4096 Loop = 205 LoopControl = 206 MatchFail = -1000000 MatchOk = 1000000 Move = 103 NoWord = 211 Reset = -1 Skip = 102 SubTable = 207 SubTableInList = 208 TRIVIAL = 2 Table = 203 TableInList = 204 ThisTable = 999 To = 0 ToBOF = 0 ToEOF = -1 Umlaute = '\xc4\xd6\xdc' Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'> Word = 21 WordEnd = 23 WordStart = 22 _linecount_table = ((None, 13, '\r', 1), (None, 13, '\n', 1), ('line', 41, <Character Set object for '^\r\n'>, 1, -2), (None, 101, 1, 1, 1000000), ('empty line', 102, 0, 0, -4)) _linesplit_table = ((None, 13, '\r', 1), (None, 13, '\n', 1), ('line', 2089, <Character Set object for '^\r\n'>, 1, -2), (None, 101, 1, 1, 1000000), ('empty line', 2150, 0, 0, -4)) _wordsplit_table = ((None, 41, <Character Set object for ' \t\x0b\r\n\x0c'>, 1), ('word', 2089, <Character Set object for '^ \t\x0b\r\n\x0c'>, 1, -1), (None, 101, 1, 1, 1000000)) a2z = 'abcdefghijklmnopqrstuvwxyz' a2z_charset = <Character Set object for 'a-z'> a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'> alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789' alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'> alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff' any_charset = <Character Set object for '\x00-\xff'> any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff' formfeed = '\x0c' formfeed_charset = <Character Set object for '\x0c'> german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc' german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'> german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10' id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...} newline = '\r\n' newline_charset = <Character Set object for '\r\n'> newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'> nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff' number = '0123456789' number_charset = <Character Set object for '0-9'> number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' sFindWord = 213 sWordEnd = 212 sWordStart = 211 tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...} to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff' to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff' umlaute = '\xe4\xf6\xfc\xdf' umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'> white = ' \t\x0b' white_charset = <Character Set object for ' \t\x0b'> white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' whitespace = ' \t\x0b\r\n\x0c' whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'> whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' |