1 #
   2 # Secret Labs' Regular Expression Engine
   3 #
   4 # various symbols used by the regular expression engine.
   5 # run this script to update the _sre include files!
   6 #
   7 # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
   8 #
   9 # See the sre.py file for information on usage and redistribution.
  10 #
  11 
  12 """Internal support module for sre"""
  13 
# update when constants are added or removed
# (the script section at the bottom of this file writes this value to the
# generated C header as SRE_MAGIC)

MAGIC = 20031017
  17 
  18 from _sre import MAXREPEAT
  19 
  20 # SRE standard exception (access as sre.error)
  21 # should this really be here?
  22 
  23 class error(Exception):
  24     pass
  25 
# operators
#
# Symbolic operator names, spelled as lowercase strings.  The numeric code
# of an operator is not stored here; it is derived from the operator's
# position in the OPCODES list.  MAX_REPEAT and MIN_REPEAT are defined
# below but do not appear in OPCODES.

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"
  63 
# positions
#
# Symbolic names for the position codes collected in ATCODES.  The "loc"
# and "uni" variants are the targets of the AT_LOCALE and AT_UNICODE
# replacement tables; the "line" variants are the targets of AT_MULTILINE.
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
  77 
# categories
#
# Symbolic names for the character-category codes collected in CHCODES.
# Each category comes with a NOT_ counterpart.  The LOC_ and UNI_ names are
# the targets of the CH_LOCALE and CH_UNICODE replacement tables.
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
  97 
# Ordered list of operator names.  The index of each name in this list
# becomes its numeric opcode once the list is passed through makedict(),
# and those numbers are what the script section writes to the C header as
# SRE_OP_* defines.  Inserting, removing or reordering entries therefore
# renumbers the opcodes.
OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]
 127 
# Ordered list of position names.  As with OPCODES, the index of a name in
# this list becomes its numeric code, so the order is significant.
ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]
 134 
# Ordered list of character-category names.  The index of a name in this
# list becomes its numeric code, so the order is significant.
CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]
 144 
 145 def makedict(list):
 146     d = {}
 147     i = 0
 148     for item in list:
 149         d[item] = i
 150         i = i + 1
 151     return d
 152 
# Rebind each ordered list of names to a dict mapping name -> numeric code
# (the name's index in the original list).  From here on OPCODES, ATCODES
# and CHCODES are dicts, not lists.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)
 156 
# replacement operations for "ignore case" mode
# (maps an operator to its *_IGNORE counterpart)
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

# Maps the plain beginning/end positions to their per-line variants.
# NOTE(review): presumably applied when SRE_FLAG_MULTILINE is set -- the
# consumer is not in this file, confirm against the compiler.
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

# Maps the plain boundary positions to their "loc" variants.
# NOTE(review): presumably applied when SRE_FLAG_LOCALE is set -- confirm.
AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

# Maps the plain boundary positions to their "uni" variants.
# NOTE(review): presumably applied when SRE_FLAG_UNICODE is set -- confirm.
AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

# Category replacements for the "loc" variants.  Only the WORD / NOT_WORD
# categories have a LOC_ counterpart; every other category maps to itself.
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

# Category replacements for the "uni" variants.  Every plain category maps
# to its UNI_ counterpart.
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}
 201 
 202 # flags
 203 SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
 204 SRE_FLAG_IGNORECASE = 2 # case insensitive
 205 SRE_FLAG_LOCALE = 4 # honour system locale
 206 SRE_FLAG_MULTILINE = 8 # treat target as multiline string
 207 SRE_FLAG_DOTALL = 16 # treat target as a single string
 208 SRE_FLAG_UNICODE = 32 # use unicode "locale"
 209 SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
 210 SRE_FLAG_DEBUG = 128 # debugging
 211 SRE_FLAG_ASCII = 256 # use ascii "locale"
 212 
 213 # flags for INFO primitive
 214 SRE_INFO_PREFIX = 1 # has prefix
 215 SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
 216 SRE_INFO_CHARSET = 4 # pattern starts with character from given set
 217 
 218 if __name__ == "__main__":
 219     def dump(f, d, prefix):
 220         items = sorted(d.items(), key=lambda a: a[1])
 221         for k, v in items:
 222             f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
 223     f = open("sre_constants.h", "w")
 224     f.write("""\
 225 /*
 226  * Secret Labs' Regular Expression Engine
 227  *
 228  * regular expression matching engine
 229  *
 230  * NOTE: This file is generated by sre_constants.py.  If you need
 231  * to change anything in here, edit sre_constants.py and run it.
 232  *
 233  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 234  *
 235  * See the _sre.c file for information on usage and redistribution.
 236  */
 237 
 238 """)
 239 
 240     f.write("#define SRE_MAGIC %d\n" % MAGIC)
 241 
 242     dump(f, OPCODES, "SRE_OP")
 243     dump(f, ATCODES, "SRE")
 244     dump(f, CHCODES, "SRE")
 245 
 246     f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
 247     f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
 248     f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
 249     f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
 250     f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
 251     f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
 252     f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
 253 
 254     f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
 255     f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
 256     f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
 257 
 258     f.close()
 259     print("done")