# rules for python # based on grammar given in Programming Python by Mark Lutz # EDIT THIS: THE DIRECTORY IN WHICH TO MARSHAL THE # GRAMMAR DATA STRUCTURES. # ARCHIVE = "." marshalfilename = ARCHIVE + "/pygram.mar" pyrules = """ all :: ## input terminates with "fake" dedent (forces read of all file) @R all1 :: all >> file_input DEDENT ## 1 term newline ##@R lead_blank :: file_input >> NEWLINE file_input @R top_stmt :: file_input >> file_input stmt @R file_input :: file_input >> stmt ## 2 @R simple :: stmt >> simple_stmt @R compound :: stmt >> compound_stmt ## 3 punct ; term NEWLINE @R one_small :: simple_stmt >> small_stmt NEWLINE @R more_small :: simple_stmt >> small_stmt ; simple_stmt @R small_semi :: simple_stmt >> small_stmt ; NEWLINE ## 4 kw pass @R smexpr :: small_stmt >> expr_stmt @R smassn :: small_stmt >> assn @R smprint :: small_stmt >> print_stmt @R smdel :: small_stmt >> del_stmt @R smpass :: small_stmt >> pass @R smflow :: small_stmt >> flow_stmt @R smimport :: small_stmt >> import_stmt @R smglobal :: small_stmt >> global_stmt ## access ignored @R smexec :: small_stmt >> exec_stmt ## 5 @R cmif :: compound_stmt >> if_stmt @R cmwhile :: compound_stmt >> while_stmt @R cmfor :: compound_stmt >> for_stmt @R cmtry :: compound_stmt >> try_stmt @R cmdef :: compound_stmt >> funcdef @R cmclass :: compound_stmt >> classdef ##6 @R exprlist :: expr_stmt >> testlist ##@R assignment :: expr_stmt >> assn @R assn1 :: assn >> testlist = testlist @R assnn :: assn >> testlist = assn @R assn1c :: assn >> testlist , = testlist @R assn1c2 :: assn >> testlist , = testlist , @R assnnc :: assn >> testlist , = assn ##testing @R exprassn :: expr_stmt >> expr_stmt = testlist @R exprlistc :: expr_stmt >> testlist , ##testing @R exprassnc :: expr_stmt >> expr_stmt = testlist , ##7 kw print @R rprint0 :: print_stmt >> print @R rprint :: print_stmt >> print testlist @R rprintc :: print_stmt >> print testlist , ##8 kw del @R rdel :: del_stmt >> del exprlist ##9 trivially handled in #4 ##10 kw raise continue break return ## eliminates 11 12 13 14 @R rbreak :: flow_stmt >> break @R rcontinue :: flow_stmt >> continue @R rreturn0 :: flow_stmt >> return @R rreturn :: flow_stmt >> return testlist @R rreturnc :: flow_stmt >> return testlist , @R rraise1 :: flow_stmt >> raise test @R rraise2 :: flow_stmt >> raise test , test @R rraise3 :: flow_stmt >> raise test , test , test ## 11 12 13 14 skipped ## 15 kw import from @R rimport :: import_stmt >> import dotted_name_list @R rimportc :: import_stmt >> import dotted_name_list , @R dnlist1 :: dotted_name_list >> dotted_name @R dnlistn :: dotted_name_list >> dotted_name_list , dotted_name @R rfrom :: import_stmt >> from dotted_name import name_list @R rfroms :: import_stmt >> from dotted_name import * @R rfromc :: import_stmt >> from dotted_name import name_list , @R nlistn :: name_list >> name_list , NAME @R nlist1 :: name_list >> NAME ##16 nt NAME @R dn1 :: dotted_name >> NAME @R dnn :: dotted_name >> dotted_name . NAME ##17 kw global @R global1 :: global_stmt >> global NAME @R globaln :: global_stmt >> global_stmt , NAME ## 18 19 ignored ##20 kw exec in @R exec1 :: exec_stmt >> exec expr @R exec2 :: exec_stmt >> exec expr in test @R exec3 :: exec_stmt >> exec expr in test , test ##21 kw if elif else punct : @R ifr :: if_stmt >> if test : suite elifs @R elifs0 :: elifs >> @R relse :: elifs >> else : suite @R elifsn :: elifs >> elif test : suite elifs ##22 kw while @R while1 :: while_stmt >> while test : suite @R while2 :: while_stmt >> while test : suite else : suite ##23 kw for @R for1 :: for_stmt >> for exprlist in testlist : suite @R for2 :: for_stmt >> for exprlist in testlist : suite else : suite ##24 kw try @R tryr :: try_stmt >> try : suite excepts @R excepts1 :: excepts >> except_clause : suite @R excepts2 :: excepts >> except_clause : suite else : suite @R exceptsn :: excepts >> except_clause : suite excepts @R tryf :: try_stmt >> try : suite finally : suite ##25 kw except @R except0 :: except_clause >> except @R except1 :: except_clause >> except test @R except2 :: except_clause >> except test , test ##26 @R class1 :: classdef >> class NAME : suite @R class2 :: classdef >> class NAME ( testlist ) : suite ##27 kw def @R rdef :: funcdef >> def NAME parameters : suite ##28, 29 punct = * ## (modified from grammar presented) @R params1 :: parameters >> ( varargslist ) @R params1c :: parameters >> ( varargslist , ) @R params2 :: varargslist >> ## this is way too permissive: fix at semantic level @R params3 :: varargslist >> arg @R params4 :: varargslist >> varargslist , arg @R argd :: arg >> NAME = test @R arg2 :: arg >> fpdef @R arg3 :: arg >> * NAME @R arg4 :: arg >> ** NAME ## 30 @R fpdef1 :: fpdef >> NAME @R fpdef2 :: fpdef >> ( fplist ) @R fpdef2c :: fpdef >> ( fplist , ) ##31 @R fplist1 :: fplist >> fpdef @R fplistn :: fplist >> fplist , fpdef ##32 t INDENT DEDENT @R ssuite :: suite >> simple_stmt @R csuite :: suite >> NEWLINE INDENT stmtseq DEDENT @R stmtseq1 :: stmtseq >> stmt @R stmtseqn :: stmtseq >> stmtseq stmt ##33 kw or cancels 53 @R testor :: test >> or_test @R testand :: or_test >> and_test @R testor1 :: or_test >> or_test or and_test ## @R testlambda0 :: test >> lambda : test REDUNDANT @R testlambda1 :: test >> lambda varargslist : test ##34 kw and @R andnot :: and_test >> not_test @R andand :: and_test >> and_test and not_test ##35 kw not @R notnot :: not_test >> not not_test @R notcmp :: not_test >> comparison ##36 NOTE KWS == >= <= <> != @R cmpexpr :: comparison >> expr @R cmplt :: comparison >> comparison < expr @R cmpgt :: comparison >> comparison > expr @R cmpeq :: comparison >> comparison == expr @R cmpge :: comparison >> comparison >= expr @R cmple :: comparison >> comparison <= expr @R cmpnep :: comparison >> comparison <> expr @R cmpne :: comparison >> comparison != expr @R cmpin :: comparison >> comparison in expr @R cmpnotin :: comparison >> comparison not in expr @R cmpis :: comparison >> comparison is expr @R cmpisnot :: comparison >> comparison is not expr ##37 kw is not punct > < ! (eliminated) ##38 p | @R expr_xor :: expr >> xor_expr @R expr_lor :: expr >> expr | xor_expr ##39 p ^ @R xor_and :: xor_expr >> and_expr @R xor_xor :: xor_expr >> xor_expr ^ and_expr ##40 @R and_shift :: and_expr >> shift_expr @R and_and :: and_expr >> and_expr & shift_expr ##41 note kw's << >x> note goofy x to avoid confusing the grammar @R shift_arith :: shift_expr >> arith_expr @R shift_left :: shift_expr >> shift_expr << arith_expr @R shift_right :: shift_expr >> shift_expr >x> arith_expr ##42 @R arith_term :: arith_expr >> term @R arith_plus :: arith_expr >> arith_expr + term @R arith_minus :: arith_expr >> arith_expr - term ##43 p */% @R termfactor :: term >> factor @R termmul :: term >> term * factor @R termdiv :: term >> term / factor @R termmod :: term >> term % factor ## stuff for power @R factorpower :: factor >> power @R factorexp :: factor >> factor ** power ##44 p ~ @R powera :: power >> atom trailerlist @R trailerlist0 :: trailerlist >> @R trailerlistn :: trailerlist >> trailer trailerlist @R powerp :: power >> + power @R powerm :: power >> - power @R poweri :: power >> ~ power ##45 t NUMBER STRING @R nulltup :: atom >> ( ) @R parens :: atom >> ( testlist ) @R parensc :: atom >> ( testlist , ) @R nulllist :: atom >> [ ] @R list :: atom >> [ testlist ] @R listc :: atom >> [ testlist , ] @R nulldict :: atom >> { } @R dict :: atom >> { dictmaker } @R dictc :: atom >> { dictmaker , } @R repr :: atom >> ` testlist ` ## @R reprc :: atom >> ` testlist , ` doesn't work, apparently @R aname :: atom >> NAME ## note number to be broken out into FLOAT OCTINT HEXINT INT @R anumber :: atom >> NUMBER @R astring :: atom >> stringseq @R stringseq0 :: stringseq >> STRING @R stringseqn :: stringseq >> stringseq STRING ##46 @R nullcall :: trailer >> ( ) @R call :: trailer >> ( arglist ) @R callc :: trailer >> ( arglist , ) @R index :: trailer >> [ subscriptdots ] @R getattr :: trailer >> . NAME ##47 @R arg1 :: arglist >> argument @R argn :: arglist >> arglist , argument ##@R argn1 :: arglist >> arglist , NAME = test ##48 ( !!!! is this wrong in PP?) @R posarg :: argument >> test ## here the left test should be a NAME always, but parser doesn't like it @R namearg :: argument >> test = test ##49 this IS wrong in PP (numeric ext) @R nodots :: subscriptdots >> subscriptseq @R yesdots :: subscriptdots >> subscriptseq , . . . , subscriptseq @R subscript1 :: subscriptseq >> subscript @R subscriptn :: subscriptseq >> subscriptseq , subscript @R subscriptt :: subscript >> test @R subscripts0 :: subscript >> : @R subscriptsL :: subscript >> test : @R subscriptsR :: subscript >> : test @R subscripts :: subscript >> test : test ##50 @R exprlist1 :: exprlist >> expr @R exprlistn :: exprlist >> exprlist , expr ##51 @R testlist0 :: testlist >> test @R testlistn :: testlist >> testlist , test ##52 @R dictmaker1 :: dictmaker >> test : test @R dictmaker2 :: dictmaker >> dictmaker , test : test """ nonterms = """ subscriptdots subscript arg argument arglist subscriptseq params trailerlist factor atom trailer dictmaker stringseq power xor_expr and_expr shift_expr arith_expr term and_test or_test not_test comparison comp_op expr fplist stmtseq varargslist assn expr elifs suite excepts parameters pbasic pdefault pspecial testlist exprlist test dotted_name_list dotted_name name_list if_stmt while_stmt for_stmt try_stmt funcdef classdef expr_stmt print_stmt del_stmt flow_stmt import_stmt global_stmt small_stmt compound_stmt stmt simple_stmt exec_stmt file_input except_clause fpdef cmp_op all """ import string # python needs special handling for the lexical stuff NAMEre = "[" + string.letters + "_][" + string.letters+string.digits +"]*" NUMBERre = "[" + string.digits + "]+" # temporary! STRINGre = '"[^"\n]*"' # to be overridden in lexdict #NEWLINEre = "\n" # to be overridden in lexdict INDENTre = "#" # a fake! to be overridden DEDENTre = "#" # a fake! to be overridden def echo(str): return str def DeclareTerminals(Grammar): Grammar.Addterm("NAME", NAMEre, echo) Grammar.Addterm("NUMBER", NUMBERre, echo) Grammar.Addterm("STRING", STRINGre, echo) #Grammar.Addterm("NEWLINE", NEWLINEre, echo) # newline is kw! Grammar.Addterm("INDENT", INDENTre, echo) Grammar.Addterm("DEDENT", DEDENTre, echo) # not >x> is a fake! keywords = """ and break class continue def del elif else except exec finally for from global if import in is lambda not or pass print raise return try while == >= <= <> != >x> << NEWLINE ** """ import kjParser, string, re from kjParser import KEYFLAG, ENDOFFILETERM alphanumunder = string.letters+string.digits+"_" alpha = string.letters + "_" # components that are part of a identifier (cannot be next to kw). id_letters = map(None, alphanumunder) # terminator re for names nametermre = "[^" + alphanumunder + "]" nameterm = re.compile(nametermre) # terminator re for numbers (same as above but allow "." in num). numtermre = "[^" + alphanumunder + "\.]" numterm = re.compile(numtermre) parseerror = "parseerror" pycommentre = r"(#.*)" # whitespace regex outside of brackets # white followed by (comment\n maybe repeated) # DON'T EAT NEWLINE!! pywhiteoutre = r"([ \t\r\014]|[\]\n)*%s?" % pycommentre pywhiteout = re.compile(pywhiteoutre) # whitespace regex inside brackets # white or newline possibly followed by comment, all maybe repeated pywhiteinre = pywhiteoutre #"[ \t\r]*(\\\\\n)*%s?" % pycommentre pywhitein = re.compile(pywhiteinre) # totally blank lines (only recognize if next char is newline) #allblankre = "\n" + pywhiteinre #allblank = re.compile(allblankre) # re for indentation (might accept empty string) indentp = re.compile(r"[\t ]*") # two char kws and puncts char2kw = ["if", "or", "in", "is"] punct2 = ["<>", "<<", ">>", "<=", ">=", "!=", "**", "=="] # >two char kws as map of first 3 chars to others char3k_data = """ and break class continue def del elif else except finally for from global import lambda not pass print raise return try while exec """ char3kw = string.split(char3k_data) char3kwdict = {} for x in char3kw: char3kwdict[x[:3]] = x # NOTE: newline is treated same as a punctuation # NOTE: "' ARE NOT PUNCTS punct = "~!#%^&*()-+=|{}<>,.;:/[]{}\n`" punctlist = map(None, punct) kwmap = {} for x in char2kw + punct2 + char3kw + map(None, punct): # everything parses as length 1 to the outer world. kwmap[x] = (((KEYFLAG, x), x), 1) # special hack kwmap[">>"] = (((KEYFLAG, ">x>"), ">x>"), 1) newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1) #finaldedent = (((TERMFLAG, "DEDENT"), ""), 1) # Python lexical dictionary. ### MUST HANDLE WHOLELY BLANK LINES CORRECTLY! def RMATCH(re, key, start=0): group = re.match(key, start) if group is None: return -1 return group.end() - group.start() class pylexdict(kjParser.LexDictionary): def __init__(self): kjParser.LexDictionary.__init__(self) # need to add special map for >> self.brackets = 0 # count of active brackets self.realindex = 0 # where to start self.indents = [""] # stack of indents (start with a fake one) self.lineno = 0 self.atdedent = 0 ### handle multiple dedents correctly!!! ### translate tabs to 8 spaces... from kjParser import TERMFLAG self.NAMEflag = (TERMFLAG, "NAME") self.STRINGflag = (TERMFLAG, "STRING") self.NEWLINEflag = (TERMFLAG, "NEWLINE") self.INDENTflag = (TERMFLAG, "INDENT") self.DEDENTflag = (TERMFLAG, "DEDENT") self.NUMBERflag = (TERMFLAG, "NUMBER") def endoffile(self, String): # pop off all indentations! indents = self.indents #lastresult = self.lastresult self.realindex = len(String) if not indents: # pop indents #print "eof after dedent" result = self.lastresult = (ENDOFFILETERM, 0) else: #print "eof as dedent after", self.lastresult del indents[-1] if indents: dedent = indents[-1] else: dedent = "" result = self.lastresult = ((self.DEDENTflag, dedent), 1) #print "returning eof", result, "after", lastresult return result def Token(self, String, StartPosition): #print "Token", (StartPosition, # `String[self.realindex:self.realindex+20]`, self.lastresult) # HAVE TO FAKE OUT LEXER FOR DEDENTS # STARTPOSITION COUNTS # OF TOKEN, NOT STRING POSITION # STRING POSITION IS MAINTAINED IN LexDict object. lastindex = self.lastindex lastresult = self.lastresult if self.laststring is not String: #print "parsing new string" self.laststring = String # special hack: skip lead whitespace cursor = 0 self.lineno = 1 while 1: test = RMATCH(pywhitein,String, cursor) if test<0: break next = cursor + test #print "lead skip:", next, String[cursor:next] if String[next]!="\n": break #skipped = String[cursor:next] #if "\n" in skipped: # self.lineno = ( # self.lineno + len(string.splitfields(skipped, "\n"))) #self.lineno = self.lineno+1 cursor = next + 1 self.realindex = cursor self.saveindex = 0 self.indents = [""] # stack of indents (start with a fake one) # pretend we saw a newline self.lastresult = newlineresult if StartPosition!=0: self.laststring = None raise ValueError, "python lexical parsing must start at zero" lastindex = self.lastindex lastresult = None elif lastindex == StartPosition: #print "returning lastresult ", lastresult return lastresult elif lastindex != StartPosition-1: raise ValueError, "python lexer can't skip tokens" #print "parsing", StartPosition, lastresult # do newline counting here! delta = String[self.saveindex: self.realindex] #print "delta", `delta` if "\n" in delta: #print self.lineno, self.saveindex, self.realindex, `delta` self.lineno = self.lineno + len( string.splitfields(delta, "\n")) - 1 realindex = self.saveindex = self.realindex self.lastindex = StartPosition # skip whitespace (including comments) ### needs to be improved to parse blank lines, count line numbers... # skip all totally blank lines (don't eat last newline) atlineend = (String[realindex:realindex+1] == "\n" or lastresult is newlineresult or self.atdedent) skipnewlines = (lastresult is newlineresult or self.atdedent or self.brackets>0) if atlineend: #String[realindex:realindex+1]=="\n": #print "trying to skip blank lines", String[realindex:realindex+10] while 1: #if String[realindex:realindex+1]=="\n": # start = realindex+1 # move past current newline # self.lineno = self.lineno + 1 #else: # start = realindex start = realindex if skipnewlines: while String[start:start+1]=="\n": start = start+1 #self.lineno = self.lineno+1 #print "matching", `String[start:start+10]` skip = RMATCH(pywhitein,String, start) #print "skip=", skip if skip<0: break rs = skip + realindex + (start-realindex) if rs==realindex: break #print "at", rs, `String[rs]` if (rs0: # rs = rs + 1 #skipped = String[start:rs] #if "\n" in skipped: #self.lineno = self.lineno + len( # string.splitfields(skipped, "\n")) self.realindex = realindex = rs #self.lineno = self.lineno+1 else: if skipnewlines: self.realindex = realindex = start break #print "after skipping blank lines", `String[realindex:realindex+20]` skipto = realindex skip = 0 if self.brackets>0: while 1: #print "skipping white in brackets", skipto if realindex>len(String): break if String[skipto]=="\n": #self.lineno = self.lineno+1 skipto = skipto + 1 self.realindex = realindex = skipto continue skip = RMATCH(pywhiteout,String, skipto) nextskipto = skipto+skip #skipped = String[skipto:nextskipto] #if "\n" in skipped: # self.lineno = self.lineno+len( # string.splitfields(skipped, "\n")) if skip>0: skipto = nextskipto else: break skip = skipto - realindex elif not atlineend: skip = RMATCH(pywhitein,String, realindex) if skip<=0: skip = 0 else: #print "skipping", skip nextri = realindex + skip #skipped = String[realindex:nextri] #if "\n" in skipped: # self.lineno = self.lineno + len( # string.splitfields(skipped, "\n")) realindex = self.realindex = nextri if realindex>=len(String): return self.endoffile(String) # now look for a keyword, name, number, punctuation, # INDENT, DEDENT, NEWLINE first = String[realindex] #if last parse was newline and not in brackets: # look for indent/dedent if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent) and first != "\n"): #print "looking for dent", realindex, `String[realindex:realindex+20]` match = RMATCH(indentp,String, realindex) if match>=0: dent = String[realindex: realindex+match] #print "dent match", match, `dent` oldindex = realindex self.realindex = realindex = realindex+match # replace tabs with 8 spaces dent = string.joinfields(string.splitfields(dent, "\t"), " ") dents = self.indents lastdent = dents[-1] ldl = len(lastdent) dl = len(dent) #print "last", ldl, dents if ldldl: self.realindex = oldindex # back up, may have to see it again! self.atdedent = 1 result = self.lastresult = ((self.DEDENTflag, dent), 1) del dents[-1] #print "dedent ", result, dl, dents return result # otherwise, indentation is same, keep looking # might be at eof now: if realindex>=len(String): #print "returning eof" return self.endoffile(String) first = String[realindex] self.atdedent = 0 from string import digits #, letters if (first in punctlist and # special case for .123 numbers (yuck!) (first!="." or String[realindex+1] not in digits)): # is it a 2 char punct? first2 = String[realindex:realindex+2] if first2 in punct2: result = self.lastresult = kwmap[first2] self.realindex = realindex+2 #print "2 digit punct", result return result # otherwise, just return normal punct result = self.lastresult = kwmap[first] self.realindex = self.realindex + 1 ### special bookkeeping if first=="\n": result = newlineresult #print "newline!" #self.lineno = self.lineno+1 elif first in "[{(": #print "bracket!" self.brackets = self.brackets + 1 elif first in "]})": #print "close bracket!" self.brackets = self.brackets - 1 #print "1 digit punct", result return result if first in digits or first==".": # parse a number... skip = numterm.search(String, realindex) if skip<=realindex: raise parseerror, "number length<1 (!)" thenumber = String[realindex:skip] self.realindex = skip ### note don't interpret number here!! result = self.lastresult = ((self.NUMBERflag, thenumber), 1) #print "number", result return result if first in alpha: # try keyword... first2 = String[realindex: realindex+2] if first2 in char2kw: if String[realindex+2:realindex+3] not in id_letters: # parse a 2 char kw first2 result = self.lastresult = kwmap[first2] self.realindex = self.realindex+2 #print "keyword 2", result return result first3 = String[realindex: realindex+3] if char3kwdict.has_key(first3): the_kw = char3kwdict[first3] the_end = realindex+len(the_kw) if ((the_endsanity: raise parseerror, "failed to terminate single quotes" if String[last-1:last]=="\\": # are we at the end of an odd number of backslashes? (yuck!) bplace = last-1 while String[bplace:bplace+1]=="\\": bplace = bplace-1 if (last-bplace)%2==1: break # the end quote is real! place = last+1 else: break the_string = String[start:last] self.realindex = last+1 result = self.lastresult = ((self.STRINGflag, the_string), 1) #print "1q string", result return result #print (String[realindex-20:realindex-1], String[realindex], # String[realindex+1:realindex+20]) raise parseerror, "invalid first: " + `first` # use a modified lexstringwalker class pylexstringwalker(kjParser.LexStringWalker): def DUMP(self): kjParser.DumpStringWindow(self.String, self.LexDict.realindex) ## a HORRIBLE HACK! of a hack: override the DoParse of Grammar ## to give Python line numbers. RELIES ON GLOBAL pyg ## def hackDoParse(String, Context=None, DoReductions=1): import sys, kjParser try: # construct the ParserObj # add a newline to front to avoid problem with leading comment #String = "\n%s\n" % String Stream = pylexstringwalker( String, pyg.LexD ) Stack = [] # {-1:0} #Walkers.SimpleStack() ParseOb = kjParser.ParserObj( pyg.RuleL, Stream, pyg.DFA, Stack, \ DoReductions, Context ) # do the parse ParseResult = ParseOb.GO() # return final result of reduction and the context return (ParseResult[1], Context) #return kjParser.Grammar.DoParse(pyg, String, Context, DoReductions) except: ### for testing!! t, v = sys.exc_type, sys.exc_value v = ("near line", pyg.LexD.lineno, v) raise t, v buildinfo = """ Please edit the ARCHIVE parameter of this module (%s) to place the python grammar archive in a standard directory to prevent the module from rebuilding the python grammar over and over and over... """ % __name__ def GrammarBuild(): global pyg import kjParseBuild pyg = kjParseBuild.NullCGrammar() pyg.DoParse = hackDoParse # override lexical dict here pyg.LexD = pylexdict() DeclareTerminals(pyg) pyg.Keywords(keywords) pyg.punct("~!#%^&*()-+=|{}'`<>,.;:/[]{}") pyg.Nonterms(nonterms) pyg.Declarerules(pyrules) print buildinfo print "compiling... this may take a while..." pyg.Compile() print "dumping" outfile = open(marshalfilename, "wb") pyg.MarshalDump(outfile) outfile.close() print "self testing the grammar" test(pyg) print "\n\ndone with regeneration" return pyg def unMarshalpygram(): global pyg import kjParser print "loading" try: infile = open(marshalfilename, "rb") except IOError: print marshalfilename, "not found, attempting creation" pyg = GrammarBuild() else: pyg = kjParser.UnMarshalGram(infile) infile.close() pyg.DoParse = hackDoParse # lexical override pyg.LexD = pylexdict() DeclareTerminals(pyg) # BindRules(pyg) if dotest: print "self testing the grammar" test(pyg) return pyg # not used, commented #### interpretation rules/classes # #def zeroth(list, Context): # return list[0] # eg, for all1, ignore all but first # ## file_input, stmt, simple_stmt, compound_stmt give list of statement_ob #def append(list, Context): # "eg, for top_stmt, conjoin two smt lists" # return list[0] + list[1] # ## file_input >zeroth # ## simple, compound, one_small, small_semi: echol #def echol(list, Context): # return list # ## more_small > seq_sep #def seq_sep(list, Context): # list[0].append(list[2]) # return list[0] # ## smexpr, smassn, smpring, smdel, smflow, smimport, smglobal, smexec ## > zeroth # ## cmif, cmwhile, cmfor, cmtry, cmdef, cmclass > zeroth # # #def BindRules(pyg): # for name in string.split(""" # all1 file_input cmif cmwhile cmfor cmtry cmdef cmclass # smexpr smassn smprint smdel smflow smimport smglobal smexec # """): # pyg.Bind(name, zeroth) # for name in string.split(""" # simple compound one_small small_semi # """): # pyg.Bind(name, echol) # pyg.Bind("top_stmt", append) # pyg.Bind("more_small", seq_sep) teststring = """# # # a test string # from string import join, split ''' import re for a in l: a.attr, a[x], b = c else: d = b ''' class zzz: ''' #doc string ''' ''' global re, join d = {} for i in range(10): d[i] = i ''' def test(c,s): return "this" while not done: print done break list = [1,2,3] # comment return 5 n,x = 89 >> 90 + 6 / 7 % x + z << 6 + 2 ** 8 if x==5: while y: for i in range(6): raise SystemError, "oops" """ #teststring ="""\ ## comment #if x in y: print z #elif 1: print w #""" ''' teststring=""" exec "print 1" """ ''' def test(grammar, context=None, teststring=teststring): from time import time now = time() x = grammar.DoParse1(teststring, context) elapsed = time()-now print x print elapsed return x regen = 0 dotest = 0 if __name__ == "__main__" : if regen: GrammarBuild() unMarshalpygram()