# rules for python
# based on grammar given in Programming Python by Mark Lutz

# EDIT THIS: THE DIRECTORY IN WHICH TO MARSHAL THE
# GRAMMAR DATA STRUCTURES.
#
ARCHIVE = "."  

marshalfilename = ARCHIVE + "/pygram.mar"

pyrules = """

all ::

## input terminates with "fake" dedent (forces read of all file)

@R all1 :: all >> file_input DEDENT

## 1 term newline

##@R lead_blank :: file_input >> NEWLINE file_input

@R top_stmt :: file_input >> file_input stmt
@R file_input :: file_input >> stmt


## 2
@R simple :: stmt >> simple_stmt
@R compound :: stmt >> compound_stmt

## 3 punct ; term NEWLINE
@R one_small :: simple_stmt >> small_stmt NEWLINE
@R more_small :: simple_stmt >> small_stmt ; simple_stmt
@R small_semi :: simple_stmt >> small_stmt ; NEWLINE

## 4 kw pass
@R smexpr :: small_stmt >> expr_stmt
@R smassn :: small_stmt >> assn
@R smprint :: small_stmt >> print_stmt
@R smdel :: small_stmt >> del_stmt
@R smpass :: small_stmt >> pass
@R smflow :: small_stmt >> flow_stmt
@R smimport :: small_stmt >> import_stmt
@R smglobal :: small_stmt >> global_stmt
## access ignored
@R smexec :: small_stmt >> exec_stmt

## 5
@R cmif :: compound_stmt >> if_stmt
@R cmwhile :: compound_stmt >> while_stmt
@R cmfor :: compound_stmt >> for_stmt
@R cmtry :: compound_stmt >> try_stmt
@R cmdef :: compound_stmt >> funcdef
@R cmclass :: compound_stmt >> classdef

##6
@R exprlist :: expr_stmt >> testlist
##@R assignment :: expr_stmt >> assn
@R assn1 :: assn >> testlist = testlist

@R assnn :: assn >> testlist = assn

@R assn1c :: assn >> testlist , = testlist

@R assn1c2 :: assn >> testlist , = testlist ,

@R assnnc :: assn >> testlist , = assn

##testing @R exprassn :: expr_stmt >> expr_stmt = testlist 

@R exprlistc :: expr_stmt >> testlist ,

##testing @R exprassnc :: expr_stmt >> expr_stmt = testlist ,

##7 kw print
@R rprint0 :: print_stmt >> print
@R rprint :: print_stmt >> print testlist 
@R rprintc :: print_stmt >> print testlist ,

##8 kw del
@R rdel :: del_stmt >> del exprlist

##9 trivially handled in #4

##10 kw raise continue break return

## eliminates 11 12 13 14
@R rbreak  :: flow_stmt >> break
@R rcontinue :: flow_stmt >> continue
@R rreturn0 :: flow_stmt >> return
@R rreturn :: flow_stmt >> return testlist 
@R rreturnc :: flow_stmt >> return testlist ,
@R rraise1 :: flow_stmt >> raise test
@R rraise2 :: flow_stmt >> raise test , test
@R rraise3 :: flow_stmt >> raise test , test , test

## 11 12 13 14 skipped

## 15 kw import from
@R rimport :: import_stmt >> import dotted_name_list 
@R rimportc :: import_stmt >> import dotted_name_list ,
@R dnlist1 :: dotted_name_list >> dotted_name
@R dnlistn :: dotted_name_list >> dotted_name_list , dotted_name
@R rfrom :: import_stmt >> from dotted_name import name_list 
@R rfroms :: import_stmt >> from dotted_name import *
@R rfromc :: import_stmt >> from dotted_name import name_list ,
@R nlistn :: name_list >> name_list  , NAME
@R nlist1 :: name_list >> NAME

##16 nt NAME
@R dn1 :: dotted_name >> NAME
@R dnn :: dotted_name >> dotted_name . NAME

##17 kw global
@R global1 :: global_stmt >> global NAME 
@R globaln :: global_stmt >> global_stmt , NAME 

## 18 19 ignored

##20 kw exec in
@R exec1 :: exec_stmt >> exec expr
@R exec2 :: exec_stmt >> exec expr in test
@R exec3 :: exec_stmt >> exec expr in test , test

##21  kw if elif else punct :
@R ifr :: if_stmt >> if test : suite elifs
@R elifs0 :: elifs >>
@R relse :: elifs >> else : suite
@R elifsn :: elifs >> elif test : suite elifs

##22 kw while
@R while1 :: while_stmt >> 
while test : 
    suite
@R while2 :: while_stmt >> 
while test : 
   suite 
else : 
   suite

##23 kw for
@R for1 :: for_stmt >> 
for exprlist in testlist  : 
     suite
@R for2 :: for_stmt >> 
for exprlist in testlist  : 
     suite 
else : 
     suite

##24 kw try
@R tryr :: try_stmt >> try : suite excepts
@R excepts1 :: excepts >> except_clause : suite
@R excepts2 :: excepts >> except_clause : suite else : suite
@R exceptsn :: excepts >> except_clause : suite excepts
@R tryf :: try_stmt >> try : suite finally : suite

##25 kw except
@R except0 :: except_clause >> except 
@R except1 :: except_clause >> except test
@R except2 :: except_clause >> except test , test

##26
@R class1 :: classdef  >> class NAME : suite
@R class2 :: classdef  >> class NAME ( testlist ) : suite

##27 kw def
@R rdef :: funcdef >> def NAME parameters : suite

##28, 29 punct = * 

## (modified from grammar presented)
@R params1 :: parameters >> ( varargslist )
@R params1c :: parameters >> ( varargslist , )
@R params2 :: varargslist >> 

## this is way too permissive: fix at semantic level
@R params3 :: varargslist >> arg
@R params4 :: varargslist >> varargslist , arg
@R argd :: arg >> NAME = test
@R arg2 :: arg >> fpdef
@R arg3 :: arg >> * NAME
@R arg4 :: arg >> ** NAME

## 30
@R fpdef1 :: fpdef  >> NAME
@R fpdef2 :: fpdef  >>  ( fplist )
@R fpdef2c :: fpdef  >>  ( fplist , )

##31
@R fplist1 :: fplist >> fpdef
@R fplistn :: fplist >> fplist , fpdef

##32 t INDENT DEDENT
@R ssuite :: suite >> simple_stmt
@R csuite :: suite >> NEWLINE INDENT stmtseq DEDENT
@R stmtseq1 :: stmtseq >> stmt
@R stmtseqn :: stmtseq >> stmtseq stmt

##33 kw or cancels 53
@R testor :: test >> or_test
@R testand :: or_test >> and_test
@R testor1 :: or_test >> or_test or and_test
## @R testlambda0 :: test >> lambda : test REDUNDANT
@R testlambda1 :: test >> lambda varargslist : test

##34 kw and
@R andnot :: and_test >> not_test
@R andand :: and_test >> and_test and not_test

##35 kw not
@R notnot :: not_test >> not not_test
@R notcmp :: not_test >> comparison

##36 NOTE KWS == >= <= <> !=
@R cmpexpr :: comparison >> expr
@R cmplt :: comparison >> comparison < expr
@R cmpgt :: comparison >> comparison > expr
@R cmpeq :: comparison >> comparison == expr
@R cmpge :: comparison >> comparison >= expr
@R cmple :: comparison >> comparison <=  expr
@R cmpnep :: comparison >> comparison <> expr
@R cmpne :: comparison >> comparison != expr
@R cmpin :: comparison >> comparison in expr
@R cmpnotin :: comparison >> comparison not in expr
@R cmpis :: comparison >> comparison is expr
@R cmpisnot :: comparison >> comparison is not expr

##37 kw is not punct > < ! (eliminated)

##38 p |
@R expr_xor :: expr >> xor_expr
@R expr_lor :: expr >> expr | xor_expr

##39 p ^
@R xor_and :: xor_expr >> and_expr
@R xor_xor :: xor_expr >> xor_expr ^ and_expr

##40
@R and_shift :: and_expr >> shift_expr
@R and_and :: and_expr >> and_expr & shift_expr

##41 note kw's << >x> note goofy x to avoid confusing the grammar
@R shift_arith :: shift_expr >> arith_expr
@R shift_left :: shift_expr >> shift_expr << arith_expr
@R shift_right :: shift_expr >> shift_expr >x> arith_expr

##42
@R arith_term :: arith_expr >> term
@R arith_plus :: arith_expr >> arith_expr + term
@R arith_minus :: arith_expr >> arith_expr - term

##43 p */%
@R termfactor :: term >> factor
@R termmul :: term >> term * factor
@R termdiv :: term >> term / factor
@R termmod :: term >> term % factor

## stuff for power
@R factorpower :: factor >> power
@R factorexp :: factor >> factor ** power

##44 p ~
@R powera :: power >> atom trailerlist
@R trailerlist0 :: trailerlist >> 
@R trailerlistn :: trailerlist >> trailer trailerlist
@R powerp :: power >> + power
@R powerm :: power >> - power
@R poweri :: power >> ~ power

##45 t NUMBER STRING
@R nulltup :: atom >> ( )
@R parens :: atom >> ( testlist )
@R parensc :: atom >> ( testlist , )
@R nulllist :: atom >> [ ]
@R list :: atom >> [ testlist  ]
@R listc :: atom >> [ testlist , ]
@R nulldict :: atom >> { }
@R dict :: atom >> { dictmaker   }
@R dictc :: atom >> { dictmaker , }
@R repr :: atom >> ` testlist  `
## @R reprc :: atom >> ` testlist , ` doesn't work, apparently
@R aname :: atom >> NAME
## note number to be broken out into FLOAT OCTINT HEXINT INT
@R anumber :: atom >> NUMBER
@R astring :: atom >> stringseq
@R stringseq0 :: stringseq >> STRING
@R stringseqn :: stringseq >> stringseq STRING

##46 
@R nullcall :: trailer >> ( )
@R call :: trailer >> ( arglist  )
@R callc :: trailer >> ( arglist , )
@R index :: trailer >> [ subscriptdots ]
@R getattr :: trailer >> . NAME

##47
@R arg1 :: arglist >> argument
@R argn :: arglist >> arglist , argument
##@R argn1 :: arglist >> arglist , NAME = test

##48 ( !!!! is this wrong in PP?)

@R posarg :: argument >> test

## here the left test should be a NAME always, but parser doesn't like it
@R namearg :: argument >> test = test

##49 this IS wrong in PP (numeric ext)
@R nodots :: subscriptdots >> subscriptseq
@R yesdots :: subscriptdots >> subscriptseq , . . . , subscriptseq
@R subscript1 :: subscriptseq >> subscript
@R subscriptn :: subscriptseq >> subscriptseq , subscript
@R subscriptt :: subscript >> test
@R subscripts0 :: subscript >> :
@R subscriptsL :: subscript >> test :
@R subscriptsR :: subscript >> : test
@R subscripts :: subscript >> test : test

##50
@R exprlist1 :: exprlist >> expr
@R exprlistn :: exprlist >> exprlist , expr

##51
@R testlist0 :: testlist >> test
@R testlistn :: testlist >> testlist , test

##52
@R dictmaker1 :: dictmaker >> test : test
@R dictmaker2 :: dictmaker >> dictmaker , test : test

"""

nonterms = """
subscriptdots subscript arg
argument arglist subscriptseq params trailerlist
factor atom trailer dictmaker stringseq power
xor_expr and_expr shift_expr arith_expr term
and_test or_test not_test comparison comp_op expr
fplist stmtseq varargslist assn
expr elifs suite excepts parameters pbasic pdefault pspecial
testlist exprlist test dotted_name_list dotted_name name_list
if_stmt while_stmt for_stmt try_stmt funcdef classdef
expr_stmt print_stmt del_stmt flow_stmt import_stmt global_stmt
small_stmt compound_stmt stmt simple_stmt exec_stmt
file_input except_clause fpdef cmp_op
all
"""

import string
# python needs special handling for the lexical stuff
NAMEre = "[" + string.letters + "_][" + string.letters+string.digits +"]*"
NUMBERre = "[" + string.digits + "]+" # temporary!
STRINGre = '"[^"\n]*"' # to be overridden in lexdict
#NEWLINEre = "\n" # to be overridden in lexdict
INDENTre = "#" # a fake! to be overridden
DEDENTre = "#" # a fake! to be overridden

def echo(str):
    return str

def DeclareTerminals(Grammar):
    Grammar.Addterm("NAME", NAMEre, echo)
    Grammar.Addterm("NUMBER", NUMBERre, echo)
    Grammar.Addterm("STRING", STRINGre, echo)
    #Grammar.Addterm("NEWLINE", NEWLINEre, echo) # newline is kw!
    Grammar.Addterm("INDENT", INDENTre, echo)
    Grammar.Addterm("DEDENT", DEDENTre, echo)

# not >x> is a fake!
keywords = """
and break class continue def del elif else except exec
finally for from global if import in is lambda not or pass
print raise return try while == >= <= <> != >x> << NEWLINE
**
"""

import kjParser, string, re
from kjParser import KEYFLAG, ENDOFFILETERM

alphanumunder = string.letters+string.digits+"_"
alpha = string.letters + "_"

# components that are part of a identifier (cannot be next to kw).
id_letters = map(None, alphanumunder)

# terminator re for names
nametermre = "[^" + alphanumunder + "]"
nameterm = re.compile(nametermre)

# terminator re for numbers (same as above but allow "." in num).
numtermre =  "[^" + alphanumunder + "\.]"
numterm = re.compile(numtermre)

parseerror = "parseerror"

pycommentre = r"(#.*)"

# whitespace regex outside of brackets
#  white followed by (comment\n maybe repeated)
#  DON'T EAT NEWLINE!!
pywhiteoutre = r"([ \t\r\014]|[\]\n)*%s?" % pycommentre
pywhiteout = re.compile(pywhiteoutre)

# whitespace regex inside brackets
#  white or newline possibly followed by comment, all maybe repeated
pywhiteinre = pywhiteoutre #"[ \t\r]*(\\\\\n)*%s?" % pycommentre
pywhitein = re.compile(pywhiteinre)

# totally blank lines (only recognize if next char is newline)
#allblankre = "\n" + pywhiteinre
#allblank = re.compile(allblankre)

# re for indentation (might accept empty string)
indentp = re.compile(r"[\t ]*")

# two char kws and puncts
char2kw = ["if", "or", "in", "is"]
punct2 = ["<>", "<<", ">>", "<=", ">=", "!=", "**", "=="]

# >two char kws as map of first 3 chars to others
char3k_data = """
  and break class continue def del elif else except
  finally for from global import lambda not pass print
  raise return try while exec
"""

char3kw = string.split(char3k_data)
char3kwdict = {}
for x in char3kw:
    char3kwdict[x[:3]] = x

# NOTE: newline is treated same as a punctuation
# NOTE: "' ARE NOT PUNCTS
punct = "~!#%^&*()-+=|{}<>,.;:/[]{}\n`"
punctlist = map(None, punct)

kwmap = {}
for x in char2kw + punct2 + char3kw + map(None, punct):
    # everything parses as length 1 to the outer world.
    kwmap[x] = (((KEYFLAG, x), x), 1)

# special hack
kwmap[">>"] = (((KEYFLAG, ">x>"), ">x>"), 1)
newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)

#finaldedent = (((TERMFLAG, "DEDENT"), ""), 1)

# Python lexical dictionary.

### MUST HANDLE WHOLELY BLANK LINES CORRECTLY!

def RMATCH(re, key, start=0):
    group = re.match(key, start)
    if group is None: return -1
    return group.end() - group.start()

class pylexdict(kjParser.LexDictionary):
   def __init__(self):
       kjParser.LexDictionary.__init__(self)
       # need to add special map for >>
       self.brackets = 0 # count of active brackets
       self.realindex = 0 # where to start
       self.indents = [""] # stack of indents (start with a fake one)
       self.lineno = 0
       self.atdedent = 0
       ### handle multiple dedents correctly!!!
       ### translate tabs to 8 spaces...
       from kjParser import TERMFLAG
       self.NAMEflag = (TERMFLAG, "NAME")
       self.STRINGflag = (TERMFLAG, "STRING")
       self.NEWLINEflag = (TERMFLAG, "NEWLINE")
       self.INDENTflag = (TERMFLAG, "INDENT")
       self.DEDENTflag = (TERMFLAG, "DEDENT")
       self.NUMBERflag = (TERMFLAG, "NUMBER")

   def endoffile(self, String):
       # pop off all indentations!
       indents = self.indents
       #lastresult = self.lastresult
       self.realindex = len(String)
       if not indents:
          # pop indents
          #print "eof after dedent"
          result = self.lastresult = (ENDOFFILETERM, 0)
       else:
          #print "eof as dedent after", self.lastresult
          del indents[-1]
          if indents:
             dedent = indents[-1]
          else:
             dedent = ""
          result = self.lastresult = ((self.DEDENTflag, dedent), 1)
       #print "returning eof", result, "after", lastresult
       return result

   def Token(self, String, StartPosition):
       #print "Token", (StartPosition, 
       #  `String[self.realindex:self.realindex+20]`, self.lastresult)
       # HAVE TO FAKE OUT LEXER FOR DEDENTS
       # STARTPOSITION COUNTS # OF TOKEN, NOT STRING POSITION
       # STRING POSITION IS MAINTAINED IN LexDict object.
       lastindex = self.lastindex
       lastresult = self.lastresult
       if self.laststring is not String:
          #print "parsing new string"
          self.laststring = String
          # special hack: skip lead whitespace
          cursor = 0
          self.lineno = 1
          while 1:
             test = RMATCH(pywhitein,String, cursor)
             if test<0: break
             next = cursor + test
             #print "lead skip:", next, String[cursor:next]
             if String[next]!="\n": break
             #skipped = String[cursor:next]
             #if "\n" in skipped:
             #   self.lineno = (
             #    self.lineno + len(string.splitfields(skipped, "\n")))
             #self.lineno = self.lineno+1
             cursor = next + 1
          self.realindex = cursor
          self.saveindex = 0
          self.indents = [""] # stack of indents (start with a fake one)
          # pretend we saw a newline
          self.lastresult = newlineresult
          if StartPosition!=0:
             self.laststring = None
             raise ValueError, "python lexical parsing must start at zero"
          lastindex = self.lastindex
          lastresult = None
       elif lastindex == StartPosition:
          #print "returning lastresult ", lastresult
          return lastresult
       elif lastindex != StartPosition-1:
          raise ValueError, "python lexer can't skip tokens"

       #print "parsing", StartPosition, lastresult
       # do newline counting here!
       delta = String[self.saveindex: self.realindex]
       #print "delta", `delta`
       if "\n" in delta:
          #print self.lineno, self.saveindex, self.realindex, `delta`
          self.lineno = self.lineno + len(
            string.splitfields(delta, "\n")) - 1
       realindex = self.saveindex = self.realindex
       self.lastindex = StartPosition

       # skip whitespace (including comments)
       ### needs to be improved to parse blank lines, count line numbers...
       # skip all totally blank lines (don't eat last newline)
       atlineend = (String[realindex:realindex+1] == "\n"
                    or lastresult is newlineresult
                    or self.atdedent)
       skipnewlines = (lastresult is newlineresult or
                       self.atdedent or
                       self.brackets>0)
       if atlineend: #String[realindex:realindex+1]=="\n":
          #print "trying to skip blank lines", String[realindex:realindex+10]
          while 1:
             #if String[realindex:realindex+1]=="\n":
             #   start = realindex+1 # move past current newline
             #   self.lineno = self.lineno + 1
             #else:
             #   start = realindex
             start = realindex
             if skipnewlines:
                while String[start:start+1]=="\n":
                   start = start+1
                   #self.lineno = self.lineno+1
             #print "matching", `String[start:start+10]`
             skip = RMATCH(pywhitein,String, start)
             #print "skip=", skip
             if skip<0: break
             rs = skip + realindex + (start-realindex)
             if rs==realindex: break
             #print "at", rs, `String[rs]`
             if (rs<len(String) and 
                 (String[rs] == "\n" or 
                  (skipnewlines and String[rs-1:rs]=="\n"))):
                #print "skipping blank line"
                #if lastresult is newlineresult or self.brackets>0: 
                #   rs = rs + 1
                #skipped = String[start:rs]
                #if "\n" in skipped:
                   #self.lineno = self.lineno + len(
                   #   string.splitfields(skipped, "\n"))
                self.realindex = realindex = rs
                #self.lineno = self.lineno+1
             else:
                if skipnewlines: self.realindex = realindex = start
                break
       #print "after skipping blank lines", `String[realindex:realindex+20]`
       skipto = realindex
       skip = 0
       if self.brackets>0:
          while 1:
             #print "skipping white in brackets", skipto
             if realindex>len(String):
                break
             if String[skipto]=="\n":
                #self.lineno = self.lineno+1
                skipto = skipto + 1
                self.realindex = realindex = skipto
                continue
             skip = RMATCH(pywhiteout,String, skipto)
             nextskipto = skipto+skip
             #skipped = String[skipto:nextskipto]
             #if "\n" in skipped:
             #   self.lineno = self.lineno+len(
             #       string.splitfields(skipped, "\n"))
             if skip>0:
                skipto = nextskipto
             else: break
          skip = skipto - realindex
       elif not atlineend:
          skip = RMATCH(pywhitein,String, realindex)
       if skip<=0: 
          skip = 0
       else:
          #print "skipping", skip
          nextri = realindex + skip
          #skipped = String[realindex:nextri]
          #if "\n" in skipped:
          #   self.lineno = self.lineno + len(
          #    string.splitfields(skipped, "\n"))
          realindex = self.realindex = nextri
       if realindex>=len(String):
          return self.endoffile(String)
       # now look for a keyword, name, number, punctuation, 
       # INDENT, DEDENT, NEWLINE
       first = String[realindex]
       #if last parse was newline and not in brackets:
       #   look for indent/dedent
       if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
           and first != "\n"):
          #print "looking for dent", realindex, `String[realindex:realindex+20]`
          match = RMATCH(indentp,String, realindex)
          if match>=0:
             dent = String[realindex: realindex+match]
             #print "dent match", match, `dent`
             oldindex = realindex
             self.realindex = realindex = realindex+match
             # replace tabs with 8 spaces
             dent = string.joinfields(string.splitfields(dent, "\t"),
                                      "        ")
             dents = self.indents
             lastdent = dents[-1]
             ldl = len(lastdent)
             dl = len(dent)
             #print "last", ldl, dents
             if ldl<dl:
                self.atdedent = 0
                result = self.lastresult = ((self.INDENTflag, dent), 1)
                dents.append(dent)
                #print "indent ", result, dents
                return result
             if ldl>dl:
                self.realindex = oldindex # back up, may have to see it again!
                self.atdedent = 1
                result = self.lastresult = ((self.DEDENTflag, dent), 1)
                del dents[-1]
                #print "dedent ", result, dl, dents
                return result
             # otherwise, indentation is same, keep looking
             # might be at eof now:
             if realindex>=len(String):
                #print "returning eof"
                return self.endoffile(String)
             first = String[realindex]
       self.atdedent = 0
       from string import digits #, letters
       if (first in punctlist and
           # special case for .123 numbers (yuck!)
           (first!="." or String[realindex+1] not in digits)):
          # is it a 2 char punct?
          first2 = String[realindex:realindex+2]
          if first2 in punct2:
             result = self.lastresult = kwmap[first2]
             self.realindex = realindex+2
             #print "2 digit punct", result
             return result
          # otherwise, just return normal punct
          result = self.lastresult = kwmap[first]
          self.realindex = self.realindex + 1
          ### special bookkeeping
          if first=="\n":
             result = newlineresult
             #print "newline!"
             #self.lineno = self.lineno+1
          elif first in "[{(":
             #print "bracket!"
             self.brackets = self.brackets + 1
          elif first in "]})":
             #print "close bracket!"
             self.brackets = self.brackets - 1
          #print "1 digit punct", result
          return result
       if first in digits or first==".":
          # parse a number...
          skip = numterm.search(String, realindex)
          if skip<=realindex:
             raise parseerror, "number length<1 (!)"
          thenumber = String[realindex:skip]
          self.realindex = skip
          ### note don't interpret number here!!
          result = self.lastresult = ((self.NUMBERflag, thenumber), 1)
          #print "number", result
          return result
       if first in alpha:
          # try keyword...
          first2 = String[realindex: realindex+2]
          if first2 in char2kw:
             if String[realindex+2:realindex+3] not in id_letters:
                # parse a 2 char kw first2
                result = self.lastresult = kwmap[first2]
                self.realindex = self.realindex+2
                #print "keyword 2", result
                return result
          first3 = String[realindex: realindex+3]
          if char3kwdict.has_key(first3):
             the_kw = char3kwdict[first3]
             the_end = realindex+len(the_kw)
             if ((the_end<len(String)) and 
                 (String[the_end] not in id_letters) and
                 (String[realindex:the_end]==the_kw)):
                # parse the_kw
                self.realindex = the_end
                result = self.lastresult = kwmap[the_kw]
                #print "keyword +", result
                return result
          #otherwise parse an identifier
          #print "looking for name:", `String[realindex:realindex+10]`
          skip = nameterm.search(String, realindex)
          if skip<=realindex:
             raise parseerror, "identifier length<1 (!)"
          theid = String[realindex:skip]
          self.realindex = skip
          ### note don't interpret number here!!
          result = self.lastresult = ((self.NAMEflag, theid), 1)
          #print "id", result
          return result
       if first in "\"'":
          # check for triplequotes
          first3 = first*3
          if String[realindex: realindex+3] == first3:
             # parse triple quotes
             start = place = realindex+3
             while 1:
                last = string.find(String, first3, place)
                if last<0:
                   raise parseerror, "failed to terminate triple quotes"
                if String[last-1:last]=="\\" and String[last-2:last-1]!="\\":
                   place = last+1
                else: break
             the_string = String[start: last]
             self.realindex = last+3
             result = self.lastresult = ((self.STRINGflag, the_string), 1)
             #print "3q string", result
             # count the newlines!
             #newlinecount = len(string.splitfields(the_string, "\n"))
             #self.lineno = self.lineno+newlinecount
             #print "triple quotes", result
             return result
          else:
             # parse single quotes
             sanity = start = place = realindex+1
             done = 0
             while 1:
                sanity = min(string.find(String, "\n", sanity), len(String))
                if sanity<start: 
                   sanity=len(String)
                   break
                if String[sanity-1]!="\\":
                   break
                else:
                   #self.lineno = self.lineno+1
                   sanity = sanity + 1
             while 1:
                last = string.find(String, first, place)
                if last<0 or last>sanity:
                   raise parseerror, "failed to terminate single quotes"
                if String[last-1:last]=="\\":
                   # are we at the end of an odd number of backslashes? (yuck!)
                   bplace = last-1
                   while String[bplace:bplace+1]=="\\":
                      bplace = bplace-1
                   if (last-bplace)%2==1: 
                      break # the end quote is real!
                   place = last+1
                else: break
             the_string = String[start:last]
             self.realindex = last+1
             result = self.lastresult = ((self.STRINGflag, the_string), 1)
             #print "1q string", result
             return result
       #print (String[realindex-20:realindex-1], String[realindex],
       #       String[realindex+1:realindex+20])
       raise parseerror, "invalid first: " + `first`

# use a modified lexstringwalker
class pylexstringwalker(kjParser.LexStringWalker):
   def DUMP(self):
       kjParser.DumpStringWindow(self.String, self.LexDict.realindex)

## a HORRIBLE HACK! of a hack: override the DoParse of Grammar
## to give Python line numbers.  RELIES ON GLOBAL pyg
##
def hackDoParse(String, Context=None, DoReductions=1):
    import sys, kjParser
    try:
        # construct the ParserObj
        # add a newline to front to avoid problem with leading comment
        #String = "\n%s\n" % String
        Stream = pylexstringwalker( String, pyg.LexD )
        Stack = [] # {-1:0} #Walkers.SimpleStack()
        ParseOb = kjParser.ParserObj( pyg.RuleL, Stream, pyg.DFA, Stack, \
                         DoReductions, Context )
        # do the parse
        ParseResult = ParseOb.GO()
        # return final result of reduction and the context
        return (ParseResult[1], Context)
        #return kjParser.Grammar.DoParse(pyg, String, Context, DoReductions)
    except: ### for testing!!
        t, v = sys.exc_type, sys.exc_value
        v = ("near line", pyg.LexD.lineno, v)
        raise t, v

buildinfo = """
Please edit the ARCHIVE parameter of this module (%s)
to place the python grammar archive in a standard
directory to prevent the module from rebuilding
the python grammar over and over and over...
""" % __name__

def GrammarBuild():
    global pyg
    import kjParseBuild
    pyg = kjParseBuild.NullCGrammar()
    pyg.DoParse = hackDoParse
    # override lexical dict here
    pyg.LexD = pylexdict()
    DeclareTerminals(pyg)
    pyg.Keywords(keywords)
    pyg.punct("~!#%^&*()-+=|{}'`<>,.;:/[]{}")
    pyg.Nonterms(nonterms)
    pyg.Declarerules(pyrules)
    print buildinfo
    print "compiling... this may take a while..."
    pyg.Compile()
    print "dumping"
    outfile = open(marshalfilename, "wb")
    pyg.MarshalDump(outfile)
    outfile.close()
    print "self testing the grammar"
    test(pyg)
    print "\n\ndone with regeneration"
    return pyg

def unMarshalpygram():
    global pyg
    import kjParser
    print "loading"
    try:
       infile = open(marshalfilename, "rb")
    except IOError:
       print marshalfilename, "not found, attempting creation"
       pyg = GrammarBuild()
    else:
       pyg = kjParser.UnMarshalGram(infile)
       infile.close()
    pyg.DoParse = hackDoParse
    # lexical override
    pyg.LexD = pylexdict()
    DeclareTerminals(pyg)
    # BindRules(pyg)
    if dotest: 
       print "self testing the grammar"
       test(pyg)
    return pyg


# not used, commented
#### interpretation rules/classes
#
#def zeroth(list, Context):
#    return list[0] # eg, for all1, ignore all but first
#
## file_input, stmt, simple_stmt, compound_stmt give list of statement_ob
#def append(list, Context):
#    "eg, for top_stmt, conjoin two smt lists"
#    return list[0] + list[1]
#
## file_input >zeroth
#
## simple, compound, one_small, small_semi: echol
#def echol(list, Context):
#    return list
#
## more_small > seq_sep
#def seq_sep(list, Context):
#    list[0].append(list[2])
#    return list[0]
#
## smexpr, smassn, smpring, smdel, smflow, smimport, smglobal, smexec
##  > zeroth
#
## cmif, cmwhile, cmfor, cmtry, cmdef, cmclass > zeroth
#
#
#def BindRules(pyg):
#    for name in string.split("""
#        all1 file_input cmif cmwhile cmfor cmtry cmdef cmclass
#        smexpr smassn smprint smdel smflow smimport smglobal smexec
#        """):
#        pyg.Bind(name, zeroth)
#    for name in string.split("""
#        simple compound one_small small_semi
#        """):
#        pyg.Bind(name, echol)
#    pyg.Bind("top_stmt", append)
#    pyg.Bind("more_small", seq_sep)

teststring = """#
#
# a test string
#
from string import join, split
'''
import re

for a in l:
    a.attr, a[x], b = c
else:
    d = b
'''
class zzz:
   ''' 
   #doc string 
   '''
   '''
   global re, join
   
   d = {} 
   for i in range(10): d[i] = i
   '''
   def test(c,s):
       return "this" 
       while not done:
             print done
             break
       list = [1,2,3]
         # comment
       return 5
   
   
   n,x = 89 >> 90 + 6 / 7 % x + z << 6 + 2 ** 8

if x==5:
   while y:
     for i in range(6):
         raise SystemError, "oops"


"""

#teststring ="""\
## comment
#if x in y: print z
#elif 1: print w
#"""

'''
teststring="""
exec "print 1"
"""
'''

def test(grammar, context=None, teststring=teststring):
       from time import time
       now = time()
       x = grammar.DoParse1(teststring, context)
       elapsed = time()-now
       print x
       print elapsed
       return x
   
regen = 0
dotest = 0
   
if __name__ == "__main__" : 
      if regen: GrammarBuild()
      unMarshalpygram()