Context Navigation

source: trunk/python2/lexer.py @ 200

Last change on this file since 200 was 196, checked in by tim, 15 years ago
experimental python bindings generator as provided by Michael Cohen
File size: 7.1 KB

Rev	Line
[196]	1	#!/usr/bin/env python
	2	# ******************************************************
	3	# Michael Cohen <scudette@users.sourceforge.net>
	4	#
	5	# ******************************************************
	6	# Version: FLAG $Version: 0.87-pre1 Date: Thu Jun 12 00:48:38 EST 2008$
	7	# ******************************************************
	8	#
	9	# * This program is free software; you can redistribute it and/or
	10	# * modify it under the terms of the GNU General Public License
	11	# * as published by the Free Software Foundation; either version 2
	12	# * of the License, or (at your option) any later version.
	13	# *
	14	# * This program is distributed in the hope that it will be useful,
	15	# * but WITHOUT ANY WARRANTY; without even the implied warranty of
	16	# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	17	# * GNU General Public License for more details.
	18	# *
	19	# * You should have received a copy of the GNU General Public License
	20	# * along with this program; if not, write to the Free Software
	21	# * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	22	# ******************************************************
	23	""" A simple feed lexer.
	24	"""
	25
	26	import re,sys
	27
	28	class Lexer:
	29	""" A generic feed lexer """
	30	## The following is a description of the states we have and the
	31	## way we move through them: format is an array of
	32	## [ state_re, re, token/action, next state ]
	33	tokens = []
	34	state = "INITIAL"
	35	buffer = ''
	36	error = 0
	37	verbose = 0
	38	state_stack = []
	39	processed = 0
	40	processed_buffer = ''
	41	saved_state = None
	42	flags = 0
	43
	44	def __init__(self, verbose=0, fd=None):
	45	if not self.verbose:
	46	self.verbose = verbose
	47
	48	if len(self.tokens[0])==4:
	49	for row in self.tokens:
	50	row.append(re.compile(row[0], re.DOTALL))
	51	row.append(re.compile(row[1], re.DOTALL \| re.M \| re.S \| self.flags ))
	52
	53	self.fd = fd
	54
	55	def save_state(self, t=None, m=None):
	56	""" Returns a dict which represents the current state of the lexer.
	57
	58	When provided to restore_state, the lexer is guaranteed to be
	59	in the same state as when the save_state was called.
	60
	61	Note that derived classes may need to extend this.
	62	"""
	63	## Cant save our state if we have errors. We need to guarantee
	64	## that we rewind to a good part of the file.
	65	if self.error: return
	66	try:
	67	end = m.end()
	68	except: end = 0
	69
	70	self.saved_state = dict(state_stack = self.state_stack[:],
	71	processed = self.processed - end,
	72	processed_buffer = self.processed_buffer,
	73	readptr = self.fd.tell() - len(self.buffer) - end,
	74	state = self.state,
	75	objects = self.objects[:],
	76	error = self.error,
	77	)
	78
	79	if self.verbose>1:
	80	print "Saving state %s" % self.processed
	81
	82	def restore_state(self):
	83	state = self.saved_state
	84	if not state: return
	85
	86	self.state_stack = state['state_stack']
	87	self.processed = state['processed']
	88	self.processed_buffer = state['processed_buffer']
	89	self.buffer = ''
	90	self.fd.seek(state['readptr'])
	91	self.state = state['state']
	92	self.objects = state['objects']
	93	self.error = state['error']
	94	if self.verbose>1:
	95	print "Restoring state to offset %s" % self.processed
	96
	97	def next_token(self, end = True):
	98	## Now try to match any of the regexes in order:
	99	current_state = self.state
	100	for state_re, re_str, token, next, state, regex in self.tokens:
	101	## Does the rule apply for us now?
	102	if state.match(current_state):
	103	if self.verbose > 2:
	104	print "%s: Trying to match %r with %r" % (self.state, self.buffer[:10], re_str)
	105	m = regex.match(self.buffer)
	106	if m:
	107	if self.verbose > 3:
	108	print "%s matched %s" % (re_str, m.group(0).encode("utf8"))
	109	## The match consumes the data off the buffer (the
	110	## handler can put it back if it likes)
	111	self.processed_buffer += self.buffer[:m.end()]
	112	self.buffer = self.buffer[m.end():]
	113	self.processed += m.end()
	114
	115	## Try to iterate over all the callbacks specified:
	116	for t in token.split(','):
	117	try:
	118	if self.verbose > 0:
	119	print "0x%X: Calling %s %r" % (self.processed, t, m.group(0))
	120	cb = getattr(self, t, self.default_handler)
	121	except AttributeError:
	122	continue
	123
	124	## Is there a callback to handle this action?
	125	next_state = cb(t, m)
	126	if next_state == "CONTINUE":
	127	continue
	128
	129	elif next_state:
	130	next = next_state
	131	self.state = next
	132
	133
	134	if next:
	135	self.state = next
	136
	137	return token
	138
	139	## Check that we are making progress - if we are too full, we
	140	## assume we are stuck:
	141	if end and len(self.buffer)>0 or len(self.buffer)>1024:
	142	self.processed_buffer += self.buffer[:1]
	143	self.buffer = self.buffer[1:]
	144	self.ERROR("Lexer Stuck, discarding 1 byte (%r) - state %s" % (self.buffer[:10], self.state))
	145	return "ERROR"
	146
	147	## No token were found
	148	return None
	149
	150	def feed(self, data):
	151	self.buffer += data
	152
	153	def empty(self):
	154	return not len(self.buffer)
	155
	156	def default_handler(self, token, match):
	157	if self.verbose > 2:
	158	print "Default handler: %s with %r" % (token,match.group(0))
	159
	160	def ERROR(self, message = None, weight =1):
	161	if self.verbose > 0 and message:
	162	print "Error(%s): %s" % (weight,message)
	163
	164	self.error += weight
	165
	166	def PUSH_STATE(self, token = None, match = None):
	167	if self.verbose > 1:
	168	print "Storing state %s" % self.state
	169	self.state_stack.append(self.state)
	170
	171	def POP_STATE(self, token = None, match = None):
	172	try:
	173	state = self.state_stack.pop()
	174	if self.verbose > 1:
	175	print "Returned state to %s" % state
	176
	177	return state
	178	except IndexError:
	179	print "Tried to pop the state but failed - possible recursion error"
	180	return None
	181
	182	def close(self):
	183	""" Just a conveniece function to force us to parse all the data """
	184	while self.next_token(): pass
	185
	186	class SelfFeederMixIn(Lexer):
	187	""" This mixin is used to make a lexer which feeds itself one
	188	sector at the time.
	189
	190	Note that self.fd must be the fd we read from.
	191	"""
	192	def parse_fd(self, fd):
	193	self.feed(fd.read())
	194	while self.next_token(): pass
	195

Note: See TracBrowser for help on using the repository browser.

Download in other formats: