Context Navigation

source: trunk/python2/lexer.py @ 200

Last change on this file since 200 was 196, checked in by tim, 15 years ago
experimental python bindings generator as provided by Michael Cohen
File size: 7.1 KB

Line
1	#!/usr/bin/env python
2	# ******************************************************
3	# Michael Cohen <scudette@users.sourceforge.net>
4	#
5	# ******************************************************
6	# Version: FLAG $Version: 0.87-pre1 Date: Thu Jun 12 00:48:38 EST 2008$
7	# ******************************************************
8	#
9	# * This program is free software; you can redistribute it and/or
10	# * modify it under the terms of the GNU General Public License
11	# * as published by the Free Software Foundation; either version 2
12	# * of the License, or (at your option) any later version.
13	# *
14	# * This program is distributed in the hope that it will be useful,
15	# * but WITHOUT ANY WARRANTY; without even the implied warranty of
16	# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	# * GNU General Public License for more details.
18	# *
19	# * You should have received a copy of the GNU General Public License
20	# * along with this program; if not, write to the Free Software
21	# * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22	# ******************************************************
23	""" A simple feed lexer.
24	"""
25
26	import re,sys
27
class Lexer:
    """ A generic feed lexer.

    Data is pushed in with feed() and consumed one token at a time by
    next_token().  Derived classes supply the rule table in ``tokens``
    and may define methods named after the actions in that table; each
    action is called as action(token, match).  Actions without a
    matching method fall back to default_handler().
    """
    ## The following is a description of the states we have and the
    ## way we move through them: format is an array of
    ## [ state_re, re, token/action, next state ]
    ## __init__ appends two compiled patterns to every row (one for
    ## state_re, one for re), so each row must be a mutable list.
    tokens = []
    ## Name of the current state; matched against each rule's state_re.
    state = "INITIAL"
    ## Data fed in but not yet consumed.
    buffer = ''
    ## Accumulated error weight (see ERROR()).
    error = 0
    ## Verbosity of the debugging output; 0 is silent.
    verbose = 0
    ## States stored by PUSH_STATE().  Every instance gets its own copy
    ## in __init__.
    state_stack = []
    ## Amount of data consumed so far.
    processed = 0
    ## Everything consumed so far.
    processed_buffer = ''
    ## Snapshot written by save_state(), read by restore_state().
    saved_state = None
    ## Extra flags OR-ed into every rule's regex when it is compiled.
    flags = 0

    def __init__(self, verbose=0, fd=None):
        """ Prepare the lexer.

        verbose: debugging verbosity; only used when the class does
                 not already define a non-zero ``verbose``.
        fd:      file object backing the data.  It is only used by
                 save_state() and restore_state().
        """
        if not self.verbose:
            self.verbose = verbose

        ## The stack is mutated in place by PUSH_STATE/POP_STATE, so
        ## each instance needs its own list rather than the class one.
        self.state_stack = list(self.state_stack)

        ## Rows still holding only the 4 user supplied fields have not
        ## been compiled yet.  The compiled patterns are appended to
        ## the (shared) rows, so this happens once per rule table.  An
        ## empty table is legal and simply has nothing to compile.
        if self.tokens and len(self.tokens[0]) == 4:
            for row in self.tokens:
                row.append(re.compile(row[0], re.DOTALL))
                row.append(re.compile(row[1], re.DOTALL | re.M | re.S | self.flags))

        self.fd = fd

    def save_state(self, t=None, m=None):
        """ Records the current state of the lexer in self.saved_state.

        When restore_state is called afterwards, the lexer goes back
        to the state it was in when save_state was called.  If a
        match object m is given, the recorded position is the start of
        that match rather than its end.

        Nothing is returned.  This matters when the method is used as
        a token action, because a true return value from an action is
        taken as the next state.

        self.fd must support tell().

        Note that derived classes may need to extend this.
        """
        ## Can't save our state if we have errors. We need to guarantee
        ## that we rewind to a good part of the file.
        if self.error:
            return

        if m is not None:
            end = m.end()
        else:
            end = 0

        self.saved_state = dict(state_stack = self.state_stack[:],
                                processed = self.processed - end,
                                processed_buffer = self.processed_buffer,
                                readptr = self.fd.tell() - len(self.buffer) - end,
                                state = self.state,
                                ## The base class does not define
                                ## objects; derived classes may.
                                objects = list(getattr(self, 'objects', [])),
                                error = self.error,
                                )

        if self.verbose > 1:
            print("Saving state %s" % self.processed)

    def restore_state(self):
        """ Rewinds the lexer to the last snapshot taken by save_state.

        Does nothing when no snapshot exists.  The pending buffer is
        dropped and self.fd is repositioned with seek(), so the caller
        has to feed data again from there.
        """
        state = self.saved_state
        if not state:
            return

        ## Copy the lists so that later changes can not corrupt the
        ## snapshot, which may be restored more than once.
        self.state_stack = state['state_stack'][:]
        self.processed = state['processed']
        self.processed_buffer = state['processed_buffer']
        self.buffer = ''
        self.fd.seek(state['readptr'])
        self.state = state['state']
        self.objects = state['objects'][:]
        self.error = state['error']
        if self.verbose > 1:
            print("Restoring state to offset %s" % self.processed)

    def next_token(self, end = True):
        """ Consumes one token from the start of the buffer.

        Rules are tried in order; the first one whose state_re matches
        the current state and whose re matches the buffer wins.  Its
        comma separated actions are then called in turn.

        end: when true, any unmatched data counts as stuck.  When
             false, the lexer only counts as stuck once more than 1024
             units are pending.

        Returns the rule's token/action string on a match, "ERROR"
        when the lexer was stuck and one unit was discarded, and None
        when nothing could be done.
        """
        ## Now try to match any of the regexes in order:
        current_state = self.state
        for state_re, re_str, token, next_state, state_regex, regex in self.tokens:
            ## Does the rule apply for us now?
            if not state_regex.match(current_state):
                continue

            if self.verbose > 2:
                print("%s: Trying to match %r with %r" % (self.state, self.buffer[:10], re_str))

            m = regex.match(self.buffer)
            if not m:
                continue

            if self.verbose > 3:
                print("%s matched %r" % (re_str, m.group(0)))

            ## The match consumes the data off the buffer (the
            ## handler can put it back if it likes)
            consumed = m.end()
            self.processed_buffer += self.buffer[:consumed]
            self.buffer = self.buffer[consumed:]
            self.processed += consumed

            ## Try to iterate over all the callbacks specified:
            for t in token.split(','):
                if self.verbose > 0:
                    print("0x%X: Calling %s %r" % (self.processed, t, m.group(0)))

                ## Is there a callback to handle this action?
                cb = getattr(self, t, self.default_handler)
                result = cb(t, m)
                if result == "CONTINUE":
                    continue

                elif result:
                    ## A true return value overrides the rule's own
                    ## next state.
                    next_state = result
                    self.state = next_state

            if next_state:
                self.state = next_state

            return token

        ## Check that we are making progress - if we are too full, we
        ## assume we are stuck:
        if (end and self.buffer) or len(self.buffer) > 1024:
            self.processed_buffer += self.buffer[:1]
            self.buffer = self.buffer[1:]
            ## Keep the offset in step with processed_buffer.
            self.processed += 1
            self.ERROR("Lexer Stuck, discarding 1 byte (%r) - state %s" % (self.buffer[:10], self.state))
            return "ERROR"

        ## No token were found
        return None

    def feed(self, data):
        """ Appends data to the pending buffer. """
        self.buffer += data

    def empty(self):
        """ Returns True when no data is pending. """
        return not self.buffer

    def default_handler(self, token, match):
        """ Action used when no method is named after the token. """
        if self.verbose > 2:
            print("Default handler: %s with %r" % (token, match.group(0)))

    def ERROR(self, message = None, weight =1):
        """ Adds weight to self.error, printing message when verbose. """
        if self.verbose > 0 and message:
            print("Error(%s): %s" % (weight, message))

        self.error += weight

    def PUSH_STATE(self, token = None, match = None):
        """ Stores the current state on the state stack. """
        if self.verbose > 1:
            print("Storing state %s" % self.state)
        self.state_stack.append(self.state)

    def POP_STATE(self, token = None, match = None):
        """ Removes and returns the most recently stored state.

        As a token action, the returned state becomes the next state.
        Returns None (after printing a warning) on an empty stack.
        """
        try:
            state = self.state_stack.pop()
        except IndexError:
            print("Tried to pop the state but failed - possible recursion error")
            return None

        if self.verbose > 1:
            print("Returned state to %s" % state)

        return state

    def close(self):
        """ Just a convenience function to force us to parse all the data """
        while self.next_token():
            pass
185
class SelfFeederMixIn(Lexer):
    """ Mixin adding parse_fd(), which loads a file object into the
    lexer and tokenises what was read.

    Note (from the original author): self.fd must be the fd we read
    from.
    """
    def parse_fd(self, fd):
        """ Feeds everything fd.read() returns into the buffer, then
        calls next_token() until it returns a false value. """
        contents = fd.read()
        self.feed(contents)
        while True:
            if not self.next_token():
                break
195

Note: See TracBrowser for help on using the repository browser.

Download in other formats: