Mercurial > lbo > hg > pcombinators
changeset 0:ed57136df979 draft
Initial commit
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Sun, 19 May 2019 12:00:58 +0200 |
parents | |
children | 57092e2bbe6c |
files | combinators.py |
diffstat | 1 files changed, 157 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun May 19 11:20:01 2019

@author: lbo

A small parser-combinator library.

Every parser exposes ``parse(state)`` and returns a ``(result, state)``
pair. A ``result`` of ``None`` signals failure; in that case the parser
leaves the state's index where it was before the attempt.
"""

import re


class ParseState:
    """Encapsulates state as the parser goes through input."""

    _input = ''
    _index = 0

    def __init__(self, s):
        """Start parsing the string `s` at index 0."""
        self._input = s

    def __repr__(self):
        """Show the input with the current character between `<` and `>`."""
        i = self._index
        if i < len(self._input):
            return 'ParseState({}< {} >{})'.format(
                self._input[:i], self._input[i], self._input[i + 1:])
        return 'ParseState({}<>)'.format(self._input)

    def next(self):
        """Consume and return the current character.

        Whitespace directly following the consumed character is skipped as
        well, so the index afterwards rests on the next non-space character
        (or at the end of the input). Raises IndexError when called at the
        end of the input, because `peek()` does.
        """
        current = self.peek()
        self._index += 1
        while not self.finished() and self.peek().isspace():
            self._index += 1
        return current

    def peek(self):
        """Return the current character without consuming it."""
        return self._input[self._index]

    def index(self):
        """Return the current position within the input."""
        return self._index

    def reset(self, ix):
        """Move the current position to the absolute index `ix`."""
        self._index = ix

    def __iter__(self):
        return self

    def __next__(self):
        """Iterator protocol: like `next()`, but stops cleanly at the end."""
        # next() raises IndexError once the input is exhausted, which would
        # escape from a `for` loop; the protocol requires StopIteration.
        if self.finished():
            raise StopIteration
        return self.next()

    def finished(self):
        """Return True once the whole input has been consumed."""
        # `>=` rather than `==`: reset() accepts any index, and an index past
        # the end must count as finished instead of making peek() fail.
        return self._index >= len(self._input)

    def remaining(self):
        """Return the not yet consumed part of the input ('' if finished)."""
        if self.finished():
            return ''
        return self._input[self._index:]

    def sub(self, start, length):
        """Return a new ParseState over a slice of the input.

        The slice begins `start` characters after the current index and is
        `length` characters long; it must be non-empty and lie inside the
        input (checked with an assertion).
        """
        begin = self._index + start
        end = begin + length
        assert begin < end <= len(self._input)
        return ParseState(self._input[begin:end])


class Parser:
    """Base class of all parsers; on its own it never matches."""

    type = None

    def parse(self, st):
        """Return `(result, state)`; a result of None means "no match"."""
        return (None, st)

    def __add__(self, other):
        """`a + b` builds an AtomicSequence of both parsers."""
        return AtomicSequence(self, other)


# Combinators

class _Sequence(Parser):
    """Shared implementation of the sequence combinators.

    Subclasses set `_atomic` to choose what happens when a member fails.
    """

    _parsers = ()
    _atomic = None

    def __init__(self, *parsers):
        self._parsers = parsers

    def parse(self, st):
        """Run the member parsers in order and collect their results.

        Returns `(list_of_results, state)`. If a member fails, an atomic
        sequence rewinds to where the sequence started and returns
        `(None, state)`; a non-atomic one rewinds to just before the failing
        member and returns the results gathered so far (possibly `[]`).
        """
        results = []
        initial = st.index()
        for p in self._parsers:
            before = st.index()
            result, st2 = p.parse(st)
            if result is None:
                if self._atomic:
                    st.reset(initial)
                    return None, st
                st.reset(before)
                break
            results.append(result)
            st = st2
        # Return `st`, not the loop-local `st2`: the latter is unbound for a
        # sequence without members and, after a break, belongs to the parser
        # that just failed.
        return results, st


class AtomicSequence(_Sequence):
    """Execute a series of parsers after each other. All must succeed."""
    _atomic = True


class OptimisticSequence(_Sequence):
    """Execute a series of parsers after each other, as far as possible."""
    _atomic = False


class Alternative(Parser):
    """Attempt a series of parsers and return the result of the first one matching."""
    _parsers = ()

    def __init__(self, *parsers):
        self._parsers = parsers

    def parse(self, st):
        """Return the first non-None result, or `(None, state)` if none match.

        The state is rewound to the starting index after every failed
        attempt, so each alternative sees the same input.
        """
        initial = st.index()
        for p in self._parsers:
            r, st2 = p.parse(st)
            if r is not None:
                return r, st2
            st.reset(initial)
        return None, st


# Parsers

class String(Parser):
    """Match a fixed string, character by character."""

    _s = ''

    def __init__(self, s):
        self._s = s

    def parse(self, st):
        """Return `(string, state)` on a match, else `(None, state)`.

        Characters are consumed with `ParseState.next()`, which also skips
        whitespace following each consumed character. On failure the state
        is rewound to where matching started.
        """
        initial = st.index()
        for expected in self._s:
            if st.finished() or st.peek() != expected:
                st.reset(initial)
                return (None, st)
            st.next()
        return (self._s, st)


class Regex(Parser):
    """Parse a string using a regular expression. The result is either the
    string or a tuple with all matched groups."""
    _rx = None

    def __init__(self, rx):
        """Accept either a compiled pattern or a pattern string."""
        if not isinstance(rx, re.Pattern):
            rx = re.compile(rx)
        self._rx = rx

    def parse(self, st):
        """Match the pattern at the current position.

        Returns `(result, state)`, where the result is the tuple of groups
        if the pattern defines any and the whole matched text otherwise.
        Returns `(None, state)` without moving if the pattern does not match.
        """
        match = self._rx.match(st.remaining())
        if match is None:
            return None, st
        groups = match.groups()
        result = groups if groups else match.group(0)
        # The match was made against remaining(), so its end offset is
        # relative to the current index and must be translated back into an
        # absolute position before repositioning the state.
        st.reset(st.index() + match.end())
        return result, st