changeset 0:ed57136df979 draft

Initial commit
author Lewin Bormann <lbo@spheniscida.de>
date Sun, 19 May 2019 12:00:58 +0200
parents
children 57092e2bbe6c
files combinators.py
diffstat 1 files changed, 157 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/combinators.py	Sun May 19 12:00:58 2019 +0200
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun May 19 11:20:01 2019
+
+@author: lbo
+"""
+
+import re
+
+class ParseState:
+    """Cursor over an input string that the parsers advance through.
+
+    The state holds the complete input plus the index of the current
+    character. Parsers mutate the state in place and use `index()` together
+    with `reset()` to backtrack after a failed attempt.
+    """
+
+    # Class-level defaults; every instance overwrites both in __init__.
+    _input = ''
+    _index = 0
+
+    def __init__(self, s):
+        """Start parsing the string `s` at its first character."""
+        self._input = s
+        self._index = 0
+
+    def __repr__(self):
+        """Show the input with the current character marked as `< c >`."""
+        if self._index < len(self._input):
+            return 'ParseState({}< {} >{})'.format(
+                    self._input[0:self._index], self._input[self._index], self._input[self._index+1:])
+        else:
+            return 'ParseState({}<>)'.format(self._input)
+
+    def next(self):
+        """Consume and return the current character.
+
+        After consuming it, the cursor also skips any whitespace that
+        directly follows. Whitespace that is under the cursor when this is
+        called is returned like any other character.
+
+        Raises IndexError if the input is already exhausted.
+        """
+        current = self.peek()
+        self._index += 1
+        while not self.finished() and self.peek().isspace():
+            self._index += 1
+        return current
+
+    def peek(self):
+        """Return the current character without consuming it.
+
+        Raises IndexError if the input is already exhausted.
+        """
+        return self._input[self._index]
+
+    def index(self):
+        """Return the current position, e.g. to `reset()` to it later."""
+        return self._index
+
+    def reset(self, ix):
+        """Move the cursor to position `ix` (no bounds checking is done)."""
+        self._index = ix
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """Iterator protocol: like `next()`, but stop cleanly at the end.
+
+        `next()` raises IndexError on exhausted input, which would escape
+        from a `for` loop; the iterator protocol requires StopIteration.
+        """
+        if self.finished():
+            raise StopIteration
+        return self.next()
+
+    def finished(self):
+        """Return True once the cursor is at (or beyond) the end of input."""
+        # `>=` rather than `==`: reset() accepts any index, and a cursor
+        # past the end has nothing left to read either.
+        return self._index >= len(self._input)
+
+    def remaining(self):
+        """Return the unconsumed input, or '' when nothing is left."""
+        # Slicing from an index at or past the end already yields ''.
+        return self._input[self._index:]
+
+    def sub(self, start, length):
+        """Return a new ParseState over a slice of the input.
+
+        The slice begins `start` characters after the cursor and is `length`
+        characters long. It must be non-empty and end within the input; this
+        is checked with an assertion. The new state has its own cursor,
+        starting at the beginning of the slice.
+        """
+        begin = self._index + start
+        end = begin + length
+        assert begin < end <= len(self._input)
+        return ParseState(self._input[begin:end])
+
+class Parser:
+    """Common base of all parsers and combinators.
+
+    The base implementation matches nothing; subclasses override `parse()`.
+    """
+    type = None
+
+    def parse(self, st):
+        """Try to parse from state `st` and return a `(result, state)` pair.
+
+        A result of None means "no match". This default never matches and
+        hands the state back untouched.
+        """
+        no_match = None
+        return no_match, st
+
+    def __add__(self, other):
+        """Make `a + b` build an AtomicSequence that runs `a`, then `b`."""
+        operands = (self, other)
+        return AtomicSequence(*operands)
+    
+# Combinators
+        
+class _Sequence(Parser):
+    """Shared implementation for combinators that run parsers in order.
+
+    Subclasses choose the failure policy through the `_atomic` flag.
+    """
+
+    # Immutable default, so instances can never share a mutable class-level
+    # list; __init__ replaces it with the tuple of parsers passed in.
+    _parsers = ()
+    # Truthy: one failing parser fails the whole sequence.
+    # Falsy: keep whatever was parsed before the failing parser.
+    _atomic = None
+
+    def __init__(self, *parsers):
+        """Remember the parsers to run, in the order given."""
+        self._parsers = parsers
+
+    def parse(self, st):
+        """Run the parsers one after another on `st`.
+
+        Returns `(results, state)`, where `results` is the list of the
+        individual parsers' results in order. As soon as a parser returns
+        None:
+
+        * atomic sequence: the state is rewound to where the sequence
+          started and `(None, state)` is returned;
+        * otherwise: the state is rewound to just before the failing parser
+          and the results collected so far (possibly an empty list) are
+          returned.
+        """
+        results = []
+        initial = st.index()
+        for p in self._parsers:
+            before = st.index()
+            result, st2 = p.parse(st)
+            if result is None:
+                if self._atomic:
+                    st.reset(initial)
+                    return None, st
+                st.reset(before)
+                break
+            results.append(result)
+            st = st2
+        # Return `st`, not `st2`: `st2` is unbound when there are no parsers
+        # at all, and after a failure `st` is the object that was rewound.
+        return results, st
+    
+class AtomicSequence(_Sequence):
+    """Execute a series of parsers after each other. All must succeed.
+
+    If any parser fails, the state is rewound to where the sequence began
+    and the result is None; otherwise the result is the list of all the
+    individual results, in order.
+    """
+    # Selects the all-or-nothing branch in _Sequence.parse().
+    _atomic = True
+
+class OptimisticSequence(_Sequence):
+    """Execute a series of parsers after each other, as far as possible.
+
+    Parsing stops at the first parser that fails; the state is rewound to
+    just before that parser and the results gathered up to that point are
+    returned (an empty list if the very first parser fails).
+    """
+    # Selects the keep-partial-results branch in _Sequence.parse().
+    _atomic = False
+
+class Alternative(Parser):
+    """Try each parser in turn; the first one that matches wins."""
+    _parsers = []
+
+    def __init__(self, *parsers):
+        """Remember the candidate parsers, in order of preference."""
+        self._parsers = parsers
+
+    def parse(self, st):
+        """Return the `(result, state)` of the first parser that matches.
+
+        Each candidate is started from the position the state had on entry;
+        after a failed candidate the state is rewound to that position.
+        When no candidate matches, `(None, st)` is returned.
+        """
+        start = st.index()
+        for candidate in self._parsers:
+            value, after = candidate.parse(st)
+            if value is None:
+                # Undo whatever the failed candidate consumed.
+                st.reset(start)
+                continue
+            return value, after
+        return None, st
+
+# Parsers
+    
+class String(Parser):
+    """Match a fixed string, character by character."""
+    _s = ''
+
+    def __init__(self, s):
+        """Remember the literal string `s` that has to be matched."""
+        self._s = s
+
+    def parse(self, st):
+        """Match the literal at the current position of `st`.
+
+        Returns `(s, st)` with the literal consumed on success. On failure
+        the state is rewound to where it was on entry and `(None, st)` is
+        returned. An empty literal always succeeds without consuming input.
+
+        NOTE(review): characters are consumed with `st.next()`, which also
+        skips whitespace following each consumed character. Whitespace
+        between the literal's characters in the input is therefore ignored,
+        and a literal with whitespace after its first character does not
+        appear to be matchable -- confirm that this is intended.
+        """
+        initial = st.index()
+        # Walk the literal directly instead of re-slicing it per character.
+        for expected in self._s:
+            if st.finished() or st.peek() != expected:
+                st.reset(initial)
+                return (None, st)
+            st.next()
+        return (self._s, st)
+
+class Regex(Parser):
+    """Parse a string using a regular expression. The result is either the
+    matched string or, if the pattern contains groups, a tuple with all
+    matched groups."""
+    _rx = None
+
+    def __init__(self, rx):
+        """Accept a pattern string or an already compiled pattern."""
+        if not isinstance(rx, re.Pattern):
+            rx = re.compile(rx)
+        self._rx = rx
+
+    def parse(self, st):
+        """Match the pattern at the current position of `st`.
+
+        Returns `(result, st)` with the matched text consumed, or
+        `(None, st)` with the state untouched if the pattern does not match
+        at the current position.
+        """
+        match = self._rx.match(st.remaining())
+        if match is None:
+            return None, st
+        groups = match.groups()
+        result = groups if groups else match.group(0)
+        # The match was made against the remaining() slice, so its end is
+        # relative to the current index and must be shifted by it to get an
+        # absolute position in the input.
+        st.reset(st.index() + match.end())
+        return result, st
\ No newline at end of file