Mercurial > lbo > hg > pcombinators

--- a/pcombinators/state.py	Sat May 25 23:58:11 2019 +0200
+++ b/pcombinators/state.py	Sun May 26 00:15:03 2019 +0200
@@ -90,6 +90,7 @@
     """A lazy parsing state implementation, reading from stream."""
     _index = 0 # Index in current _buf
     _total_offset = 0 # Index of first _buf entry in stream since start
+    _fobj = None

     def __repr__(self):
         return 'PFS(ix={}, to={}, buf="{}")'.format(self._index, self._total_offset, ''.join(self._buf))
@@ -120,6 +121,9 @@
             return
         # No holds left, forget everything up to now.
         if len(self._holds) == 0:
+            # This slice copies everything from _index to the end of the buffer. We assume
+            # the buffer is small because it is only filled incrementally from the stream.
+            # Repeated copies are still quadratic; COLLECT_LOWER_LIMIT should mitigate that somewhat.
             self._buf = self._buf[self._index:]
             self._total_offset += self._index
             self._index = 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pcombinators/tests/test_csv.py	Sun May 26 00:15:03 2019 +0200
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sat May 25 23:58:21 2019
+
+@author: lbo
+"""
+
+import io
+import unittest
+
+import pcombinators.state as st
+import pcombinators.tests.csv as csv
+
+def line(l):  # Test helper: run the csv.line parser on st.ps(l) and return item 0 of the result.
+    return csv.line.parse(st.ps(l))[0]  # item 0 is presumably the parsed row - confirm vs. parse()
+
+def value(v):  # Test helper: run the csv.value parser on st.ps(v) and return item 0 of the result.
+    return csv.value.parse(st.ps(v))[0]  # item 0 is presumably the parsed value - confirm vs. parse()
+
+def file(f):  # Test helper: run the csv.file parser on st.ps(f) and return item 0 of the result.
+    return csv.file.parse(st.ps(f))[0]  # item 0 is presumably the list of rows - confirm vs. parse()
+
+class CSVTest(unittest.TestCase):  # Tests for the csv value, line and file parsers.
+
+    def test_values(self):  # Single values: integers, a float and a quoted string.
+        self.assertEqual(1, value('1'))
+        self.assertEqual(12, value('12,'))  # Trailing comma in the input; expected value is still 12.
+        self.assertEqual(1.23, value('1.23'))
+        self.assertEqual('abc', value('"abc"'))  # Expected value has the surrounding quotes removed.
+
+    def test_line(self):  # Single lines, with and without trailing separator or newline.
+        self.assertEqual([1], line('1,'))  # Trailing comma does not add a second element.
+        self.assertEqual([1], line('1'))
+        self.assertEqual([1,2,3,4,5], line('1, 2,3,   4,5\n'))  # Uneven spacing after the commas.
+        self.assertEqual(["a,b", "c", 22], line('"a,b","c", 22\n'))  # Comma inside quotes is kept.
+
+    def test_file(self):  # Multi-line input, parsed once via st.ps() and once via ParseFileState.
+        csv_in = '"title1", "title2", "title3"\n\n1, 2, "aaa"\n"12", 4, "bbb"\n'  # Has an empty line.
+        want = [['title1', 'title2', 'title3'], [], [1, 2, 'aaa'], ['12', 4, 'bbb']]  # [] = empty line.
+        self.assertEqual(want, file(csv_in))
+        self.assertEqual(want, csv.file.parse(st.ParseFileState(io.StringIO(csv_in)))[0])  # Stream input.
+
+if __name__ == '__main__':  # Runs only when executed as a script, not when imported by a test runner.
+    st.ParseFileState.COLLECT_LOWER_LIMIT = 0  # Class-wide override; presumably forces eager buffer collection - confirm in state.py.
+    unittest.main()