changeset 139:d320f32169ee

preprocess: Overhaul preprocessing and implement reference resolution
author Lewin Bormann <lbo@spheniscida.de>
date Tue, 03 Sep 2019 09:41:17 +0200
parents 74fadb9ea35f
children a4a6169cccc0
files src/preprocess.c src/preprocess.h
diffstat 2 files changed, 133 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/src/preprocess.c	Tue Sep 03 09:40:21 2019 +0200
+++ b/src/preprocess.c	Tue Sep 03 09:41:17 2019 +0200
@@ -6,8 +6,7 @@
 
 void ypreprocess(yexpr_t* expr) {
     ypreprocess_resolve_builtins(expr);
-    ypreprocess_defn(expr);
-    // ypreprocess_refs(expr);
+    ypreprocess_refs(expr);
 }
 
 void ypreprocess_resolve_builtins(yexpr_t* expr) {
@@ -69,9 +68,10 @@
  *
  * `expr` must be a list starting with the `defn` built-in.
  *
- * Returns false if syntax is incorrect.
+ * Returns false if syntax is incorrect. The reference to the function is stored
+ * into `newref`.
  */
-static bool ypreprocess_compile_defn(yexpr_t* expr) {
+static bool ypreprocess_compile_defn(yexpr_t* expr, yref_t* newref) {
     yvec_t* list = &expr->value.list;
     assert(4 <= list->len);
     yexpr_t* defn = YVEC_AT(list, 0, yexpr_t);
@@ -84,8 +84,8 @@
     if (args->typ != YEXPR_LIST) return false;
     size_t n_args = args->value.list.len;
 
+    yfunc_t func;
     yvec_t body_exprs = YVEC_NEW(yvec_at(list, 3), list->len - 3, yexpr_t);
-    yfunc_t func;
     yvec_t arg_descs = YVEC_NEW(NULL, args->value.list.len, yarg_desc_t);
     for (size_t i = 0; i < args->value.list.len; i++) {
         yarg_desc_t desc;
@@ -109,35 +109,146 @@
     func_value.typ = YVALUE_FUNC;
     func_value.value.func = func;
 
-    yvalue_set(name->value.ref, &func_value);
+    *newref = yvalue_set(YVALUE_INSERT, &func_value);
 
-    // Clean up and return.
     yexpr_destroy(defn);
     yexpr_destroy(args);
     yvec_destroy(list);
-    *expr = yexpr_new();  // no destroy!
+
+    *expr = yexpr_new();
     return true;
 }
 
-void ypreprocess_defn(yexpr_t* expr) {
-    // `defn`s are already translated to built-in exprs by the resolve_builtins
-    // stage. Now we extract them, store them as func values into the value
-    // table, and replace them with `undef` expressions that will not be
-    // executed.
+typedef struct {
+    ystr_t sym;
+    yref_t ref;
+} ypreprocess_existing_t;
+
+/// References in a scope.
+typedef struct {
+    /// Vector of ypreprocess_existing_t
+    yvec_t refs;
+} ypreprocess_scope_t;
+
+static yref_t ypreprocess_find_or_create_ref(
+    ystr_t* symbol, yvec_t* /* of ypreprocess_scope_t* */ scope_stack) {
+    // Search for symbol in stack, starting at top-most scope (enclosing list)
+    // and working upwards.
+    for (size_t i = 0; i < scope_stack->len; i++) {
+        size_t index = scope_stack->len - 1 - i;
+        ypreprocess_scope_t* scope =
+            *YVEC_AT(scope_stack, index, ypreprocess_scope_t*);
+
+        for (size_t ref = 0; ref < scope->refs.len; ref++) {
+            ypreprocess_existing_t* candidate =
+                YVEC_AT(&scope->refs, ref, ypreprocess_existing_t);
+            if (0 == ystr_cmp(symbol, &candidate->sym)) {
+                return candidate->ref;
+            }
+        }
+    }
+
+    // No existing reference found! Create one in the current scope.
+    yref_t ref = yref_new();
+    // For all new references, we define them in the scope above the current
+    // list (thus "- 2"), e.g. in let, defn
+    ypreprocess_scope_t* topscope =
+        *YVEC_AT(scope_stack, scope_stack->len - 2, ypreprocess_scope_t*);
+    ypreprocess_existing_t newref = {.sym = ystr_clone(symbol), .ref = ref};
+    YVEC_PUSH(&topscope->refs, &newref);
+    return ref;
+}
+
+static void ypreprocess_refs_recursive(
+    yvec_t* /* of ypreprocess_scope_t* */ scope_stack, yexpr_t* expr) {
+    // Abort cheaply for types we don't want to resolve anyway.
     switch (expr->typ) {
+        case YEXPR_UNDEF:
+        case YEXPR_BUILTIN:
+        case YEXPR_NUMBER:
+        case YEXPR_STRING:
+        case YEXPR_ATOM:
+        case YEXPR_EXCEPTION:
+            return;
+        default:
+            break;
+    }
+
+    ypreprocess_scope_t scope;
+    ypreprocess_scope_t* scopep = &scope;
+
+    switch (expr->typ) {
+        case YEXPR_REF:
+            if (yref_type(&expr->value.ref) == YREF_SYM) {
+                ystr_t* sym = &expr->value.ref.ref.sym;
+                yref_t resolved =
+                    ypreprocess_find_or_create_ref(sym, scope_stack);
+                yexpr_set_ref(expr, resolved);
+            }
+            break;
         case YEXPR_LIST:
+            // Create new scope for list.
+            scope.refs = YVEC_NEW(NULL, 4, ypreprocess_existing_t);
+            YVEC_PUSH(scope_stack, &scopep);
+
+            // Handle functions.
             if (expr->value.list.len >= 4) {
-                yexpr_t* first = YVEC_AT(&expr->value.list, 0, yexpr_t);
-                if (first->typ == YEXPR_BUILTIN &&
-                    first->value.builtin == YBUILTIN_DEFN) {
-                    ypreprocess_compile_defn(expr);
-                    return;
+                yexpr_t* head = YVEC_AT(&expr->value.list, 0, yexpr_t);
+                if (head->typ == YEXPR_BUILTIN &&
+                    head->value.builtin == YBUILTIN_DEFN) {
+                    // Compile and store function. Push symbol->reference
+                    // mapping onto scope stack.
+                    yref_t new_ref;
+                    // TODO: handle errors better
+                    assert(ypreprocess_compile_defn(expr, &new_ref));
+                    yvalue_t* func = yvalue_get(new_ref);
+                    assert(func != NULL);
+
+                    ypreprocess_existing_t funcref = {
+                        .sym = ystr_clone(&func->value.func.name),
+                        .ref = new_ref};
+                    YVEC_PUSH(&(*YVEC_AT(scope_stack, scope_stack->len - 2,
+                                         ypreprocess_scope_t*))
+                                   ->refs,
+                              &funcref);
+
+                    yvec_t* func_body = &func->value.func.body;
+                    for (size_t i = 0; i < func_body->len; i++) {
+                        ypreprocess_refs_recursive(
+                            scope_stack, YVEC_AT(func_body, i, yexpr_t));
+                    }
+
+                    goto cleanup;
                 }
             }
-            for (size_t i = 0; i < expr->value.list.len; i++)
-                ypreprocess_defn(YVEC_AT(&expr->value.list, i, yexpr_t));
+
+            // Resolve list elements recursively.
+            for (size_t i = 0; i < expr->value.list.len; i++) {
+                yexpr_t* elem = YVEC_AT(&expr->value.list, i, yexpr_t);
+                ypreprocess_refs_recursive(scope_stack, elem);
+            }
+
+        cleanup:
+            // Clean up.
+            yvec_pop(scope_stack, NULL);
+            for (size_t i = 0; i < scope.refs.len; i++) {
+                ypreprocess_existing_t* ex =
+                    YVEC_AT(&scope.refs, i, ypreprocess_existing_t);
+                ystr_destroy(&ex->sym);
+                yref_destroy(&ex->ref);
+            }
+            yvec_destroy(&scope.refs);
             break;
         default:
-            return;
+            break;
     }
 }
+
+// Initial invocation must be of type YEXPR_LIST.
+void ypreprocess_refs(yexpr_t* expr) {
+    yvec_t scope_stack = YVEC_NEW(NULL, 16, ypreprocess_scope_t*);
+    ypreprocess_refs_recursive(&scope_stack, expr);
+    assert(scope_stack.len == 0);
+    yvec_destroy(&scope_stack);
+}
+
--- a/src/preprocess.h	Tue Sep 03 09:40:21 2019 +0200
+++ b/src/preprocess.h	Tue Sep 03 09:41:17 2019 +0200
@@ -23,19 +23,10 @@
 void ypreprocess_resolve_builtins(yexpr_t* expr);
 
 /**
- * @brief Extract functions and remove `defn` expressions.
+ * @brief Replace symbolic IDs with per-scope-unique numeric IDs.
  *
  * Run order: 2
  */
-void ypreprocess_defn(yexpr_t* expr);
-
-/**
- * @brief Replace symbolic IDs with per-scope-unique numeric IDs.
- *
- * Run order: 3
- *
- * NOTE: should not translate refs in e.g. defn (function name)
- */
 void ypreprocess_refs(yexpr_t* expr);
 
 /**