Ambiguous identifier binding (adding a pass to the reader)

Hi All,

I am experimenting with adding a new pass to the Sketching language.
Instead of

read -> expand -> compile -> evaluate 

I want to add an adjust pass:

read -> adjust -> expand -> compile -> evaluate 

It's relatively simple to do:

(module reader syntax/module-reader
  sketching/main
  #:read        read-sketching
  #:read-syntax read-sketching-syntax

  ; Reader used for #:read: reads one datum from `in` with `read`
  ; and hands the result to `adjust`.
  ;   in : input port, defaults to (current-input-port)
  (define (read-sketching [in (current-input-port)])
    (adjust (read in)))

  ; Reader used for #:read-syntax: reads one syntax object from `in` with
  ; `read-syntax` (labelled with `source-name`) and hands it to `adjust`.
  ;   source-name : defaults to the object name of (current-input-port)
  ;   in          : input port, defaults to (current-input-port)
  (define (read-sketching-syntax [source-name (object-name (current-input-port))]
                                 [in (current-input-port)])
    (adjust (read-syntax source-name in)))

Now the goal of adjust is to detect id[expr ...] in the source and rewrite it to (ref id expr ...).
Whitespace is not allowed between the identifier and the expression.
Using the source location information and the paren-shape syntax property, it was pretty simple to detect this situation.

My current problem is that I have run into the following error:

; #%top: identifier's binding is ambiguous
;   in: #%top
;   context...:
;    #(496045 module) #(496048 module reader) #(497571 module)
;    #(497574 module reader) #(520923 module)
;    #(520931 module test-sketching-reader) #(521041 local)
;    #(521042 intdef)
;   matching binding...:
;    #(sketching-top #<module-path-index:"exports-no-gui.rkt" "exports-all.rkt" sketching/main> 0)
;    #(520923 module) #(520931 module test-sketching-reader)
;   matching binding...:
;    #(#%top #<module-path-index:'#%core> 0)
;    #(496045 module) #(496048 module reader) #(497571 module)
;    #(497574 module reader)
;   matching binding...:
;    #(#%top #<module-path-index:'#%core> 0)
;    #(496045 module) #(496048 module reader)
; Context (plain; to see better errortrace context, re-run with C-u prefix):
;   /Users/soegaard/.emacs.d/elpa/racket-mode-20211018.1717/racket/syntax.rkt:66:0

The definition of adjust is as follows:

  (require racket/runtime-path racket/syntax
           (except-in syntax/parse char))
  
  ; Entry point of the adjust pass: pair-shaped input is handed to
  ; `adjust-dotted-list`; anything else is returned untouched.
  (define (adjust stx)
    (define pair-shaped?
      (syntax-parse stx
        [(_ . _) #t]
        [_       #f]))
    (if pair-shaped?
        (adjust-dotted-list stx)
        stx))

  ; placeholder: builds the list (ref x ...) from its arguments
  (define ref
    (lambda args
      (list* 'ref args)))
  
  ; Walks the pair-shaped syntax `stx` from left to right looking for an
  ; identifier immediately followed by a square-bracketed list, i.e.
  ; id[e ...] written without whitespace, and rebuilds the list.
  ; Raises a syntax error when `stx` does not have the shape (a . more).
  ;
  ; NOTE: every result is produced by a `syntax/loc` template written in this
  ; module. The replies further down suggest building the results with
  ; (datum->syntax #f ...) instead, and the follow-up post reports that this
  ; change resolved the ambiguous-binding error.
  (define (adjust-dotted-list stx)
    (syntax-parse stx
      ; An identifier followed by a proper list; `brackets` names that whole list.
      [(id:id (~and [e:expr ...] brackets)  . more)
       (cond
         ; Index expression only if the list was read with square brackets and
         ; starts exactly where the identifier ends (position + span), which
         ; means there is no whitespace between the two.
         [(and (eqv? (syntax-property #'brackets 'paren-shape) #\[)
               (= (+ (syntax-position #'id) (syntax-span #'id))
                  (syntax-position #'brackets)))
          ; Result: `ref`, then the group (id e ...), then the adjusted tail.
          ; The source location is taken from the identifier.
          (with-syntax ([adjusted-more (adjust #'more)])
            (syntax/loc #'id
              (ref (id e ...) . adjusted-more)))]
         [else
          ; Not an index expression: keep `id` and continue with the tail
          ; that begins at the bracketed list.
          (with-syntax* ([(_ . rest) stx]
                         [adjusted-rest (adjust-dotted-list #'rest)])
            (syntax/loc stx
              (id . adjusted-rest)))])]
      ; Any other head: `a` itself is kept as is (it is not adjusted);
      ; only the tail is processed.
      [(a . more)
       (with-syntax ([adjusted-more (adjust #'more)])
         (syntax/loc stx
           (a . adjusted-more)))]
      [_
       (raise-syntax-error 'adjust-dotted-list "expected a dotted list" stx)]))

Where should I look?

It's relevant to mention that #lang sketching uses sketching-top as #%top.
It is defined in [1].

The full source:

(module reader syntax/module-reader
  ; 1. Module path of the language.
  sketching/main
  ;   The module path `sketching/main` is used in the language position
  ;   of read modules. That is, reading `#lang sketching` will produce a
  ;   module with `sketching/main` as language.

  ; 2. Reader options (#:read, #:read-syntax, etc. ...)
  ; Note: When #:read and #:read-syntax are used, they both need to be supplied.
  #:read        read-sketching
  #:read-syntax read-sketching-syntax

  ; 3. Forms as in the body of racket/base

  ; After standard reading, we will rewrite
  ;      id[expr ...]
  ; to
  ;      (#%ref id expr ...).
  ; NOTE: in this version the templates below still emit the identifier
  ; `ref` rather than `#%ref`.

  ; We will use this to index to vectors, strings and hash tables.

  ; Reader used for #:read: reads one datum from `in` and adjusts it.
  (define (read-sketching [in (current-input-port)])
    (adjust (read in)))

  ; Reader used for #:read-syntax: reads one syntax object from `in`,
  ; labelled with `source-name`, and adjusts it.
  (define (read-sketching-syntax [source-name (object-name (current-input-port))]
                                 [in (current-input-port)])
    (adjust (read-syntax source-name in)))

  ; Since adjust is called after reading, we are essentially working with
  ; three passes.
  ;   - read-syntax
  ;   - adjust
  ;   - expand

  ; Let's define our `adjust` pass.

  (require racket/runtime-path racket/syntax
           (except-in syntax/parse char))
  ; (require (only-in sketching/main #%top))

  ; Helper for experiments: reads one syntax object from the string `str`;
  ; `source-name` is passed on to `read-syntax`.
  (define (read-string str #:source-name [source-name #f])
    (define in (open-input-string str))
    ; (port-count-lines! in)
    (read-syntax source-name in))

  ; Entry point of the pass: pair-shaped input goes to `adjust-dotted-list`,
  ; everything else is returned unchanged.
  (define (adjust stx)
    (syntax-parse stx
      [(a . d) (adjust-dotted-list stx)]
      [_       stx]))

  ; Conses the symbol 'ref onto its arguments.
  (define (ref . xs) ; placeholder
    (cons 'ref xs))

  ; Walks the pair-shaped syntax `stx` looking for an identifier immediately
  ; followed by a square-bracketed list (id[e ...] without whitespace) and
  ; rebuilds the list. Raises a syntax error if `stx` is not (a . more).
  ;
  ; NOTE: all results come from `syntax/loc` templates written in this module.
  ; The replies further down suggest (datum->syntax #f ...) instead, and the
  ; follow-up post reports that this resolved the ambiguous-binding error.
  (define (adjust-dotted-list stx)
    (syntax-parse stx
      ; An identifier followed by a proper list; `brackets` names that list.
      [(id:id (~and [e:expr ...] brackets)  . more)
       (cond
         ; Index expression only if the list was read with square brackets
         ; and starts exactly where the identifier ends (position + span).
         [(and (eqv? (syntax-property #'brackets 'paren-shape) #\[)
               (= (+ (syntax-position #'id) (syntax-span #'id))
                  (syntax-position #'brackets)))
          ; Result: `ref`, then the group (id e ...), then the adjusted tail.
          (with-syntax ([adjusted-more (adjust #'more)])
            (syntax/loc #'id
              (ref (id e ...) . adjusted-more)))]
         [else
          ; Not an index expression: keep `id`, continue with the tail that
          ; begins at the bracketed list.
          (with-syntax* ([(_ . rest) stx]
                         [adjusted-rest (adjust-dotted-list #'rest)])
            (syntax/loc stx
              (id . adjusted-rest)))])]
      ; Any other head: `a` is kept as is (not adjusted); only the tail is
      ; processed.
      [(a . more)
       (with-syntax ([adjusted-more (adjust #'more)])
         (syntax/loc stx
           (a . adjusted-more)))]
      [_
       (raise-syntax-error 'adjust-dotted-list "expected a dotted list" stx)]))

  ; > (displayln (adjust (read-string "(foo[bar])")))
  ; #<syntax:string::2 (ref (foo bar))>

  ; > (displayln (adjust (read-string "(foo [bar])")))
  ; #<syntax:string::1 (foo (bar))>

  )


And the definition of sketching-top is here:

[1] sketching/exports-no-gui.rkt at main · soegaard/sketching · GitHub

3 Likes

Normally, a language reader should produce a syntax object with no lexical context, because context is added by the expansion step.

When your language rewrites id[expr ...] to (ref id expr ...), what does ref mean there? Is it just a ref identifier that gets a binding from its context, similar to the implicit #%app? Or is it meant to refer always to a specific ref binding?

The former is conceptually simpler. The latter can work, and in that case your reader will produce ref syntax objects that have context. Still, you want to avoid adding context on other things, such as the parentheses wrapping the ref call, since that could lead to an ambiguous #%app.

You're getting an ambiguous #%top instead of an ambiguous #%app, though, so I may not have this quite right. Still, my best guess is that it's something about creating more syntax objects that already have context, and you'll probably need to use more (datum->syntax #f ....) than syntax/loc.

3 Likes

It's meant to work like the implicit #%app.

I'll try a version with datum->syntax.

The operation was successful - and the patient lived.

Since read and read-syntax work on individual expressions, the right place to apply adjust was the module wrapper.

The problem of the ambiguous #%app was solved with (datum->syntax #f ...) instead of syntax/loc.

Thanks for the pointer.

For reference:

(module reader syntax/module-reader
  ; 1. Module path of the language.
  sketching/main
  ;   The module path `sketching/main` is used in the language position
  ;   of read modules. That is, reading `#lang sketching` will produce a
  ;   module with `sketching/main` as language.

  ; 2. Reader options (#:read, #:read-syntax, etc. ...)
  ;   `read` and `read-syntax` work on individual expressions, so the adjust
  ;   pass is applied once, to the complete module form returned by `thunk`.
  ;   The pass depends on source positions and the 'paren-shape property,
  ;   which only syntax objects carry; in datum (`read`) mode the form is
  ;   therefore returned as is, so that `read` keeps producing a datum.
  #:module-wrapper (λ (thunk [stx? #t])
                     (if stx?
                         (adjust (thunk))
                         (thunk)))

  ; 3. Forms as in the body of racket/base

  ; After standard reading, we will rewrite
  ;      id[expr ...]
  ; to
  ;      (#%ref id expr ...).
  ; No whitespace is allowed between `id` and the opening bracket.
  ; The generated `#%ref` identifier carries no lexical context, so (like the
  ; implicit #%app) it gets its binding from the context it ends up in.

  ; We will use this to index to vectors, strings and hash tables.

  ; Since adjust is called after reading, we are essentially working with
  ; three passes.
  ;   - read-syntax
  ;   - adjust
  ;   - expand

  ; Let's define our `adjust` pass.

  (require racket/runtime-path racket/syntax
           (except-in syntax/parse char))

  ; Helper for experiments: reads one syntax object from the string `str`.
  ; `source-name` is passed on to `read-syntax`.
  (define (read-string str #:source-name [source-name #f])
    (define in (open-input-string str))
    ; (port-count-lines! in)
    (read-syntax source-name in))

  ; Entry point of the pass.
  ; Pair-shaped syntax is rewritten by `adjust-dotted-list`; everything else
  ; (identifiers, literals, the empty list, vectors, ...) is returned unchanged.
  ; The pass is purely structural: it does not look at the head of a form, so
  ; quoted data is rewritten too.
  (define (adjust stx)
    (syntax-parse stx
      [(a . d) (adjust-dotted-list stx)]
      [_       stx]))

  ; Is `brackets` a square-bracketed list that begins exactly where the
  ; identifier `id` ends, i.e. was the source written as id[...] without
  ; whitespace?  Answers #f when position information is missing instead of
  ; failing in the arithmetic.
  (define (index-brackets? id brackets)
    (define id-start   (syntax-position id))
    (define id-span    (syntax-span id))
    (define open-start (syntax-position brackets))
    (and (eqv? (syntax-property brackets 'paren-shape) #\[)
         id-start
         id-span
         open-start
         (= (+ id-start id-span) open-start)))

  ; Rewrites the pair-shaped syntax `stx` element by element.
  ;   - id[e ...] becomes (#%ref id e ...); the bracket contents are adjusted
  ;     recursively, so nested uses such as v[w[1]] are rewritten as well.
  ;   - every other element is adjusted and kept in place.
  ; All results are built with (datum->syntax #f ...), so that no lexical
  ; context from this reader module leaks into the program; `stx` only
  ; supplies the source location.
  ; Raises a syntax error if `stx` is not of the shape (a . more).
  (define (adjust-dotted-list stx)
    (syntax-parse stx
      [(id:id (~and [e:expr ...] brackets) . more)
       #:when (index-brackets? #'id #'brackets)
       (let ([indices       (syntax->list (adjust #'brackets))]
             [adjusted-more (adjust #'more)])
         (datum->syntax #f
                        `((#%ref ,#'id ,@indices) . ,adjusted-more)
                        stx))]
      ; General case. It also handles an identifier followed by a list that
      ; is not an index expression (round parentheses, or whitespace before
      ; the bracket).
      [(a . more)
       (let ([adjusted-a    (adjust #'a)]
             [adjusted-more (adjust #'more)])
         (datum->syntax #f
                        `(,adjusted-a . ,adjusted-more)
                        stx))]
      [_
       (raise-syntax-error 'adjust-dotted-list "expected a dotted list" stx)]))

  ; > (syntax->datum (adjust (read-string "(foo[bar])")))
  ; '((#%ref foo bar))

  ; > (syntax->datum (adjust (read-string "(foo [bar])")))
  ; '(foo (bar))

  ; > (syntax->datum (adjust (read-string "(foo v[1] bar)")))
  ; '(foo (#%ref v 1) bar)

  ; > (syntax->datum (adjust (read-string "(foo v[w[1]])")))
  ; '(foo (#%ref v (#%ref w 1)))

  )
3 Likes

In some cases strip-context can probably be used too.

1 Like

I had forgotten about strip-context!

At least the datum->syntax solution saves a tree traversal.