Spaces:
Sleeping
Sleeping
#!/bin/sh
# aside from this initial boilerplate, this is actually -*- scheme -*- code
#
# The shell runs the lines down to `exec'; Guile treats everything between
# `#!' and `!#' as a block comment.  $main expands to a Scheme expression
# that fetches `main' from module (scripts read-scheme-source), and the -c
# expression applies it to the command-line arguments.
main='(module-ref (resolve-module '\''(scripts read-scheme-source)) '\'main')'
# "$0" is quoted so the script still loads when its path contains whitespace.
exec ${GUILE-guile} -l "$0" -c "(apply $main (cdr (command-line)))" "$@"
!#
| ;;; read-scheme-source --- Read a file, recognizing scheme forms and comments | |
| ;; Copyright (C) 2001, 2006 Free Software Foundation, Inc. | |
| ;; | |
| ;; This program is free software; you can redistribute it and/or | |
| ;; modify it under the terms of the GNU General Public License as | |
| ;; published by the Free Software Foundation; either version 2, or | |
| ;; (at your option) any later version. | |
| ;; | |
| ;; This program is distributed in the hope that it will be useful, | |
| ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| ;; General Public License for more details. | |
| ;; | |
| ;; You should have received a copy of the GNU General Public License | |
| ;; along with this software; see the file COPYING. If not, write to | |
| ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
| ;; Boston, MA 02110-1301 USA | |
| ;;; Author: Thien-Thi Nguyen | |
| ;;; Commentary: | |
| ;; Usage: read-scheme-source FILE1 FILE2 ... | |
| ;; | |
| ;; This program parses each FILE and writes to stdout sexps that describe the | |
| ;; top-level structures of the file: scheme forms, single-line comments, and | |
| ;; hash-bang comments. You can further process these (to associate comments | |
| ;; w/ scheme forms as a kind of documentation, for example). | |
| ;; | |
| ;; The output sexps have one of these forms: | |
| ;; | |
| ;; (quote (filename FILENAME)) | |
| ;; | |
| ;; (quote (comment :leading-semicolons N | |
| ;; :text LINE)) | |
| ;; | |
| ;; (quote (whitespace :text LINE)) | |
| ;; | |
| ;; (quote (hash-bang-comment :line LINUM | |
| ;; :line-count N | |
| ;; :text-list (LINE1 LINE2 ...))) | |
| ;; | |
| ;; (quote (following-form-properties :line LINUM | |
| ;; :line-count N | |
| ;; :type TYPE | |
| ;; :signature SIGNATURE | |
| ;; :std-int-doc DOCSTRING)) | |
| ;; | |
| ;; SEXP | |
| ;; | |
| ;; The first four are straightforward (both FILENAME and LINE are strings sans | |
| ;; newline, while LINUM and N are integers). The last two always go together, | |
| ;; in that order. SEXP is scheme code processed only by `read' and then | |
| ;; `write'. | |
| ;; | |
| ;; TYPE is one of: procedure, alias, define-module, variable. Forms that | |
| ;; are not otherwise recognized are reported with TYPE `variable'. | |
| ;; | |
| ;; The :signature field may be omitted if the form is not a procedure. | |
| ;; Otherwise, SIGNATURE is a list showing the procedure's signature. | |
| ;; | |
| ;; If the type is `procedure' and the form has a standard internal docstring | |
| ;; (first body form a string), that is extracted in full -- including any | |
| ;; embedded newlines -- and recorded by field :std-int-doc. | |
| ;; | |
| ;; | |
| ;; Usage from a program: The output list of sexps can be retrieved by scheme | |
| ;; programs w/o having to capture stdout, like so: | |
| ;; | |
| ;; (use-modules (scripts read-scheme-source)) | |
| ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...)) | |
| ;; | |
| ;; There are also two convenience procs exported for use by Scheme programs: | |
| ;; | |
| ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that | |
| ;; have the same number of leading semicolons. | |
| ;; | |
| ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse | |
| ;; the ":tags", and return alist of (TAG . VAL) elems. | |
| ;; | |
| ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles. | |
| ;; Make `annotate!' extensible. | |
| ;;; Code: | |
;; Module declaration.  (ice-9 rdelim) is pulled in for `read-line'; the
;; four names listed under #:export are the bindings offered to other
;; Scheme programs.
(define-module (scripts read-scheme-source)
  #:use-module (ice-9 rdelim)
  #:export (clump
            quoted?
            read-scheme-source
            read-scheme-source-silently))
;; Classify top-level FORM and report what was learned through NOTE!.
;;
;; NOTE! is called as (NOTE! KEY VALUE), where KEY is one of the symbols
;; `:type', `:signature' or `:std-int-doc'.  Exactly one `:type' is always
;; reported:
;;
;;   procedure      (define (NAME . FORMALS) BODY ...)
;;                  (define NAME (lambda FORMALS BODY ...))
;;   alias          (define NAME OTHER-SYMBOL)
;;   define-module  (define-module ...)
;;   variable       anything else, including non-list data and `()'
;;
;; For procedures `:signature' is (NAME . FORMALS).  `:std-int-doc' is
;; reported only when the first body form is a string AND at least one more
;; body form follows; a lone string is the procedure's value, not its
;; documentation.
;;
;; The return value is not meaningful.
;;
(define (annotate! form note!)
  ;; True for (lambda FORMALS BODY ...) with at least one body form.
  (define (lambda-form? x)
    (and (list? x)
         (< 2 (length x))
         (eq? 'lambda (car x))))
  ;; Report the docstring heading BODY, if BODY has one.
  (define (note-docstring! body)
    (and (pair? body)
         (pair? (cdr body))
         (string? (car body))
         (note! ':std-int-doc (car body))))
  ;; LEN is 0 for anything that is not a proper list, so every clause
  ;; below is guarded against taking the `car' of a non-pair.
  (let ((len (if (list? form) (length form) 0)))
    (cond ((and (< 2 len)
                (eq? 'define (car form))
                (pair? (cadr form))
                (symbol? (caadr form)))
           (note! ':type 'procedure)
           (note! ':signature (cadr form))
           (note-docstring! (cddr form)))
          ((and (< 2 len)
                (eq? 'define (car form))
                (symbol? (cadr form))
                (lambda-form? (caddr form)))
           (let ((lam (caddr form)))
             (note! ':type 'procedure)
             (note! ':signature (cons (cadr form) (cadr lam)))
             (note-docstring! (cddr lam))))
          ((and (= 3 len)
                (eq? 'define (car form))
                (symbol? (cadr form))
                (symbol? (caddr form)))
           (note! ':type 'alias))
          ((and (< 0 len)
                (eq? 'define-module (car form)))
           (note! ':type 'define-module))
          ;; Add other types here.
          (else
           (note! ':type 'variable)))))
;; Process FILE, calling NB! once per parsed top-level element, in file
;; order.  The first element reported is always (quote (filename FILE)).
;;
;; Each line is classified by the first rule that matches:
;;   - starts with "#!": lines are gathered up to and including the one
;;     starting with "!#" and reported as one `hash-bang-comment'.  If the
;;     file ends first, only the lines actually read are reported.
;;   - only blanks and tabs: reported as `whitespace'.
;;   - optional blanks/tabs followed by semicolons: reported as `comment';
;;     :leading-semicolons counts the semicolons only, not the indentation.
;;   - anything else: the line is pushed back onto the port, one datum is
;;     `read', and `following-form-properties' plus the datum are reported.
;;
;; The port is closed once the end of FILE is reached.  Returns #t.
;;
(define (process file nb!)
  (nb! `'(filename ,file))
  (let ((hash-bang-rx (make-regexp "^#!"))
        (bang-hash-rx (make-regexp "^!#"))
        (all-comment-rx (make-regexp "^[ \t]*(;+)"))
        (all-whitespace-rx (make-regexp "^[ \t]*$")))
    (call-with-input-file file
      (lambda (p)
        (let loop ()
          ;; `let*' pins the order: N must be taken before the line is
          ;; consumed so that it is the 1-based number of LINE.
          (let* ((n (1+ (port-line p)))
                 (line (read-line p)))
            (or (eof-object? line)
                (begin
                  (cond ((regexp-exec hash-bang-rx line)
                         (let scan ((next (read-line p))
                                    (text (list line)))
                           (cond ((eof-object? next)
                                  ;; Unterminated block: keep the EOF
                                  ;; object out of the reported text.
                                  (nb! `'(hash-bang-comment
                                          :line ,n
                                          :line-count ,(length text)
                                          :text-list ,(reverse text))))
                                 ((regexp-exec bang-hash-rx next)
                                  (nb! `'(hash-bang-comment
                                          :line ,n
                                          :line-count ,(1+ (length text))
                                          :text-list ,(reverse
                                                       (cons next text)))))
                                 (else
                                  (scan (read-line p)
                                        (cons next text))))))
                        ((regexp-exec all-whitespace-rx line)
                         (nb! `'(whitespace :text ,line)))
                        ((regexp-exec all-comment-rx line)
                         => (lambda (m)
                              ;; In the match vector slot 0 is the string,
                              ;; slot 1 the extent of the whole match and
                              ;; slot 2 the extent of the (;+) group.
                              (let ((semis (vector-ref m 2)))
                                (nb! `'(comment
                                        :leading-semicolons
                                        ,(- (cdr semis) (car semis))
                                        :text ,line)))))
                        (else
                         ;; NOTE(review): the line accounting below is kept
                         ;; exactly as it was.  It depends on how `read-line',
                         ;; `unread-string' and `read' move the port's line
                         ;; counter -- confirm before changing it.
                         (unread-string line p)
                         (let* ((form (read p))
                                (count (- (port-line p) n))
                                (props (let ((acc '()))
                                         (annotate! form
                                                    (lambda args
                                                      (set! acc
                                                            (append acc args))))
                                         acc)))
                           (or (= count 1) ; ugh
                               (begin
                                 (read-line p)
                                 (set! count (1+ count))))
                           (nb! `'(following-form-properties
                                   :line ,n
                                   :line-count ,count
                                   ,@props))
                           (nb! form))))
                  (loop)))))))))
| ;;; entry points | |
;; Run `process' over each of FILES in order and hand back every reported
;; element as one list, in the order reported.  Nothing is printed.
(define (read-scheme-source-silently . files)
  "See commentary in module (scripts read-scheme-source)."
  (let* ((collected '())
         (remember! (lambda (elem)
                      ;; Newest first; the list is flipped once at the end.
                      (set! collected (cons elem collected)))))
    (for-each (lambda (file)
                (process file remember!))
              files)
    (reverse collected)))
;; Run `process' over each of FILES in order, writing every reported
;; element to the current output port with `write', one per line.
(define (read-scheme-source . files)
  "See commentary in module (scripts read-scheme-source)."
  (let ((emit (lambda (elem)
                (write elem)
                (newline))))
    (for-each (lambda (file)
                (process file emit))
              files)))
;; See whether FORM has the shape (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...)).
;;
;; Return #f when it does not.  Otherwise return an alist of (TAG . VAL)
;; pairs in which each TAG is the tag symbol with its first character (the
;; colon) removed.  Pairs are accumulated by consing onto the front, so the
;; alist lists the LAST tag first: ((TAG2 . VAL2) (TAG1 . VAL1)).  A form
;; with no tags yields the empty list, which is still a true value.
;;
;; Signal the error "bad list!" if something other than a symbol appears in
;; a tag position.
;;
(define (quoted? sym form)
  ;; :foo => foo
  (define (tag->key tag)
    (string->symbol (substring (symbol->string tag) 1)))
  ;; Walk REST two elements at a time, building up ALIST.
  (define (collect rest alist)
    (cond ((null? rest)
           alist)
          ((symbol? (car rest))
           (collect (cddr rest)
                    (acons (tag->key (car rest))
                           (cadr rest)
                           alist)))
          (else
           (error "bad list!"))))
  (and (list? form)
       (= 2 (length form))
       (eq? 'quote (car form))
       (let ((inside (cadr form)))
         (and (list? inside)
              (pair? inside)
              (eq? sym (car inside))
              (collect (cdr inside) '())))))
;; Filter FORMS (as produced by `read-scheme-source-silently'), merging each
;; run of adjacent comment forms that share the same number of leading
;; semicolons into a single element of the shape (LEVEL LINE1 LINE2 ...).
;;
;; Whitespace forms and hash-bang-comment forms are dropped; since they are
;; not comment forms, they also end a run.  A following-form-properties form
;; and the element right after it are passed through untouched, as is
;; anything else that is not recognized.
;;
(define (clump forms)
  ;; Gather the comment forms at the front of REST whose level equals LEVEL,
  ;; pushing their text onto LINES (newest first).
  ;; Return (LINES . REMAINING-FORMS).
  (define (take-run level rest lines)
    (let ((alist (and (pair? rest)
                      (quoted? 'comment (car rest)))))
      (if (and alist
               (= (assq-ref alist 'leading-semicolons) level))
          (take-run level
                    (cdr rest)
                    (cons (assq-ref alist 'text) lines))
          (cons lines rest))))
  (let next ((todo forms) (done '()) (verbatim? #f))
    (cond ((null? todo)
           (reverse done))              ; retval
          ;; The element after following-form-properties is the form itself.
          (verbatim?
           (next (cdr todo) (cons (car todo) done) #f))
          ((quoted? 'following-form-properties (car todo))
           (next (cdr todo) (cons (car todo) done) #t))
          ;; Ignored: whitespace, and (for now) hash-bang comments.
          ((or (quoted? 'whitespace (car todo))
               (quoted? 'hash-bang-comment (car todo)))
           (next (cdr todo) done #f))
          ((quoted? 'comment (car todo))
           => (lambda (alist)
                (let* ((level (assq-ref alist 'leading-semicolons))
                       (run (take-run level
                                      (cdr todo)
                                      (list (assq-ref alist 'text)))))
                  (next (cdr run)
                        (cons (cons level (reverse (car run))) done)
                        #f))))
          (else
           (next (cdr todo) (cons (car todo) done) #f)))))
| ;;; script entry point | |
| ;; The shell boilerplate at the top of this file looks up `main' in this | |
| ;; module and applies it to the command-line arguments, so running the | |
| ;; script is the same as calling `read-scheme-source' on those arguments. | |
| (define main read-scheme-source) | |
| ;;; read-scheme-source ends here | |