Текст пузомерки на Лисп

Материал из MediaWiki
Перейти к навигации Перейти к поиску

Совет

Яничегонепонял. Напишите понятно, шо это такое и зачем нужно

Программа писана больше для личного употребления, поэтому у нее практически нет интерфейса. Файл для подсчета захардкоден в самом конце, в виде пути к файлу типа "advlib.lisp".

Для разбора различных языков используются макросы типа make-lisp-classifier, использование конкретного макроса тоже хардкодидся в последней строке.

Примечание: текст слегка не влез по ширине, поэтому советую скопипастить его для изучения и/или использования. Если кто-нибудь отформатирует, буду очень благодарен --Евгений Косенко 05-Jul-2008 17:25 MSD

(require 'lexer)
(in-package lexer)

(defun lex-class (class-def)
  (mapcar 
  #'(lambda (regex) `(,regex (return (values ,(car class-def) %0))))
  (cdr class-def)))

(defmacro defclassifier (name class-list skip-list)
  `(deflexer ,name
    :flex-compatible 
    ,@(mapcan #'lex-class class-list)
    ,@(mapcar #'list skip-list)))

(defclassifier make-ocaml-classifier
  ((:keyword
   .("!=" "%" "&" "&&" "," ":" "::" ";" ";;" "<" "<\\-" "=" ">" ">=" "@" "Array"
    "List" "Queue" "\\(" "\\)" "\\*" "\\+" "\\-" "\\->" "\\." "\\[" "\\]" "\\{"
    "\\|" "\\|\\|" "\\}" "assoc" "create" "do" "done" "else" "flush" "for" "fun"
    "function" "if" "in" "iter" "length" "let" "loop" "make_matrix" "map" "not"
    "pop" "push" "raise" "then" "to" "true" "try" "with" "filter" "make" "init"
    "string" "String" "index" "ignore" "rec" "open" "val" "bool" "int" "array"
    "when" "copy"))

  (:meanword
   .("\"[^\"]*\"" "'[^']*'" "(-)?[0-9]+" "[A-Za-z_][A-Za-z_]*(')?")))

  ;skipword
  ("//.*$" "[:space:]+"))

(defclassifier make-c++-classifier
  ((:keyword
   .("!=" "#include" "%" "&" "," ":" "::" ";" "<" "<<" "=" ">" ">=" ">>" "\\("
    "\\)" "\\*" "\\+" "\\+\\+" "\\->" "\\." "\\[" "\\]" "\\{" "\\|\\|" "\\}"
    "bool" "class" "const" "for" "if" "int" "namespace" "operator" "return" "std"
    "struct" "try" "typedef" "using" "void"))

  (:meanword
   .("\"[^\"]*\"" "(-)?[0-9]+" "[A-Za-z_][A-Za-z_]*")))

  ;skipword
  ("//.*$" "[:space:]+"))

(defclassifier make-python-classifier
  ((:keyword 
   .("\\|" "\\&" "\\\\" "\\+=" "==" "\\-" "\\{" "\\}" "!=" "%" "," ":" "<" "=" ">" ">=" "\\(" "\\)" "\\*" "\\+" "\\." 
    "\\[" "\\]"
    "__class__" "__init__" "__name__" "__unicode__" "append" "class" "cond" "def"
    "dict" "except" "for" "if" "import" "in" "join" "lambda" "len" "list" "print"
    "return" "setattr" "try" "unicode" "yield" "raise" "or" "pass" "xrange" "__str__" "break" 
    "None" "is" "while" "has_key" "from"))

  (:meanword
   .("`[^']+`" "\"[^\"]*\"" "(-)?[0-9]+" "[A-Za-z_][A-Za-z_]*" "u?'[^']*'")))

  ;skipword  
  ("#.*$" "[:space:]+"))

(defclassifier make-lisp-classifier
  ((:keyword 
   .("&body" "#\\." "\\-" "#[cC]" "1-" "\\." "#'" "%" "&key" "&optional" "'" "," ",@" "/" "1\\+" ":" ":test" "<" "<>" "="
    ">" ">=" "\\(" "\\)" "\\*" "\\+" "`" "and" "break" "caar" "cond" "defun" "eq"
    "equal" "gethash" "if" "in-package" "lambda" "length" "list" "make-hash-table"
    "make-instance" "not" "print" "progn" "push" "setf" "setq" "slot-value"
    "string=" "t" "terpri" "append" "reverse" "cadr" "min" "reduce" "assoc" "throw"
    "mapcan" "remove-if" "defmacro" "remove" "pairlis" "defconstant" "across" "read-line" "listen"
    "while" "upfrom" "with-open-file" "finally" "format" "princ" "complex" "let" "do" "to" "from" "for"
    "loop" "eql" "member" "null" "or" "imagpart" "realpart" "cons" "car" "cdr" "nil" "defvar"
    "eval" "eval" "mapcar" "&rest" "read" "with-open-stream" "return" "unless" "read-from-string"
    "string" "concatenate" "remove-dublicates" "#c" ":conc-name" "defctruct" "max" "case" "require"
    "initially" "remove-duplicates" "quote" ":count" "when" "upto" "\\(\\)" "count" "numberp"))

  (:meanword
   .("#\\\\." "\"[^\"]*\"" "(\\-)?[0-9]+" "[A-Za-z][A-Za-z0-9\\-\\+!]*" "[\\[]" "[\\]]")))

  ;skipword  
  (";.*$" "[:space:]+"))

(defun collect-stats (token-stream keywords meanwords trace-lex)
   (multiple-value-bind 
    (class value) (funcall token-stream)
    (cond ((eq class nil))
        (t (cond ((eq class :keyword)
              (setf (gethash value keywords) (1+ (gethash value keywords 0))))
              ((eq class :meanword)
              (setf (gethash value meanwords) (1+ (gethash value meanwords 0))))
              (t (throw :invalid-class (values nil class))))
          (cond
          (trace-lex
           (prin1 class)
           (prin1 " ")
           (prin1 value)
           (terpri)))
          (collect-stats token-stream keywords meanwords trace-lex)))))

(defun hash-sum (hash)
  (let ((sum 0)) 
   (maphash #'(lambda (k v) (declare (ignore k)) (incf sum v)) hash)
   sum))

(defun hash-keys (hash)
  (let ((keys ()))
   (maphash #'(lambda (k v) (declare (ignore v)) (push k keys)) hash) keys))

(defun % (part total)
  (values (round (float (* 100 (/ part total))))))

(defun lo-stats (token-stream trace-lex show-classes)
     (let 
      ((keywords (make-hash-table :test #'equal)) (meanwords (make-hash-table :test #'equal)))
    (collect-stats token-stream keywords meanwords trace-lex)
    (cond
     (show-classes
      (princ "Keywords: ")
      (prin1 (hash-keys keywords))
      (terpri)
      (princ "Meanwords: ")
      (prin1 (hash-keys meanwords))
      (terpri)))
    (values
     (hash-sum keywords)
     (hash-sum meanwords)
     (hash-table-count keywords)
     (hash-table-count meanwords))))

(defun hi-stats (token-stream &key (trace-lex nil) (show-classes nil))
  (multiple-value-bind 
  (keyword-length meaning-length keyword-thesaurus meaning-thesaurus)
  (lo-stats token-stream trace-lex show-classes)
  (let ((total-length (+ keyword-length meaning-length))
      (total-thesaurus (+ keyword-thesaurus meaning-thesaurus)))
    (list
    (cons "Total length" total-length)
    (cons "Meaning length" meaning-length)
    (cons "Total thesaurus" total-thesaurus)
    (cons "Meaning thesaurus" meaning-thesaurus)
    (cons "Total saturation (%)" (% meaning-length total-length))
    (cons "Thesaurus saturation (%)" (% meaning-thesaurus total-thesaurus))
    (cons "Total expressiveness (%)" (% total-thesaurus total-length))
    (cons "Meaning expressiveness (%)" 
        (if (> meaning-length 0) (% meaning-thesaurus meaning-length) 0))))))

(defun print-result (result-table)
  (mapcar
   #'(lambda (pair) (format t "~A~50T~7D~%" (car pair) (cdr pair)))
   result-table))

(defun read-content (file-name)
  (with-open-file 
  (s file-name)
  (let
     ((data (make-string (file-length s)))) 
    (read-sequence data s) data)))

(print-result (hi-stats (make-lisp-classifier (read-content "advworld.lisp"))))