[as2api-dev] [CVS trunk] Move AS-lexer-specifics to a new file, leaving a small core of reusable code.

David Holroyd dave at badgers-in-foil.co.uk
Tue, 12 Jul 2005 21:06:33 +0000


<html>
<head>
<style><!--
  body {background-color:#ffffff;}
  .file {border:1px solid #eeeeee;margin-top:1em;margin-bottom:1em;}
  .pathname {font-family:monospace; float:right;}
  .fileheader {margin-bottom:.5em;}
  .diff {margin:0;}
  .tasklist {padding:4px;border:1px dashed #000000;margin-top:1em;}
  .tasklist ul {margin-top:0;margin-bottom:0;}
  tr.alt {background-color:#eeeeee}
  #added {background-color:#ddffdd;}
  #addedchars {background-color:#99ff99;font-weight:bolder;}
  tr.alt #added {background-color:#ccf7cc;}
  #removed {background-color:#ffdddd;}
  #removedchars {background-color:#ff9999;font-weight:bolder;}
  tr.alt #removed {background-color:#f7cccc;}
  #info {color:#888888;}
  #context {background-color:#eeeeee;}
  td {padding-left:.3em;padding-right:.3em;}
  tr.head {border-bottom-width:1px;border-bottom-style:solid;}
  tr.head td {padding:0;padding-top:.2em;}
  .task {background-color:#ffff00;}
  .comment {padding:4px;border:1px dashed #000000;background-color:#ffffdd}
  .error {color:red;}
  hr {border-width:0px;height:2px;background:black;}
--></style>
</head>
<body>
<table cellspacing="0" cellpadding="0" border="0" rules="cols">
<tr class="head"><td colspan="4">Commit in <b><tt>trunk/as2api</tt></b><span id="info"> on MAIN</span></td></tr>
<tr><td><tt><a href="#file1">api_loader.rb</a></tt></td><td align="right" id="added">+1</td><td align="right" id="removed">-1</td><td nowrap="nowrap" align="center"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/api_loader.rb?rev=193&amp;content-type=text/vnd.viewcvs-markup">193</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/api_loader.rb.diff?r1=193&amp;r2=194">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/api_loader.rb?rev=194&amp;content-type=text/vnd.viewcvs-markup">194</a></td></tr>
<tr class="alt"><td><tt>parse/<a href="#file2"><span id="added">aslexer.rb</span></a></tt></td><td align="right" id="added">+372</td><td></td><td nowrap="nowrap" align="right">added <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=194&amp;content-type=text/vnd.viewcvs-markup">194</a></td></tr>
<tr><td><tt>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;/<a href="#file3">lexer.rb</a></tt></td><td align="right" id="added">+37</td><td align="right" id="removed">-382</td><td nowrap="nowrap" align="center"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=193&amp;content-type=text/vnd.viewcvs-markup">193</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb.diff?r1=193&amp;r2=194">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=194&amp;content-type=text/vnd.viewcvs-markup">194</a></td></tr>
<tr class="alt"><td><tt><a href="#file4">tc_actionscript_lexer.rb</a></tt></td><td align="right" id="added">+1</td><td align="right" id="removed">-1</td><td nowrap="nowrap" align="center"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/tc_actionscript_lexer.rb?rev=193&amp;content-type=text/vnd.viewcvs-markup">193</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/tc_actionscript_lexer.rb.diff?r1=193&amp;r2=194">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/tc_actionscript_lexer.rb?rev=194&amp;content-type=text/vnd.viewcvs-markup">194</a></td></tr>
<tr><td></td><td align="right" id="added">+411</td><td align="right" id="removed">-384</td><td></td></tr>
</table>
<small id="info">1 added + 3 modified, total 4 files</small><br />
<div class="tasklist"><ul>
<li><a href="#task1">TODO Create an EOFToken (so that we can report its line number)</a></li>
<li><a href="#task2">TODO: whitespace tokens don't span lines, which might not be the expected</a></li>
</ul></div>
<pre class="comment">
Move AS-lexer-specifics to a new file, leaving a small core of reusable code.

I'm hoping that this small core can be used to write a better doc-comment
parser.  Even if that doesn't work out, I think this new arrangement is a
little cleaner.
</pre>
<hr /><a name="file1" /><div class="file">
<span class="pathname"><a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk">trunk</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api">as2api</a></span><br />
<div class="fileheader"><big><b>api_loader.rb</b></big> <small id="info"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/api_loader.rb?rev=193&amp;content-type=text/vnd.viewcvs-markup">193</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/api_loader.rb.diff?r1=193&amp;r2=194">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/api_loader.rb?rev=194&amp;content-type=text/vnd.viewcvs-markup">194</a></small></div>
<pre class="diff"><small id="info">--- trunk/as2api/api_loader.rb	2005-07-12 17:16:15 UTC (rev 193)
+++ trunk/as2api/api_loader.rb	2005-07-12 21:06:32 UTC (rev 194)
@@ -1,5 +1,5 @@
</small></pre><pre class="diff" id="context"> 
</pre><pre class="diff" id="removed">-require 'parse/lexer'
</pre><pre class="diff" id="added">+require 'parse/<span id="addedchars">as</span>lexer'
</pre><pre class="diff" id="context"> require 'parse/parser'
 require 'api_model'
 require 'doc_comment'
</pre></div>
<hr /><a name="file2" /><div class="file">
<span class="pathname" id="added"><a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk">trunk</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api">as2api</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse">parse</a></span><br />
<div class="fileheader" id="added"><big><b>aslexer.rb</b></big> <small id="info">added at <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=194&amp;content-type=text/vnd.viewcvs-markup">194</a></small></div>
<pre class="diff"><small id="info">--- trunk/as2api/parse/aslexer.rb	2005-07-12 17:16:15 UTC (rev 193)
+++ trunk/as2api/parse/aslexer.rb	2005-07-12 21:06:32 UTC (rev 194)
@@ -0,0 +1,372 @@
</small></pre><pre class="diff" id="added">+
+require 'parse/lexer'
+
+module ActionScript
+module Parse
+
<a name="task1" />+# <span class="task">TODO</span> Create an EOFToken (so that we can report its line number)
+
+class ASToken
+  def initialize(body, lineno)
+    @body = body
+    @lineno = lineno
+  end
+  def body
+    @body
+  end
+  def lineno
+    @lineno
+  end
+  def to_s
+    @body
+  end
+end
+
+class CommentToken &lt; ASToken
+end
+
+class NumberToken &lt; CommentToken
+end
+
+class SingleLineCommentToken &lt; CommentToken
+  def to_s
+    "//#{@body}"
+  end
+end
+
+class MultiLineCommentToken &lt; CommentToken
+  def to_s
+    "/*#{@body}*/"
+  end
+end
+
+class WhitespaceToken &lt; ASToken
+end
+
+class IdentifierToken &lt; ASToken
+end
+
+class StringToken &lt; ASToken
+  def initialize(body, lineno)
+    @body = unescape(body)
+    @lineno = lineno
+  end
+
+  def to_s
+    "\"#{escape(@body)}\""
+  end
+
+  def escape(text)
+    text.gsub(/./m) do
+      case $&amp;
+        when "\\" then "\\\\"
+        when "\"" then "\\\""
+	when "\n" then "\\n"
+	when "\t" then "\\t"
+        else $&amp;
+      end
+    end
+  end
+
+  def unescape(text)
+    escape = false
+    text.gsub(/./) do
+      if escape
+        escape = false
+        case $&amp;
+          when "\\" then "\\"
+          when "n" then "\n"
+          when "t" then "\t"
+          else $&amp;
+        end
+      else
+        case $&amp;
+	  when "\\" then escape=true; ""
+	  else $&amp;
+	end
+      end
+    end
+  end
+end
+
+# "get" and "set" were initially included in this list, since they are used
+# as modifiers to function declarations.  They are also allowed to appear as
+# identifiers, unfortunately, so we treat them as such, and have the parser
+# make special checks on the identifier body.
+Keywords = [
+  "as",
+  "break",
+  "case",
+  "catch",
+  "class",
+  "const",
+  "continue",
+  "default",
+  "dynamic",     # non-ECMA
+  "delete",
+  "do",
+  "else",
+  "extends",
+  "false",
+  "finally",
+  "for",
+  "function",
+  "if",
+  "implements",  # reserved, but unused in ECMA
+  "import",
+  "in",
+  "instanceof",
+  "interface",   # reserved, but unused in ECMA
+  "intrinsic",   # non-ECMA
+#  "is",         # not a keyword in AS
+#  "namespace",  # not a keyword in AS
+  "new",
+  "null",
+  "package",
+  "private",
+  "public",
+  "return",
+  "static",      # non-ECMA
+  "super",
+  "switch",
+  "this",
+  "throw",
+  "true",
+  "try",
+  "typeof",
+  "use",
+  "var",
+  "void",
+  "while",
+  "with"
+]
+
+Reserved = [
+  "abstract",
+  "debugger",
+  "enum",
+  "export",
+  "goto",
+  "native",
+  "protected",
+  "synchronized",
+  "throws",
+  "transient",
+  "volatile"
+]
+
+Punctuation = [
+  [:DivideAssign,         "/="],
+  [:Divide,               "/"],
+  [:BitNot,               "~"],
+  [:RBrace,               "}"],
+  [:OrAssign,             "||="],
+  [:Or,                   "||"],
+  [:BitOrAssign,          "|="],
+  [:BitOr,                "|"],
+  [:LBrace,               "{"],
+  [:XOrAssign,            "^^="],
+  [:XOr,                  "^^"],
+  [:BitXOrAssign,         "^="],
+  [:BitXOr,               "^"],
+  [:RBracket,             "]"],
+  [:LBracket,             "["],
+  [:Hook,                 "?"],
+  [:RShiftUnsignedAssign, "&gt;&gt;&gt;="],
+  [:RShiftUnsigned,       "&gt;&gt;&gt;"],
+  [:RShiftAssign,         "&gt;&gt;="],
+  [:RShift,               "&gt;&gt;"],
+  [:GreaterEquals,        "&gt;="],
+  [:Greater,              "&gt;"],
+  [:Same,                 "==="],
+  [:Equals,               "=="],
+  [:Assign,               "="],
+  [:LessEquals,           "&lt;="],
+  [:LShiftAssign,         "&lt;&lt;="],
+  [:LShift,               "&lt;&lt;"],
+  [:Less,                 "&lt;"],
+  [:Semicolon,            ";"],
+  [:Member,               "::"],
+  [:Colon,                ":"],
+  [:Ellipsis,             "..."],
+  [:Dot,                  "."],
+  [:MinusAssign,          "-="],
+  [:Decrement,            "--"],
+  [:Minus,                "-"],
+  [:Comma,                ","],
+  [:PlusAssign,           "+="],
+  [:Increment,            "++"],
+  [:Plus,                 "+"],
+  [:StarAssign,           "*="],
+  [:Star,                 "*"],
+  [:RParen,               ")"],
+  [:LParen,               "("],
+  [:BitAndAssign,         "&amp;="],
+  [:AndAssign,            "&amp;&amp;="],
+  [:And,                  "&amp;&amp;"],
+  [:BitAnd,               "&amp;"],
+  [:ModuloAssign,         "%="],
+  [:Modulo,               "%"],
+  [:BangSame,             "!=="],
+  [:BangEquals,           "!="],
+  [:Bang,                 "!"]
+]
+
+  h =		"[0-9a-fA-F]"
+  nl =		"\\n|\\r\\n|\\r|\\f"
+  nonascii =	"[\\200-\\377]"
+  unicode =	"\\\\#{h}{1,6}[ \\t\\r\\n\\f]?"
+  escape =	"(?:#{unicode}|\\\\[ -~\\200-\\377])"
+  nmstart =	"(?:[a-zA-Z_$]|#{nonascii}|#{escape})"
+  nmchar =	"(?:[a-zA-Z0-9_$]|#{nonascii}|#{escape})"
+  SINGLE_LINE_COMMENT = "//([^\n\r]*)"
+  OMULTI_LINE_COMMENT = "/\\*"
+  CMULTI_LINE_COMMENT = "\\*/"
+  STRING_START1 = "'"
+  STRING_END1 = "((?:(?:\\\\')|[\\t !\#$%&amp;(-~]|#{nl}|\"|#{nonascii}|#{escape})*)\'"
+  STRING_START2 = '"'
+  STRING_END2 = "((?:(?:\\\\\")|[\\t !\#$%&amp;(-~]|#{nl}|'|#{nonascii}|#{escape})*)\""
+  WHITESPACE = "[ \t\r\n\f]+"
+
+
+  IDENT =	"#{nmstart}#{nmchar}*"
+#  name =	"#{nmchar}+"
+  NUM	 =	"[0-9]+|[0-9]*\\.[0-9]+"
+#  string =	"#{string1}|#{string2}"
+  w =		"[ \t\r\n\f]*"
+
+class ASLexer &lt; AbstractLexer
+
+
+  def lex_simple_token(class_sym, match, io)
+    ActionScript::Parse.const_get(class_sym).new(io.lineno)
+  end
+
+  def lex_simplebody_token(class_sym, match, io)
+    ActionScript::Parse.const_get(class_sym).new(match[0], io.lineno)
+  end
+
+  def lex_singlelinecoomment_token(class_sym, match, io)
+    SingleLineCommentToken.new(match[1], io.lineno)
+  end
+
+  def lex_multilinecomment_token(class_sym, match, io)
+    lineno = io.lineno
+    line = match.post_match
+    comment = ''
+    until line =~ /\*\//o
+      comment &lt;&lt; line
+      line = io.readline;
+    end
+    comment &lt;&lt; $`
+    match.string = $'
+    MultiLineCommentToken.new(comment, lineno)
+  end
+
+  def lex_string1_token(class_sym, match, io)
+    lineno = io.lineno
+    line = match.post_match
+    str = ''
+    until line =~ /#{STRING_END1}/o
+      str &lt;&lt; line
+      line = io.readline;
+      raise "#{lineno}:unexpected EOF in string" if line.nil?
+    end
+    str &lt;&lt; $1
+    match.string = $'
+    StringToken.new(str, lineno)
+  end
+
+  def lex_string2_token(class_sym, match, io)
+    lineno = io.lineno
+    line = match.post_match
+    str = ''
+    until line =~ /#{STRING_END2}/o
+      str &lt;&lt; line
+      line = io.readline;
+      raise "#{lineno}:unexpected EOF in string" if line.nil?
+    end
+    str &lt;&lt; $1
+    match.string = $'
+    StringToken.new(str, lineno)
+  end
+
+
+end
+
+def self.build_lexer
+  builder = LexerBuilder.new
+
<a name="task2" />+  # <span class="task">TODO</span>: whitespace tokens don't span lines, which might not be the expected
+  #       behaviour
+  builder.add_match(WHITESPACE, :lex_simplebody_token, :WhitespaceToken)
+
+  builder.add_match(SINGLE_LINE_COMMENT, :lex_singlelinecoomment_token, :SingleLineCommentToken)
+
+  builder.add_match(OMULTI_LINE_COMMENT, :lex_multilinecomment_token, :MultiLineCommentToken)
+
+  Keywords.each do |keyword|
+    builder.make_keyword_token(keyword)
+  end
+
+  Punctuation.each do |punct|
+    builder.make_punctuation_token(*punct)
+  end
+
+  builder.add_match(IDENT, :lex_simplebody_token, :IdentifierToken)
+
+  builder.add_match(STRING_START1, :lex_string1_token, :StringToken)
+
+  builder.add_match(STRING_START2, :lex_string2_token, :StringToken)
+
+  builder.add_match(NUM, :lex_simplebody_token, :NumberToken)
+
+  builder.build_lexer(ASLexer)
+end
+
+build_lexer
+
+class SkipASLexer
+  def initialize(lexer)
+    @lex = lexer
+    @handler = nil
+  end
+
+  def handler=(handler)
+    @handler = handler
+  end
+
+  def get_next
+    while skip?(tok=@lex.get_next)
+      notify(tok)
+    end
+    tok
+  end
+
+  def peek_next
+    while skip?(tok=@lex.peek_next)
+      notify(tok)
+      @lex.get_next
+    end
+    tok
+  end
+
+  protected
+
+  def skip?(tok)
+    tok.is_a?(CommentToken) || tok.is_a?(WhitespaceToken)
+  end
+
+  def notify(tok)
+    unless @handler.nil?
+      @handler.comment(tok.body)
+    end
+  end
+end
+
+
+end  # module Parse
+end  # module ActionScript
+
+
+# vim:shiftwidth=2:softtabstop=2
</pre></div>
<hr /><a name="file3" /><div class="file">
<span class="pathname"><a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk">trunk</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api">as2api</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse">parse</a></span><br />
<div class="fileheader"><big><b>lexer.rb</b></big> <small id="info"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=193&amp;content-type=text/vnd.viewcvs-markup">193</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb.diff?r1=193&amp;r2=194">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=194&amp;content-type=text/vnd.viewcvs-markup">194</a></small></div>
<pre class="diff"><small id="info">--- trunk/as2api/parse/lexer.rb	2005-07-12 17:16:15 UTC (rev 193)
+++ trunk/as2api/parse/lexer.rb	2005-07-12 21:06:32 UTC (rev 194)
@@ -4,224 +4,9 @@
</small></pre><pre class="diff" id="context"> module ActionScript
 module Parse
 
</pre><pre class="diff" id="removed">-# TODO Create an EOFToken (so that we can report its line number)
</pre><pre class="diff" id="context"> 
</pre><pre class="diff" id="removed">-class ASToken
-  def initialize(body, lineno)
-    @body = body
-    @lineno = lineno
-  end
-  def body
-    @body
-  end
-  def lineno
-    @lineno
-  end
-  def to_s
-    @body
-  end
-end
</pre><pre class="diff" id="added">+class AbstractLexer
</pre><pre class="diff" id="context"> 
</pre><pre class="diff" id="removed">-class CommentToken &lt; ASToken
-end
-
-class NumberToken &lt; CommentToken
-end
-
-class SingleLineCommentToken &lt; CommentToken
-  def to_s
-    "//#{@body}"
-  end
-end
-
-class MultiLineCommentToken &lt; CommentToken
-  def to_s
-    "/*#{@body}*/"
-  end
-end
-
-class WhitespaceToken &lt; ASToken
-end
-
-class IdentifierToken &lt; ASToken
-end
-
-class StringToken &lt; ASToken
-  def initialize(body, lineno)
-    @body = unescape(body)
-    @lineno = lineno
-  end
-
-  def to_s
-    "\"#{escape(@body)}\""
-  end
-
-  def escape(text)
-    text.gsub(/./m) do
-      case $&amp;
-        when "\\" then "\\\\"
-        when "\"" then "\\\""
-	when "\n" then "\\n"
-	when "\t" then "\\t"
-        else $&amp;
-      end
-    end
-  end
-
-  def unescape(text)
-    escape = false
-    text.gsub(/./) do
-      if escape
-        escape = false
-        case $&amp;
-          when "\\" then "\\"
-          when "n" then "\n"
-          when "t" then "\t"
-          else $&amp;
-        end
-      else
-        case $&amp;
-	  when "\\" then escape=true; ""
-	  else $&amp;
-	end
-      end
-    end
-  end
-end
-
-# "get" and "set" where initially included in this list, since they are used
-# as modifiers to function declarations.  The are also allowed to appear as
-# identifiers, unfortunately, so we treat them as such, and have the parser
-# make special checks on the identifier body.
-Keywords = [
-  "as",
-  "break",
-  "case",
-  "catch",
-  "class",
-  "const",
-  "continue",
-  "default",
-  "dynamic",     # non-ECMA
-  "delete",
-  "do",
-  "else",
-  "extends",
-  "false",
-  "finally",
-  "for",
-  "function",
-  "if",
-  "implements",  # reserved, but unused in ECMA
-  "import",
-  "in",
-  "instanceof",
-  "interface",   # reserved, but unused in ECMA
-  "intrinsic",   # non-ECMA
-#  "is",         # not a keyword in AS
-#  "namespace",  # not a keyword in AS
-  "new",
-  "null",
-  "package",
-  "private",
-  "public",
-  "return",
-  "static",      # non-ECMA
-  "super",
-  "switch",
-  "this",
-  "throw",
-  "true",
-  "try",
-  "typeof",
-  "use",
-  "var",
-  "void",
-  "while",
-  "with"
-]
-
-Reserved = [
-  "abstract",
-  "debugger",
-  "enum",
-  "export",
-  "goto",
-  "native",
-  "protected",
-  "synchronized",
-  "throws",
-  "transient",
-  "volatile"
-]
-
-Punctuation = [
-  [:DivideAssign,         "/="],
-  [:Divide,               "/"],
-  [:BitNot,               "~"],
-  [:RBrace,               "}"],
-  [:OrAssign,             "||="],
-  [:Or,                   "||"],
-  [:BitOrAssign,          "|="],
-  [:BitOr,                "|"],
-  [:LBrace,               "{"],
-  [:XOrAssign,            "^^="],
-  [:XOr,                  "^^"],
-  [:BitXOrAssign,         "^="],
-  [:BitXOr,               "^"],
-  [:RBracket,             "]"],
-  [:LBracket,             "["],
-  [:Hook,                 "?"],
-  [:RShiftUnsignedAssign, "&gt;&gt;&gt;="],
-  [:RShiftUnsigned,       "&gt;&gt;&gt;"],
-  [:RShiftAssign,         "&gt;&gt;="],
-  [:RShift,               "&gt;&gt;"],
-  [:GreaterEquals,        "&gt;="],
-  [:Greater,              "&gt;"],
-  [:Same,                 "==="],
-  [:Equals,               "=="],
-  [:Assign,               "="],
-  [:LessEquals,           "&lt;="],
-  [:LShiftAssign,         "&lt;&lt;="],
-  [:LShift,               "&lt;&lt;"],
-  [:Less,                 "&lt;"],
-  [:Semicolon,            ";"],
-  [:Member,               "::"],
-  [:Colon,                ":"],
-  [:Ellipsis,             "..."],
-  [:Dot,                  "."],
-  [:MinusAssign,          "-="],
-  [:Decrement,            "--"],
-  [:Minus,                "-"],
-  [:Comma,                ","],
-  [:PlusAssign,           "+="],
-  [:Increment,            "++"],
-  [:Plus,                 "+"],
-  [:StarAssign,           "*="],
-  [:Star,                 "*"],
-  [:RParen,               ")"],
-  [:LParen,               "("],
-  [:BitAndAssign,         "&amp;="],
-  [:AndAssign,            "&amp;&amp;="],
-  [:And,                  "&amp;&amp;"],
-  [:BitAnd,               "&amp;"],
-  [:ModuloAssign,         "%="],
-  [:Modulo,               "%"],
-  [:BangSame,             "!=="],
-  [:BangEquals,           "!="],
-  [:Bang,                 "!"]
-]
-
-# This is a Lexer for the tokens of ActionScript 2.0.
-class ASLexer
-  # This is a naive lexer implementation that considers input line-by-line,
-  # with special cases to handle multiline tokens (strings, comments).
-  # spacial care must be taken to declaire tokens in the 'correct' order (as
-  # the fist match wins), and to cope with keyword/identifier ambiguity
-  # (keywords have '\b' regexp-lookahead appended)
-
-  @@matches = []
-
</pre><pre class="diff" id="context">   def initialize(io)
     @io = io
     @tokens = Array.new
</pre><pre class="diff"><small id="info">@@ -244,41 +29,46 @@
</small></pre><pre class="diff" id="context">     @tokens.shift
   end
 
</pre><pre class="diff" id="removed">-  private
</pre><pre class="diff" id="added">+  def check_fill
+    if @tokens.empty? &amp;&amp; !@io.eof?
+      fill()
+    end
+  end
</pre><pre class="diff" id="context"> 
</pre><pre class="diff" id="removed">-  def ASLexer.make_match(match)
-    match.gsub("/", "\\/").gsub("\n", "\\n")
</pre><pre class="diff" id="added">+  def emit(token)
+    @tokens &lt;&lt; token
</pre><pre class="diff" id="context">   end
 
</pre><pre class="diff" id="removed">-  h =		"[0-9a-fA-F]"
-  nl =		"\\n|\\r\\n|\\r|\\f"
-  nonascii =	"[\\200-\\377]"
-  unicode =	"\\\\#{h}{1,6}[ \\t\\r\\n\\f]?"
-  escape =	"(?:#{unicode}|\\\\[ -~\\200-\\377])"
-  nmstart =	"(?:[a-zA-Z_$]|#{nonascii}|#{escape})"
-  nmchar =	"(?:[a-zA-Z0-9_$]|#{nonascii}|#{escape})"
-  SINGLE_LINE_COMMENT = "//([^\n\r]*)"
-  OMULTI_LINE_COMMENT = "/\\*"
-  CMULTI_LINE_COMMENT = "\\*/"
-  STRING_START1 = "'"
-  STRING_END1 = "((?:(?:\\\\')|[\\t !\#$%&amp;(-~]|#{nl}|\"|#{nonascii}|#{escape})*)\'"
-  STRING_START2 = '"'
-  STRING_END2 = "((?:(?:\\\\\")|[\\t !\#$%&amp;(-~]|#{nl}|'|#{nonascii}|#{escape})*)\""
-  WHITESPACE = "[ \t\r\n\f]+"
</pre><pre class="diff" id="added">+  def parse_error(text)
+    raise "#{@io.lineno}:no lexigraphic match for text starting '#{text}'"
+  end
+  def warn(message)
+    $stderr.puts(message)
+  end
+end
</pre><pre class="diff" id="context"> 
 
</pre><pre class="diff" id="removed">-  ident =	"#{nmstart}#{nmchar}*"
-#  name =	"#{nmchar}+"
-  num	 =	"[0-9]+|[0-9]*\\.[0-9]+"
-#  string =	"#{string1}|#{string2}"
-  w =		"[ \t\r\n\f]*"
</pre><pre class="diff" id="added">+# This is a Lexer for the tokens of ActionScript 2.0.
+class LexerBuilder
+  # This is a naive lexer implementation that considers input line-by-line,
+  # with special cases to handle multiline tokens (strings, comments).
+  # Special care must be taken to declare tokens in the 'correct' order (as
+  # the first match wins), and to cope with keyword/identifier ambiguity
+  # (keywords have '\b' regexp-lookahead appended)
</pre><pre class="diff" id="context"> 
</pre><pre class="diff" id="added">+  def initialize
+    @matches = []
+  end
</pre><pre class="diff" id="context"> 
</pre><pre class="diff" id="removed">-  def self.add_match(match, lex_meth_sym, tok_class_sym)
-    @@matches &lt;&lt; [make_match(match), lex_meth_sym, tok_class_sym]
</pre><pre class="diff" id="added">+  def make_match(match)
+    match.gsub("/", "\\/").gsub("\n", "\\n")
</pre><pre class="diff" id="context">   end
 
</pre><pre class="diff" id="removed">-  def self.make_simple_token(name, value, match)
</pre><pre class="diff" id="added">+  def add_match(match, lex_meth_sym, tok_class_sym)
+    @matches &lt;&lt; [make_match(match), lex_meth_sym, tok_class_sym]
+  end
+
+  def make_simple_token(name, value, match)
</pre><pre class="diff" id="context">     class_name = "#{name}Token"
     the_class = Class.new(ASToken)
     the_class.class_eval &lt;&lt;-EOE
</pre><pre class="diff"><small id="info">@@ -291,104 +81,21 @@
</small></pre><pre class="diff" id="context">     add_match(match, :lex_simple_token, class_name.to_sym)
   end
 
</pre><pre class="diff" id="removed">-  def lex_simple_token(class_sym, match, io)
-    ActionScript::Parse.const_get(class_sym).new(io.lineno)
-  end
-
-  def self.make_keyword_token(name)
</pre><pre class="diff" id="added">+  def make_keyword_token(name)
</pre><pre class="diff" id="context">     make_simple_token(name.capitalize, name, "#{name}\\b")
   end
 
</pre><pre class="diff" id="removed">-  # TODO: whitespace tokens don't span lines, which might not be the expected
-  #       behaviour
-  add_match(WHITESPACE, :lex_simplebody_token, :WhitespaceToken)
-
-  def lex_simplebody_token(class_sym, match, io)
-    ActionScript::Parse.const_get(class_sym).new(match[0], io.lineno)
-  end
-
-  add_match(SINGLE_LINE_COMMENT, :lex_singlelinecoomment_token, :SingleLineCommentToken)
-
-  def lex_singlelinecoomment_token(class_sym, match, io)
-    SingleLineCommentToken.new(match[1], io.lineno)
-  end
-
-  add_match(OMULTI_LINE_COMMENT, :lex_multilinecomment_token, :MultiLineCommentToken)
-
-  def lex_multilinecomment_token(class_sym, match, io)
-    lineno = io.lineno
-    line = match.post_match
-    comment = ''
-    until line =~ /\*\//o
-      comment &lt;&lt; line
-      line = io.readline;
-    end
-    comment &lt;&lt; $`
-    match.string = $'
-    MultiLineCommentToken.new(comment, lineno)
-  end
-
-  Keywords.each do |keyword|
-    make_keyword_token(keyword)
-  end
-
-  def self.make_punctuation_token(name, value)
</pre><pre class="diff" id="added">+  def make_punctuation_token(name, value)
</pre><pre class="diff" id="context">     make_simple_token(name, value, Regexp.escape(value))
   end
 
</pre><pre class="diff" id="removed">-  Punctuation.each do |punct|
-    make_punctuation_token(*punct)
-  end
-
-  add_match(ident, :lex_simplebody_token, :IdentifierToken)
-
-  add_match(STRING_START1, :lex_string1_token, :StringToken)
-
-  def lex_string1_token(class_sym, match, io)
-    lineno = io.lineno
-    line = match.post_match
-    str = ''
-    until line =~ /#{STRING_END1}/o
-      str &lt;&lt; line
-      line = io.readline;
-      raise "#{lineno}:unexpected EOF in string" if line.nil?
-    end
-    str &lt;&lt; $1
-    match.string = $'
-    StringToken.new(str, lineno)
-  end
-
-  add_match(STRING_START2, :lex_string2_token, :StringToken)
-
-  def lex_string2_token(class_sym, match, io)
-    lineno = io.lineno
-    line = match.post_match
-    str = ''
-    until line =~ /#{STRING_END2}/o
-      str &lt;&lt; line
-      line = io.readline;
-      raise "#{lineno}:unexpected EOF in string" if line.nil?
-    end
-    str &lt;&lt; $1
-    match.string = $'
-    StringToken.new(str, lineno)
-  end
-
-  add_match(num, :lex_simplebody_token, :NumberToken)
-
-  def check_fill
-    if @tokens.empty? &amp;&amp; !@io.eof?
-      fill()
-    end
-  end
-
-  def self.build_lexer
</pre><pre class="diff" id="added">+  def build_lexer(target_class)
</pre><pre class="diff" id="context">     text = &lt;&lt;-EOS
       def fill
         line = StringScanner.new(@io.readline)
         until line.eos?
     EOS
</pre><pre class="diff" id="removed">-    @<span id="removedchars">@</span>matches.each_with_index do |token_match, index|
</pre><pre class="diff" id="added">+    @matches.each_with_index do |token_match, index|
</pre><pre class="diff" id="context">       re, lex_method, tok_class = token_match
       text &lt;&lt; "if line.scan(/#{re}/)\n"
       text &lt;&lt; "  emit(#{lex_method.to_s}(:#{tok_class.to_s}, line, @io))\n"
</pre><pre class="diff"><small id="info">@@ -401,62 +108,10 @@
</small></pre><pre class="diff" id="context">         end
       end
     EOS
</pre><pre class="diff" id="removed">-    class_eval(text)
</pre><pre class="diff" id="added">+    target_class.class_eval(text)
</pre><pre class="diff" id="context">   end
 
</pre><pre class="diff" id="removed">-  self.build_lexer
-
-  public
-  def emit(token)
-    @tokens &lt;&lt; token
-  end
-
-  def parse_error(text)
-    raise "#{@io.lineno}:no lexigraphic match for text starting '#{text}'"
-  end
-  def warn(message)
-    $stderr.puts(message)
-  end
</pre><pre class="diff" id="context"> end
 
</pre><pre class="diff" id="removed">-
-class SkipASLexer
-  def initialize(lexer)
-    @lex = lexer
-    @handler = nil
-  end
-
-  def handler=(handler)
-    @handler = handler
-  end
-
-  def get_next
-    while skip?(tok=@lex.get_next)
-      notify(tok)
-    end
-    tok
-  end
-
-  def peek_next
-    while skip?(tok=@lex.peek_next)
-      notify(tok)
-      @lex.get_next
-    end
-    tok
-  end
-
-  protected
-
-  def skip?(tok)
-    tok.is_a?(CommentToken) || tok.is_a?(WhitespaceToken)
-  end
-
-  def notify(tok)
-    unless @handler.nil?
-      @handler.comment(tok.body)
-    end
-  end
-end
-
</pre><pre class="diff" id="context"> end # module Parse
 end # module ActionScript
</pre></div>
<hr /><a name="file4" /><div class="file">
<span class="pathname"><a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk">trunk</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api">as2api</a></span><br />
<div class="fileheader"><big><b>tc_actionscript_lexer.rb</b></big> <small id="info"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/tc_actionscript_lexer.rb?rev=193&amp;content-type=text/vnd.viewcvs-markup">193</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/tc_actionscript_lexer.rb.diff?r1=193&amp;r2=194">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/tc_actionscript_lexer.rb?rev=194&amp;content-type=text/vnd.viewcvs-markup">194</a></small></div>
<pre class="diff"><small id="info">--- trunk/as2api/tc_actionscript_lexer.rb	2005-07-12 17:16:15 UTC (rev 193)
+++ trunk/as2api/tc_actionscript_lexer.rb	2005-07-12 21:06:32 UTC (rev 194)
@@ -1,5 +1,5 @@
</small></pre><pre class="diff" id="context"> require 'test/unit'
</pre><pre class="diff" id="removed">-require 'parse/lexer'
</pre><pre class="diff" id="added">+require 'parse/<span id="addedchars">as</span>lexer'
</pre><pre class="diff" id="context"> 
 class TC_ActionScriptLexer &lt; Test::Unit::TestCase
 
</pre></div>
<center><small><a href="http://www.badgers-in-foil.co.uk/projects/cvsspam/" title="commit -&gt; email">CVSspam</a> 0.2.11</small></center>
</body></html>