[as2api-dev] [CVS trunk] A small lexer optimisation.

David Holroyd dave at badgers-in-foil.co.uk
Sun, 24 Jul 2005 22:59:40 +0000


<html>
<head>
<style><!--
  body {background-color:#ffffff;}
  .file {border:1px solid #eeeeee;margin-top:1em;margin-bottom:1em;}
  .pathname {font-family:monospace; float:right;}
  .fileheader {margin-bottom:.5em;}
  .diff {margin:0;}
  .tasklist {padding:4px;border:1px dashed #000000;margin-top:1em;}
  .tasklist ul {margin-top:0;margin-bottom:0;}
  tr.alt {background-color:#eeeeee}
  #added {background-color:#ddffdd;}
  #addedchars {background-color:#99ff99;font-weight:bolder;}
  tr.alt #added {background-color:#ccf7cc;}
  #removed {background-color:#ffdddd;}
  #removedchars {background-color:#ff9999;font-weight:bolder;}
  tr.alt #removed {background-color:#f7cccc;}
  #info {color:#888888;}
  #context {background-color:#eeeeee;}
  td {padding-left:.3em;padding-right:.3em;}
  tr.head {border-bottom-width:1px;border-bottom-style:solid;}
  tr.head td {padding:0;padding-top:.2em;}
  .task {background-color:#ffff00;}
  .comment {padding:4px;border:1px dashed #000000;background-color:#ffffdd}
  .error {color:red;}
  hr {border-width:0px;height:2px;background:black;}
--></style>
</head>
<body>
<table cellspacing="0" cellpadding="0" border="0" rules="cols">
<tr class="head"><td colspan="4">Commit in <b><tt>trunk/as2api/parse</tt></b><span id="info"> on MAIN</span></td></tr>
<tr><td><tt><a href="#file1">aslexer.rb</a></tt></td><td align="right" id="added">+20</td><td align="right" id="removed">-3</td><td nowrap="nowrap" align="center"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=206&amp;content-type=text/vnd.viewcvs-markup">206</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb.diff?r1=206&amp;r2=207">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=207&amp;content-type=text/vnd.viewcvs-markup">207</a></td></tr>
<tr class="alt"><td><tt><a href="#file2">lexer.rb</a></tt></td><td align="right" id="added">+15</td><td align="right" id="removed">-1</td><td nowrap="nowrap" align="center"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=206&amp;content-type=text/vnd.viewcvs-markup">206</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb.diff?r1=206&amp;r2=207">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=207&amp;content-type=text/vnd.viewcvs-markup">207</a></td></tr>
<tr><td></td><td align="right" id="added">+35</td><td align="right" id="removed">-4</td><td></td></tr>
</table>
<small id="info">2 modified files</small><br />
<pre class="comment">
A small lexer optimisation.

Rather than having the lexer scan for every possible keyword token, simply
match all keywords / identifiers with the same rule, and have the lexical
action classify the input when the rule is matched.

This is a seedup because at any point in the input, the number of different
rules we have to test to recognise the next token is quite a lot smaller.
The lexical action can then try to lookup the matched text in a hashtable,
which should be faster that the previous linear search through all the
keyword.

My completely trivial test case went from about 37 seconds to 34 with this
change.
</pre>
<hr /><a name="file1" /><div class="file">
<span class="pathname"><a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk">trunk</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api">as2api</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse">parse</a></span><br />
<div class="fileheader"><big><b>aslexer.rb</b></big> <small id="info"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=206&amp;content-type=text/vnd.viewcvs-markup">206</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb.diff?r1=206&amp;r2=207">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=207&amp;content-type=text/vnd.viewcvs-markup">207</a></small></div>
<pre class="diff"><small id="info">--- trunk/as2api/parse/aslexer.rb	2005-07-18 16:31:52 UTC (rev 206)
+++ trunk/as2api/parse/aslexer.rb	2005-07-24 22:59:39 UTC (rev 207)
@@ -219,11 +219,24 @@
</small></pre><pre class="diff" id="context"> 
 class ASLexer &lt; AbstractLexer
 
</pre><pre class="diff" id="removed">-
</pre><pre class="diff" id="context">   def lex_simple_token(class_sym, match, io)
     ActionScript::Parse.const_get(class_sym).new(io.lineno-1)
   end
 
</pre><pre class="diff" id="added">+  def lex_key_or_ident_token(match, io)
+    body = match[0]
+    class_sym = @@keyword_tokens[body]
+    if class_sym
+      lex_simple_token(class_sym, match, io)
+    else
+      lex_simplebody_token(:IdentifierToken, match, io)
+    end
+  end
+
+  def self.keyword_tokens=(toks)
+    @@keyword_tokens = toks
+  end
+
</pre><pre class="diff" id="context">   def lex_simplebody_token(class_sym, match, io)
     ActionScript::Parse.const_get(class_sym).new(match[0], io.lineno-1)
   end
</pre><pre class="diff"><small id="info">@@ -287,15 +300,19 @@
</small></pre><pre class="diff" id="context"> 
   builder.add_match(OMULTI_LINE_COMMENT, :lex_multilinecomment_token, :MultiLineCommentToken)
 
</pre><pre class="diff" id="added">+  keyword_tokens = {}
</pre><pre class="diff" id="context">   Keywords.each do |keyword|
</pre><pre class="diff" id="removed">-    builder.make_keyword_token(keyword)
</pre><pre class="diff" id="added">+    builder.create_keytoken_class(keyword)
+    keyword_tokens[keyword] = "#{keyword.capitalize}Token".to_sym
</pre><pre class="diff" id="context">   end
 
</pre><pre class="diff" id="added">+  ASLexer.keyword_tokens = keyword_tokens
+
</pre><pre class="diff" id="context">   Punctuation.each do |punct|
     builder.make_punctuation_token(*punct)
   end
 
</pre><pre class="diff" id="removed">-  builder.add_match(IDENT, :lex_simplebody_token, :IdentifierToken)
</pre><pre class="diff" id="added">+  builder.add_match(IDENT, :lex_key_or_ident_token, nil)
</pre><pre class="diff" id="context"> 
   builder.add_match(STRING_START1, :lex_string1_token, :StringToken)
 
</pre></div>
<hr /><a name="file2" /><div class="file">
<span class="pathname"><a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk">trunk</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api">as2api</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse">parse</a></span><br />
<div class="fileheader"><big><b>lexer.rb</b></big> <small id="info"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=206&amp;content-type=text/vnd.viewcvs-markup">206</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb.diff?r1=206&amp;r2=207">-&gt;</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=207&amp;content-type=text/vnd.viewcvs-markup">207</a></small></div>
<pre class="diff"><small id="info">--- trunk/as2api/parse/lexer.rb	2005-07-18 16:31:52 UTC (rev 206)
+++ trunk/as2api/parse/lexer.rb	2005-07-24 22:59:39 UTC (rev 207)
@@ -86,6 +86,16 @@
</small></pre><pre class="diff" id="context">     @matches &lt;&lt; [make_match(match), lex_meth_sym, tok_class_sym]
   end
 
</pre><pre class="diff" id="added">+  def create_keytoken_class(name)
+    the_class = Class.new(ASToken)
+    the_class.class_eval &lt;&lt;-EOE
+    def initialize(lineno)
+      super("#{name}", lineno)
+    end
+    EOE
+    ActionScript::Parse.const_set("#{name.capitalize}Token".to_sym, the_class)
+  end
+
</pre><pre class="diff" id="context">   def make_simple_token(name, value, match)
     class_name = "#{name}Token"
     the_class = Class.new(ASToken)
</pre><pre class="diff"><small id="info">@@ -116,7 +126,11 @@
</small></pre><pre class="diff" id="context">     @matches.each_with_index do |token_match, index|
       re, lex_method, tok_class = token_match
       text &lt;&lt; "if line.scan(/#{re}/)\n"
</pre><pre class="diff" id="removed">-      text &lt;&lt; "  emit(#{lex_method.to_s}(:#{tok_class.to_s}, line, @io))\n"
</pre><pre class="diff" id="added">+      if tok_class
+      	text &lt;&lt; "  emit(#{lex_method.to_s}(:#{tok_class.to_s}, line, @io))\n"
+      else
+      	text &lt;&lt; "  emit(#{lex_method.to_s}(line, @io))\n"
+      end
</pre><pre class="diff" id="context">       text &lt;&lt; "  next\n"
       text &lt;&lt; "end\n"
     end
</pre></div>
<center><small><a href="http://www.badgers-in-foil.co.uk/projects/cvsspam/" title="commit -&gt; email">CVSspam</a> 0.2.11</small></center>
</body></html>