[as2api-dev] [CVS trunk] A small lexer optimisation.
David Holroyd
dave at badgers-in-foil.co.uk
Sun, 24 Jul 2005 22:59:40 +0000
<html>
<head>
<style><!--
body {background-color:#ffffff;}
.file {border:1px solid #eeeeee;margin-top:1em;margin-bottom:1em;}
.pathname {font-family:monospace; float:right;}
.fileheader {margin-bottom:.5em;}
.diff {margin:0;}
.tasklist {padding:4px;border:1px dashed #000000;margin-top:1em;}
.tasklist ul {margin-top:0;margin-bottom:0;}
tr.alt {background-color:#eeeeee}
#added {background-color:#ddffdd;}
#addedchars {background-color:#99ff99;font-weight:bolder;}
tr.alt #added {background-color:#ccf7cc;}
#removed {background-color:#ffdddd;}
#removedchars {background-color:#ff9999;font-weight:bolder;}
tr.alt #removed {background-color:#f7cccc;}
#info {color:#888888;}
#context {background-color:#eeeeee;}
td {padding-left:.3em;padding-right:.3em;}
tr.head {border-bottom-width:1px;border-bottom-style:solid;}
tr.head td {padding:0;padding-top:.2em;}
.task {background-color:#ffff00;}
.comment {padding:4px;border:1px dashed #000000;background-color:#ffffdd}
.error {color:red;}
hr {border-width:0px;height:2px;background:black;}
--></style>
</head>
<body>
<table cellspacing="0" cellpadding="0" border="0" rules="cols">
<tr class="head"><td colspan="4">Commit in <b><tt>trunk/as2api/parse</tt></b><span id="info"> on MAIN</span></td></tr>
<tr><td><tt><a href="#file1">aslexer.rb</a></tt></td><td align="right" id="added">+20</td><td align="right" id="removed">-3</td><td nowrap="nowrap" align="center"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=206&content-type=text/vnd.viewcvs-markup">206</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb.diff?r1=206&r2=207">-></a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=207&content-type=text/vnd.viewcvs-markup">207</a></td></tr>
<tr class="alt"><td><tt><a href="#file2">lexer.rb</a></tt></td><td align="right" id="added">+15</td><td align="right" id="removed">-1</td><td nowrap="nowrap" align="center"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=206&content-type=text/vnd.viewcvs-markup">206</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb.diff?r1=206&r2=207">-></a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=207&content-type=text/vnd.viewcvs-markup">207</a></td></tr>
<tr><td></td><td align="right" id="added">+35</td><td align="right" id="removed">-4</td><td></td></tr>
</table>
<small id="info">2 modified files</small><br />
<pre class="comment">
A small lexer optimisation.
Rather than having the lexer scan for every possible keyword token, simply
match all keywords / identifiers with the same rule, and have the lexical
action classify the input when the rule is matched.
This is a seedup because at any point in the input, the number of different
rules we have to test to recognise the next token is quite a lot smaller.
The lexical action can then try to lookup the matched text in a hashtable,
which should be faster that the previous linear search through all the
keyword.
My completely trivial test case went from about 37 seconds to 34 with this
change.
</pre>
<hr /><a name="file1" /><div class="file">
<span class="pathname"><a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk">trunk</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api">as2api</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse">parse</a></span><br />
<div class="fileheader"><big><b>aslexer.rb</b></big> <small id="info"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=206&content-type=text/vnd.viewcvs-markup">206</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb.diff?r1=206&r2=207">-></a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/aslexer.rb?rev=207&content-type=text/vnd.viewcvs-markup">207</a></small></div>
<pre class="diff"><small id="info">--- trunk/as2api/parse/aslexer.rb 2005-07-18 16:31:52 UTC (rev 206)
+++ trunk/as2api/parse/aslexer.rb 2005-07-24 22:59:39 UTC (rev 207)
@@ -219,11 +219,24 @@
</small></pre><pre class="diff" id="context">
class ASLexer < AbstractLexer
</pre><pre class="diff" id="removed">-
</pre><pre class="diff" id="context"> def lex_simple_token(class_sym, match, io)
ActionScript::Parse.const_get(class_sym).new(io.lineno-1)
end
</pre><pre class="diff" id="added">+ def lex_key_or_ident_token(match, io)
+ body = match[0]
+ class_sym = @@keyword_tokens[body]
+ if class_sym
+ lex_simple_token(class_sym, match, io)
+ else
+ lex_simplebody_token(:IdentifierToken, match, io)
+ end
+ end
+
+ def self.keyword_tokens=(toks)
+ @@keyword_tokens = toks
+ end
+
</pre><pre class="diff" id="context"> def lex_simplebody_token(class_sym, match, io)
ActionScript::Parse.const_get(class_sym).new(match[0], io.lineno-1)
end
</pre><pre class="diff"><small id="info">@@ -287,15 +300,19 @@
</small></pre><pre class="diff" id="context">
builder.add_match(OMULTI_LINE_COMMENT, :lex_multilinecomment_token, :MultiLineCommentToken)
</pre><pre class="diff" id="added">+ keyword_tokens = {}
</pre><pre class="diff" id="context"> Keywords.each do |keyword|
</pre><pre class="diff" id="removed">- builder.make_keyword_token(keyword)
</pre><pre class="diff" id="added">+ builder.create_keytoken_class(keyword)
+ keyword_tokens[keyword] = "#{keyword.capitalize}Token".to_sym
</pre><pre class="diff" id="context"> end
</pre><pre class="diff" id="added">+ ASLexer.keyword_tokens = keyword_tokens
+
</pre><pre class="diff" id="context"> Punctuation.each do |punct|
builder.make_punctuation_token(*punct)
end
</pre><pre class="diff" id="removed">- builder.add_match(IDENT, :lex_simplebody_token, :IdentifierToken)
</pre><pre class="diff" id="added">+ builder.add_match(IDENT, :lex_key_or_ident_token, nil)
</pre><pre class="diff" id="context">
builder.add_match(STRING_START1, :lex_string1_token, :StringToken)
</pre></div>
<hr /><a name="file2" /><div class="file">
<span class="pathname"><a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk">trunk</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api">as2api</a>/<a
href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse">parse</a></span><br />
<div class="fileheader"><big><b>lexer.rb</b></big> <small id="info"><a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=206&content-type=text/vnd.viewcvs-markup">206</a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb.diff?r1=206&r2=207">-></a> <a href="http://svn.badgers-in-foil.co.uk/viewcvs.cgi/as2api/trunk/as2api/parse/lexer.rb?rev=207&content-type=text/vnd.viewcvs-markup">207</a></small></div>
<pre class="diff"><small id="info">--- trunk/as2api/parse/lexer.rb 2005-07-18 16:31:52 UTC (rev 206)
+++ trunk/as2api/parse/lexer.rb 2005-07-24 22:59:39 UTC (rev 207)
@@ -86,6 +86,16 @@
</small></pre><pre class="diff" id="context"> @matches << [make_match(match), lex_meth_sym, tok_class_sym]
end
</pre><pre class="diff" id="added">+ def create_keytoken_class(name)
+ the_class = Class.new(ASToken)
+ the_class.class_eval <<-EOE
+ def initialize(lineno)
+ super("#{name}", lineno)
+ end
+ EOE
+ ActionScript::Parse.const_set("#{name.capitalize}Token".to_sym, the_class)
+ end
+
</pre><pre class="diff" id="context"> def make_simple_token(name, value, match)
class_name = "#{name}Token"
the_class = Class.new(ASToken)
</pre><pre class="diff"><small id="info">@@ -116,7 +126,11 @@
</small></pre><pre class="diff" id="context"> @matches.each_with_index do |token_match, index|
re, lex_method, tok_class = token_match
text << "if line.scan(/#{re}/)\n"
</pre><pre class="diff" id="removed">- text << " emit(#{lex_method.to_s}(:#{tok_class.to_s}, line, @io))\n"
</pre><pre class="diff" id="added">+ if tok_class
+ text << " emit(#{lex_method.to_s}(:#{tok_class.to_s}, line, @io))\n"
+ else
+ text << " emit(#{lex_method.to_s}(line, @io))\n"
+ end
</pre><pre class="diff" id="context"> text << " next\n"
text << "end\n"
end
</pre></div>
<center><small><a href="http://www.badgers-in-foil.co.uk/projects/cvsspam/" title="commit -> email">CVSspam</a> 0.2.11</small></center>
</body></html>