/*
 * JavaCC grammar for a simple document tokenizer.
 *
 * Reads a stream of marked-up documents from standard input, splits each
 * document into a header (dateline / headline) and a text body, prints every
 * token that is recognised together with the stem reported by Porter, and
 * calls Document.output() once per parsed document.
 *
 * NOTE(review): the copy of this grammar that was reviewed had everything
 * between '<' and '>' stripped in several places (the eight markup token
 * definitions, the token references inside term(), and the delimiters inside
 * parseDocument/text/dateline/headline).  The token references in term() are
 * restored from the surviving token definitions and println labels.  The
 * markup token NAMES and TAG LITERALS below are a reconstruction -- confirm
 * them against the original grammar / corpus format before relying on them.
 */
PARSER_BEGIN(p)

import java.util.*;
import lib.*;
import IRUtilities.*;

// JavaCC requires this class to have the same name as the parser named in
// PARSER_BEGIN/PARSER_END; main() already instantiates it as "p".
public class p {
    public static void main(String args[]) throws ParseException {
        p parser = new p(System.in);
        parser.Input();
    }
}

PARSER_END(p)

SKIP :
{
    " "
  | "\t"
  | "\n"
  | "\r"
}

// LEXICAL SPECIFICATIONS BEGIN HERE
// Parsing rules
// Stuff still needed -- handle commas in numbers
TOKEN :
{
    // NOTE(review): reconstructed markup delimiters (eight definitions were
    // present in the damaged source; names and literals are assumed).
    < BEGINDOC: "<DOC>" >
  | < ENDDOC: "</DOC>" >
  | < BEGINHEADLINE: "<HEADLINE>" >
  | < ENDHEADLINE: "</HEADLINE>" >
  | < BEGINDATELINE: "<DATELINE>" >
  | < ENDDATELINE: "</DATELINE>" >
  | < BEGINTEXT: "<TEXT>" >
  | < ENDTEXT: "</TEXT>" >

    // Order matters below: for matches of equal length JavaCC picks the
    // token defined first, so Acronym must stay ahead of Word.
  | < Acronym: (["A"-"Z"]) (["A"-"Z"])* >

    // Handle U.S
    // At least one letter-dot pair is required so the token can never match
    // the empty string.
  | < Abbreviation: (["A"-"Z"] ".")+ >

    // Handle single letter followed by dash followed by one or more
    // numbers -- so we get F-16 as a single token
  | < Model: ["a"-"z","A"-"Z"] "-" (["0"-"9"])+ >

  | < Word: ["a"-"z","A"-"Z"] (["a"-"z","A"-"Z"])* >
  | < Integer: ["0"-"9"] (["0"-"9"])* >
  | < Decimal: (["0"-"9"])* "." (["0"-"9"])+ >

    // Catch-all: any single character not matched by a longer token.
  | < Other: ~[] >
}

// Initializes a internally generated docid -- needs work here to
// allow user to specify a start docid from the command prompt
// Also, we don't want to reinit docid at start of each file
//
// Parses zero or more documents.  After each document the current Document
// is output, the id is incremented and a fresh Document is created for the
// next iteration.
void Input() :
{
    int docId = 0;
    Document d = new Document(docId);
}
{
    (
        parseDocument(d)
        {
            d.output();
            ++docId;
            d = new Document(docId);
        }
    )*
}

// Parses one delimited document: a header followed by the text body.
void parseDocument(Document d) :
{
}
{
    <BEGINDOC>
    {
        System.out.println(" ");
        System.out.println("Now Processing a new Document:");
        System.out.println(" ");
    }
    header(d)
    text(d)
    <ENDDOC>
}

// Parses the text body, printing the stem Porter reports for every term.
void text(Document d) :
{
    String t;
    Porter stemmer;
}
{
    <BEGINTEXT>
    (
        t = term()
        {
            // Lets find the stem of this word
            stemmer = new Porter(t);
            System.out.println("Stem --> " + stemmer.getStem());
            System.out.println();
            // Here we could check for stop words, stem and then
            // add to the document
        }
    )*
    <ENDTEXT>
}

// Consumes exactly one non-markup token, prints its category (except for
// Other) and returns the token's image.
String term() :
{
    Token t;
}
{
    (
        t = <Acronym>      { System.out.println("Acronym--> " + t.image); }
      | t = <Abbreviation> { System.out.println("Abbrev --> " + t.image); }
      | t = <Word>         { System.out.println("Word --> " + t.image); }
      | t = <Integer>      { System.out.println("Integer--> " + t.image); }
      | t = <Decimal>      { System.out.println("Decimal--> " + t.image); }
      | t = <Model>        { System.out.println("Model --> " + t.image); }
      | t = <Other>        { /* Nothing to do -- just skip it */ }
    )
    { return t.image; }
}

// Parses the document header: any mix of dateline sections, headline
// sections and loose terms.  If a section occurs more than once the last
// occurrence wins; a section that never occurs is recorded as "".
void header(Document d) :
{
    String hl = "";
    String dl = "";
}
{
    (
        dl = dateline()
      | hl = headline()
      | term()
    )*
    {
        d.addDateLine(dl);
        d.addHeadLine(hl);
    }
}

// Returns the terms of one dateline section, each preceded by a single
// space (so a non-empty result starts with " ").
String dateline() :
{
    String t;
    StringBuilder dl = new StringBuilder();
}
{
    <BEGINDATELINE>
    ( t = term() { dl.append(' ').append(t); } )*
    <ENDDATELINE>
    { return dl.toString(); }
}

// Returns the terms of one headline section, each preceded by a single
// space (so a non-empty result starts with " ").
String headline() :
{
    String t;
    StringBuilder hl = new StringBuilder();
}
{
    <BEGINHEADLINE>
    ( t = term() { hl.append(' ').append(t); } )*
    <ENDHEADLINE>
    { return hl.toString(); }
}