package org.maltparser.core.syntaxgraph.headrules;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;

import org.apache.log4j.Logger;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.HashMap;
import org.maltparser.core.helper.URLFinder;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.syntaxgraph.node.NonTerminalNode;
import org.maltparser.core.syntaxgraph.node.PhraseStructureNode;

/**
 * A map of {@link HeadRule} objects loaded from a head rule specification.
 *
 * <p>Each rule is stored under the text that precedes the first tab character
 * of its specification line. Rules are looked up for a non-terminal node with
 * keys of the form {@code <symbol table name>:<label symbol>}, first using the
 * non-terminal symbol table and then, if that yields nothing, the edge label
 * symbol table.
 *
 * @author Johan Hall
 */
public class HeadRules extends HashMap<String,HeadRule> {
    public static final long serialVersionUID = 8045568022124826323L;
    protected Logger logger;
    protected String name;
    protected DataFormatInstance dataFormatInstance;
    protected SymbolTable nonTerminalSymbolTable; // TODO more complex
    protected SymbolTable edgelabelSymbolTable; // TODO more complex

    /**
     * Creates an empty rule set and registers the "CAT" and "LABEL" symbol
     * tables with the given data format instance.
     *
     * @param logger the logger used while loading head rules
     * @param dataFormatInstance the data format instance whose symbol tables are used
     * @throws MaltChainedException if a symbol table cannot be added
     */
    public HeadRules(Logger logger, DataFormatInstance dataFormatInstance) throws MaltChainedException {
        setLogger(logger);
        setDataFormatInstance(dataFormatInstance);
        nonTerminalSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("CAT");
        edgelabelSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("LABEL");
    }

    /**
     * Resolves {@code fileName} to a URL with a {@link URLFinder} and loads the
     * head rules found there.
     *
     * @param fileName the name of the head rule specification
     * @throws MaltChainedException if the specification cannot be read or is malformed
     */
    public void parseHeadRules(String fileName) throws MaltChainedException {
        final URLFinder f = new URLFinder();
        parseHeadRules(f.findURL(fileName));
    }

    /**
     * Loads head rules from the given URL, one rule per line.
     *
     * <p>Blank lines, lines of at most one character and lines whose first
     * non-whitespace characters are {@code --} are skipped. Every other line
     * must contain a tab character; the text before the first tab becomes the
     * map key of the rule built from the whole line.
     *
     * @param url the location of the head rule specification
     * @throws MaltChainedException if the URL cannot be opened or read, or if a
     *         rule line contains no tab character
     */
    public void parseHeadRules(URL url) throws MaltChainedException {
        final BufferedReader br;
        try {
            // NOTE: InputStreamReader without an explicit charset decodes with the platform default.
            br = new BufferedReader(new InputStreamReader(url.openStream()));
        } catch (IOException e) {
            throw new HeadRuleException("Could not read the head rules from file '"+url.toString()+"'. ", e);
        }
        try {
            // The guard must match the level actually logged, otherwise it protects nothing.
            if (logger != null && logger.isDebugEnabled()) {
                logger.debug("Loading the head rule specification '"+url.toString()+"' ...\n");
            }
            String fileLine;
            while (true) {
                try {
                    fileLine = br.readLine();
                } catch (IOException e) {
                    throw new HeadRuleException("Could not read the head rules from file '"+url.toString()+"'. ", e);
                }
                if (fileLine == null) {
                    break;
                }
                final String trimmed = fileLine.trim();
                // Skip blank/too-short lines and "--" comments. The comment test is only
                // evaluated on strings that are long enough, so it can never index out of range.
                if (fileLine.length() <= 1 || trimmed.length() == 0 || trimmed.startsWith("--")) {
                    continue;
                }
                final int index = fileLine.indexOf('\t');
                if (index == -1) {
                    throw new HeadRuleException("The specification of the head rule is not correct '"+fileLine+"'. ");
                }

                final HeadRule rule = new HeadRule(this, fileLine);
                put(fileLine.substring(0, index), rule);
            }
        } finally {
            // Always release the underlying stream, also when parsing fails half-way.
            try {
                br.close();
            } catch (IOException ignored) {
                // A failure while closing a stream that was only read from cannot
                // invalidate the rules already parsed, so it is deliberately ignored.
            }
        }
    }

    /**
     * Returns the head child of {@code nt} according to the rule registered for it.
     *
     * @param nt the non-terminal node whose head child is requested
     * @return the head child chosen by the matching rule, or {@code null} if no rule matches
     * @throws MaltChainedException if the lookup or the rule evaluation fails
     */
    public PhraseStructureNode getHeadChild(NonTerminalNode nt) throws MaltChainedException {
        final HeadRule rule = lookupRule(nt);
        if (rule != null) {
            return rule.getHeadChild(nt);
        }
        return null;
    }

    /**
     * Returns the default direction of the rule registered for {@code nt}.
     *
     * @param nt the non-terminal node whose default direction is requested
     * @return the matching rule's default direction, or {@link Direction#LEFT} if no rule matches
     * @throws MaltChainedException if the lookup fails
     */
    public Direction getDefaultDirection(NonTerminalNode nt) throws MaltChainedException {
        final HeadRule rule = lookupRule(nt);
        if (rule != null) {
            return rule.getDefaultDirection();
        }
        return Direction.LEFT;
    }

    /**
     * Finds the rule for {@code nt}: first by its label in the non-terminal symbol
     * table, then, if that gives no rule, by its parent edge label in the edge
     * label symbol table.
     *
     * @param nt the non-terminal node to find a rule for
     * @return the matching rule, or {@code null} if there is none
     * @throws MaltChainedException if a label symbol cannot be retrieved
     */
    private HeadRule lookupRule(NonTerminalNode nt) throws MaltChainedException {
        HeadRule rule = null;
        if (nt.hasLabel(nonTerminalSymbolTable)) {
            rule = this.get(nonTerminalSymbolTable.getName()+":"+nt.getLabelSymbol(nonTerminalSymbolTable));
        }
        if (rule == null && nt.hasParentEdgeLabel(edgelabelSymbolTable)) {
            rule = this.get(edgelabelSymbolTable.getName()+":"+nt.getParentEdgeLabelSymbol(edgelabelSymbolTable));
        }
        return rule;
    }

    /**
     * @return the logger used by this rule set
     */
    public Logger getLogger() {
        return logger;
    }

    /**
     * @param logger the logger to use
     */
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * @return the data format instance this rule set was created with or last assigned
     */
    public DataFormatInstance getDataFormatInstance() {
        return dataFormatInstance;
    }

    /**
     * @param dataFormatInstance the data format instance to store
     */
    public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
        this.dataFormatInstance = dataFormatInstance;
    }

    /**
     * Returns the string form of every stored rule, each followed by a newline.
     *
     * @return all rules, one per line
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        for (HeadRule rule : this.values()) {
            sb.append(rule);
            sb.append('\n');
        }
        return sb.toString();
    }
}