package org.maltparser.core.syntaxgraph.headrules;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;

import org.apache.log4j.Logger;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.HashMap;
import org.maltparser.core.helper.URLFinder;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.node.NonTerminalNode;
import org.maltparser.core.syntaxgraph.node.PhraseStructureNode;

/**
 * A map of {@link HeadRule}s loaded from a head rule specification.
 *
 * <p>Each rule is stored under the text that precedes the first tab character of its
 * specification line. Rules are looked up with keys of the form
 * {@code <symbol table name>:<symbol>}, first through the symbol table registered as
 * {@code "CAT"} and then through the one registered as {@code "LABEL"}.
 *
 * @author Johan Hall
 */
public class HeadRules extends HashMap<String,HeadRule> {
    public static final long serialVersionUID = 8045568022124826323L;
    protected Logger logger;
    protected String name;
    private final SymbolTableHandler symbolTableHandler;
    private final DataFormatInstance dataFormatInstance;
    protected SymbolTable nonTerminalSymbolTable; // TODO more complex
    protected SymbolTable edgelabelSymbolTable; // TODO more complex

    /**
     * Creates an empty rule set and registers the symbol tables {@code "CAT"} and
     * {@code "LABEL"} with the given handler.
     *
     * @param logger the logger used while loading rules; may be {@code null}, in which case nothing is logged
     * @param dataFormatInstance the data format instance exposed through {@link #getDataFormatInstance()}
     * @param symbolTableHandler the handler that supplies the two symbol tables
     * @throws MaltChainedException if the symbol tables cannot be added
     */
    public HeadRules(Logger logger, DataFormatInstance dataFormatInstance, SymbolTableHandler symbolTableHandler) throws MaltChainedException {
        setLogger(logger);
        this.dataFormatInstance = dataFormatInstance;
        this.symbolTableHandler = symbolTableHandler;
        nonTerminalSymbolTable = symbolTableHandler.addSymbolTable("CAT");
        edgelabelSymbolTable = symbolTableHandler.addSymbolTable("LABEL");
    }

    /**
     * Resolves {@code fileName} with a {@link URLFinder} and loads the head rules found there.
     *
     * @param fileName the name of the head rule specification to locate
     * @throws MaltChainedException if the specification cannot be located, read or parsed
     */
    public void parseHeadRules(String fileName) throws MaltChainedException {
        final URLFinder f = new URLFinder();
        final URL url = f.findURL(fileName);
        if (url == null) {
            // Report the name the caller asked for instead of failing later with a bare NullPointerException.
            throw new HeadRuleException("Could not find the head rules file '"+fileName+"'. ");
        }
        parseHeadRules(url);
    }

    /**
     * Loads head rules from {@code url}, one rule per line.
     *
     * <p>Lines that are blank (at most one character after trimming) and lines whose first
     * non-whitespace characters are {@code --} are skipped. Every other line must contain a
     * tab character; the text before the first tab becomes the key under which the rule is stored.
     * The stream is always closed before this method returns.
     *
     * @param url the location of the head rule specification
     * @throws MaltChainedException if {@code url} is {@code null}, the stream cannot be opened or read,
     *         or a line does not contain a tab character
     */
    public void parseHeadRules(URL url) throws MaltChainedException {
        if (url == null) {
            throw new HeadRuleException("Could not read the head rules because no URL was given. ");
        }
        // NOTE(review): the reader uses the platform default charset; confirm whether the
        // specification files should be read with an explicit encoding.
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(url.openStream()));
        } catch (IOException e) {
            throw new HeadRuleException("Could not read the head rules from file '"+url.toString()+"'. ", e);
        }
        try {
            // The message is emitted at debug level, so guard on the same level.
            if (logger != null && logger.isDebugEnabled()) {
                logger.debug("Loading the head rule specification '"+url.toString()+"' ...\n");
            }
            String fileLine;
            while ((fileLine = br.readLine()) != null) {
                final String trimmed = fileLine.trim();
                // Skip blank lines and "--" comment lines. The check must be an OR: with an AND,
                // substring(0, 2) was applied to lines shorter than two characters and threw.
                if (trimmed.length() <= 1 || trimmed.startsWith("--")) {
                    continue;
                }
                final int index = fileLine.indexOf('\t');
                if (index == -1) {
                    throw new HeadRuleException("The specification of the head rule is not correct '"+fileLine+"'. ");
                }

                final HeadRule rule = new HeadRule(this, fileLine);
                put(fileLine.substring(0,index), rule);
            }
        } catch (IOException e) {
            throw new HeadRuleException("Could not read the head rules from file '"+url.toString()+"'. ", e);
        } finally {
            try {
                br.close();
            } catch (IOException ignored) {
                // Closing a reader is best-effort; a failure here must not mask the real outcome.
            }
        }
    }

    /**
     * Returns the head child of {@code nt} according to the rule that applies to it.
     *
     * @param nt the non-terminal node whose head child is requested
     * @return the head child chosen by the matching rule, or {@code null} if no rule matches
     * @throws MaltChainedException if the lookup or the rule evaluation fails
     */
    public PhraseStructureNode getHeadChild(NonTerminalNode nt) throws MaltChainedException {
        final HeadRule rule = findRule(nt);
        if (rule != null) {
            return rule.getHeadChild(nt);
        }
        return null;
    }

    /**
     * Returns the default direction of the rule that applies to {@code nt}.
     *
     * @param nt the non-terminal node whose default direction is requested
     * @return the matching rule's default direction, or {@link Direction#LEFT} if no rule matches
     * @throws MaltChainedException if the lookup fails
     */
    public Direction getDefaultDirection(NonTerminalNode nt) throws MaltChainedException {
        final HeadRule rule = findRule(nt);
        if (rule != null) {
            return rule.getDefaultDirection();
        }
        return Direction.LEFT;
    }

    /**
     * Finds the rule for {@code nt}: first by its label in the non-terminal symbol table and,
     * if that yields nothing, by its parent edge label in the edge label symbol table.
     *
     * @return the matching rule, or {@code null} if neither lookup finds one
     */
    private HeadRule findRule(NonTerminalNode nt) throws MaltChainedException {
        HeadRule rule = null;
        if (nt.hasLabel(nonTerminalSymbolTable)) {
            rule = this.get(nonTerminalSymbolTable.getName()+":"+nt.getLabelSymbol(nonTerminalSymbolTable));
        }
        if (rule == null && nt.hasParentEdgeLabel(edgelabelSymbolTable)) {
            rule = this.get(edgelabelSymbolTable.getName()+":"+nt.getParentEdgeLabelSymbol(edgelabelSymbolTable));
        }
        return rule;
    }

    /** Returns the logger used by this rule set; may be {@code null}. */
    public Logger getLogger() {
        return logger;
    }

    /** Replaces the logger used by this rule set. */
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /** Returns the data format instance given at construction time. */
    public DataFormatInstance getDataFormatInstance() {
        return dataFormatInstance;
    }

    /** Returns the symbol table handler given at construction time. */
    public SymbolTableHandler getSymbolTableHandler() {
        return symbolTableHandler;
    }

    /** Returns the string form of every stored rule, each followed by a newline. */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        for (HeadRule rule : this.values()) {
            sb.append(rule);
            sb.append('\n');
        }
        return sb.toString();
    }
}