001 package org.maltparser.core.io.dataformat;
002
003 import java.net.URL;
004 import java.util.HashSet;
005 import java.util.SortedMap;
006 import java.util.TreeMap;
007
008 import javax.xml.parsers.DocumentBuilder;
009 import javax.xml.parsers.DocumentBuilderFactory;
010 import javax.xml.parsers.ParserConfigurationException;
011
012 import org.maltparser.core.exception.MaltChainedException;
013 import org.maltparser.core.helper.SystemLogger;
014 import org.maltparser.core.helper.Util;
015 import org.maltparser.core.symbol.SymbolTableHandler;
016 import org.w3c.dom.Element;
017 import org.w3c.dom.NodeList;
018 import org.xml.sax.SAXException;
019
020 /**
021 *
022 *
023 * @author Johan Hall
024 * @since 1.0
025 **/
026 public class DataFormatSpecification {
027 public enum DataStructure {
028 DEPENDENCY, // Dependency structure
029 PHRASE, // Phrase structure
030 };
031 private int entryPositionCounter;
032 private String dataFormatName;
033 private DataStructure dataStructure;
034 private final SortedMap<String, DataFormatEntry> entries;
035 private final HashSet<Dependency> dependencies;
036 // private final HashSet<SyntaxGraphReader> supportedReaders;
037 // private final HashSet<SyntaxGraphWriter> supportedWriters;
038
039 public DataFormatSpecification() {
040 entries = new TreeMap<String, DataFormatEntry>();
041 entryPositionCounter = 0;
042 dependencies = new HashSet<Dependency>();
043 // supportedReaders = new HashSet<SyntaxGraphReader>();
044 // supportedWriters = new HashSet<SyntaxGraphWriter>();
045 }
046
047 public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy, String rootLabel) throws MaltChainedException {
048 return new DataFormatInstance(entries, symbolTables, nullValueStrategy, rootLabel, this);
049
050 }
051
052 public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
053 URL url = Util.findURL(fileName);
054 if (url == null) {
055 throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
056 }
057 parseDataFormatXMLfile(url);
058 }
059
060 public HashSet<Dependency> getDependencies() {
061 return dependencies;
062 }
063
064 public void parseDataFormatXMLfile(URL url) throws MaltChainedException {
065 if (url == null) {
066 throw new DataFormatException("The data format specifcation file cannot be found. ");
067 }
068
069 if (SystemLogger.logger().isInfoEnabled()) {
070 int index = url.toString().indexOf('!');
071 if (index == -1) {
072 SystemLogger.logger().debug(" Data Format : "+url.toString()+"\n");
073 } else {
074 SystemLogger.logger().debug(" Data Format : "+url.toString().substring(index+1)+"\n");
075 }
076 }
077
078 try {
079 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
080 DocumentBuilder db = dbf.newDocumentBuilder();
081
082 Element root = db.parse(url.openStream()).getDocumentElement();
083 if (root.getNodeName().equals("dataformat")) {
084 dataFormatName = root.getAttribute("name");
085 if (root.getAttribute("datastructure").length() > 0) {
086 dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase());
087 } else {
088 dataStructure = DataStructure.DEPENDENCY;
089 }
090 } else {
091 throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
092 }
093 NodeList cols = root.getElementsByTagName("column");
094 Element col = null;
095 for (int i = 0, n = cols.getLength(); i < n; i++) {
096 col = (Element)cols.item(i);
097 DataFormatEntry entry = new DataFormatEntry(i, col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
098 entries.put(entry.getDataFormatEntryName(), entry);
099 }
100 NodeList deps = root.getElementsByTagName("dependencies");
101 if (deps.getLength() > 0) {
102 NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
103 for (int i = 0, n = dep.getLength(); i < n; i++) {
104 Element e = (Element)dep.item(i);
105 dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
106 }
107 }
108 } catch (java.io.IOException e) {
109 throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
110 } catch (ParserConfigurationException e) {
111 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
112 } catch (SAXException e) {
113 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
114 }
115 }
116
117 public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
118 DataFormatEntry entry = new DataFormatEntry(entryPositionCounter++, dataFormatEntryName, category, type, defaultOutput);
119 entries.put(entry.getDataFormatEntryName(), entry);
120 }
121
122 public DataFormatEntry getEntry(String dataFormatEntryName) {
123 return entries.get(dataFormatEntryName);
124 }
125
126 public String getDataFormatName() {
127 return dataFormatName;
128 }
129
130 public DataStructure getDataStructure() {
131 return dataStructure;
132 }
133
134 public String toString() {
135 final StringBuilder sb = new StringBuilder();
136 sb.append("Data format specification: ");
137 sb.append(dataFormatName);
138 sb.append('\n');
139 for (DataFormatEntry dfe : entries.values()) {
140 sb.append(dfe);
141 sb.append('\n');
142 }
143 return sb.toString();
144 }
145
146 public class Dependency {
147 protected String dependentOn;
148 protected String urlString;
149 protected String map;
150 protected String mapUrl;
151
152 public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
153 setDependentOn(dependentOn);
154 setUrlString(urlString);
155 setMap(map);
156 setMapUrl(mapUrl);
157 }
158
159 public String getDependentOn() {
160 return dependentOn;
161 }
162 protected void setDependentOn(String dependentOn) {
163 this.dependentOn = dependentOn;
164 }
165
166 public String getUrlString() {
167 return urlString;
168 }
169
170 public void setUrlString(String urlString) {
171 this.urlString = urlString;
172 }
173
174 public String getMap() {
175 return map;
176 }
177 protected void setMap(String map) {
178 this.map = map;
179 }
180
181 public String getMapUrl() {
182 return mapUrl;
183 }
184
185 public void setMapUrl(String mapUrl) {
186 this.mapUrl = mapUrl;
187 }
188 }
189 }