001 /*--------------------------------------------------------------------------+ 002 $Id: XMLUtils.java 26369 2010-03-01 18:28:42Z hummelb $ 003 | | 004 | Copyright 2005-2010 Technische Universitaet Muenchen | 005 | | 006 | Licensed under the Apache License, Version 2.0 (the "License"); | 007 | you may not use this file except in compliance with the License. | 008 | You may obtain a copy of the License at | 009 | | 010 | http://www.apache.org/licenses/LICENSE-2.0 | 011 | | 012 | Unless required by applicable law or agreed to in writing, software | 013 | distributed under the License is distributed on an "AS IS" BASIS, | 014 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 015 | See the License for the specific language governing permissions and | 016 | limitations under the License. | 017 +--------------------------------------------------------------------------*/ 018 package edu.tum.cs.commons.xml; 019 020 import java.io.File; 021 import java.io.FileInputStream; 022 import java.io.IOException; 023 import java.io.StringWriter; 024 import java.net.MalformedURLException; 025 import java.net.URL; 026 import java.util.ArrayList; 027 import java.util.List; 028 029 import javax.xml.parsers.DocumentBuilder; 030 import javax.xml.parsers.DocumentBuilderFactory; 031 import javax.xml.parsers.ParserConfigurationException; 032 import javax.xml.parsers.SAXParser; 033 import javax.xml.parsers.SAXParserFactory; 034 import javax.xml.transform.Transformer; 035 import javax.xml.transform.TransformerException; 036 import javax.xml.transform.TransformerFactory; 037 import javax.xml.transform.dom.DOMSource; 038 import javax.xml.transform.stream.StreamResult; 039 import javax.xml.transform.stream.StreamSource; 040 041 import org.w3c.dom.Document; 042 import org.w3c.dom.Element; 043 import org.w3c.dom.Node; 044 import org.w3c.dom.NodeList; 045 import org.xml.sax.ErrorHandler; 046 import org.xml.sax.InputSource; 047 import org.xml.sax.SAXException; 048 import org.xml.sax.SAXParseException; 049 import org.xml.sax.helpers.DefaultHandler; 050 051 import edu.tum.cs.commons.assertion.CCSMPre; 052 import edu.tum.cs.commons.string.StringUtils; 053 054 /** 055 * Collection of utility methods for XML. 056 * 057 * @author Florian Deissenboeck 058 * @author $Author: hummelb $ 059 * @version $Rev: 26369 $ 060 * @levd.rating GREEN Hash: 3E4CDEC5A0EAAC290A6EDF7279A9C811 061 */ 062 public class XMLUtils { 063 064 /** Identifier for schema source. */ 065 private static final String ATTRIBUTE_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource"; 066 067 /** Schema URL */ 068 private static final String SCHEMA_URL = "http://www.w3.org/2001/XMLSchema"; 069 070 /** Identifier for schema language. */ 071 private static final String ATTRIBUTE_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; 072 073 /** 074 * Parse a file without validation. 075 * 076 * @param file 077 * the file to parse. 078 * @return the DOM document. 079 * 080 * @throws SAXException 081 * if a parsing exception occurs, i.e. if the file is not 082 * well-formed. 083 * @throws IOException 084 * if an IO exception occurs. 085 */ 086 public static Document parse(File file) throws SAXException, IOException { 087 return createSchemaUnawareParser().parse(file); 088 } 089 090 /** 091 * Parse an input source without validation. 092 * 093 * @param input 094 * the input source to parse 095 * @return the DOM document. 096 * 097 * @throws SAXException 098 * if a parsing exception occurs, i.e. if the file is not 099 * well-formed. 100 * @throws IOException 101 * if an IO exception occurs. 102 */ 103 public static Document parse(InputSource input) throws SAXException, 104 IOException { 105 106 return createSchemaUnawareParser().parse(input); 107 } 108 109 /** 110 * Parse an input source using SAX without validation. 111 * 112 * 113 * @throws SAXException 114 * if a parsing exception occurs, i.e. if the file is not 115 * well-formed. 116 * @throws IOException 117 * if an IO exception occurs. 118 */ 119 public static void parseSAX(File file, DefaultHandler handler) 120 throws SAXException, IOException { 121 createSchemaUnawareSAXParser().parse(file, handler); 122 } 123 124 /** 125 * Parse an input source using SAX without validation. 126 * 127 * 128 * @throws SAXException 129 * if a parsing exception occurs, i.e. if the file is not 130 * well-formed. 131 * @throws IOException 132 * if an IO exception occurs. 133 */ 134 public static void parseSAX(InputSource input, DefaultHandler handler) 135 throws SAXException, IOException { 136 createSchemaUnawareSAXParser().parse(input, handler); 137 } 138 139 /** 140 * Parse and validate file using schema. This implements a custom error 141 * handler to avoid different behaviour between the JAXP implementations 142 * shipping with Java 1.5 and Java 1.6. 143 * 144 * @param file 145 * the file to parse. 146 * @param schemaURL 147 * URL point to schema, may not be null 148 * @return the DOM document. 149 * 150 * @throws SAXException 151 * if a parsing exception occurs, i.e. if the file is not 152 * well-formed or not valid 153 * @throws IOException 154 * if an IO exception occurs. 155 */ 156 public static Document parse(File file, URL schemaURL) throws SAXException, 157 IOException { 158 159 FileInputStream stream = new FileInputStream(file); 160 try { 161 return parse(new InputSource(stream), schemaURL); 162 } finally { 163 stream.close(); 164 } 165 } 166 167 /** 168 * Parse and validate file using schema. This implements a custom error 169 * handler to avoid different behaviour between the JAXP implementations 170 * shipping with Java 1.5 and Java 1.6. 171 * 172 * @param input 173 * the input to parse. 174 * @param schemaURL 175 * URL point to schema, may not be null 176 * @return the DOM document. 177 * 178 * @throws SAXException 179 * if a parsing exception occurs, i.e. if the file is not 180 * well-formed or not valid 181 * @throws IOException 182 * if an IO exception occurs. 183 */ 184 public static Document parse(InputSource input, URL schemaURL) 185 throws SAXException, IOException { 186 187 CCSMPre.isTrue(schemaURL != null, "Schema URL may not be null!"); 188 189 DocumentBuilder parser = createSchemaAwareParser(schemaURL); 190 191 XMLErrorHandler errorHandler = new XMLErrorHandler(); 192 parser.setErrorHandler(errorHandler); 193 Document document = parser.parse(input); 194 195 if (errorHandler.exception != null) { 196 throw errorHandler.exception; 197 } 198 199 return document; 200 } 201 202 /** 203 * Parse and validate file using SAX and schema. 204 * 205 * @param file 206 * the file to parse. 207 * @param schemaURL 208 * URL point to schema, may not be null 209 * 210 * @throws SAXException 211 * if a parsing exception occurs, i.e. if the file is not 212 * well-formed or not valid 213 * @throws IOException 214 * if an IO exception occurs. 215 */ 216 public static void parseSAX(File file, URL schemaURL, DefaultHandler handler) 217 throws SAXException, IOException { 218 219 FileInputStream stream = new FileInputStream(file); 220 try { 221 parseSAX(new InputSource(stream), schemaURL, handler); 222 } finally { 223 stream.close(); 224 } 225 } 226 227 /** 228 * Parse and validate file using SAX and schema. 229 * 230 * @param input 231 * the input to parse. 232 * @param schemaURL 233 * URL point to schema, may not be null 234 * 235 * @throws SAXException 236 * if a parsing exception occurs, i.e. if the file is not 237 * well-formed or not valid 238 * @throws IOException 239 * if an IO exception occurs. 240 */ 241 public static void parseSAX(InputSource input, URL schemaURL, 242 DefaultHandler handler) throws SAXException, IOException { 243 244 CCSMPre.isTrue(schemaURL != null, "Schema URL may not be null!"); 245 createSchemaAwareSAXParser(schemaURL).parse(input, handler); 246 } 247 248 /** Creates a schema-unaware XML parser */ 249 private static DocumentBuilder createSchemaUnawareParser() { 250 251 try { 252 return createNamespaceAwareDocumentBuilderFactory() 253 .newDocumentBuilder(); 254 } catch (ParserConfigurationException e) { 255 throw new IllegalStateException( 256 "No document builder found, probably Java is misconfigured!", 257 e); 258 } 259 } 260 261 /** Creates a schema-unaware SAX parser */ 262 private static SAXParser createSchemaUnawareSAXParser() throws SAXException { 263 try { 264 return createNamespaceAwareSAXParserFactory().newSAXParser(); 265 } catch (ParserConfigurationException e) { 266 throw new IllegalStateException( 267 "No SAX parser found, probably Java is misconfigured!", e); 268 } 269 } 270 271 /** Creates a schema-aware XML parser */ 272 private static DocumentBuilder createSchemaAwareParser(URL schemaURL) { 273 DocumentBuilderFactory dbf = createNamespaceAwareDocumentBuilderFactory(); 274 dbf.setValidating(true); 275 dbf.setAttribute(ATTRIBUTE_SCHEMA_LANGUAGE, SCHEMA_URL); 276 dbf.setAttribute(ATTRIBUTE_SCHEMA_SOURCE, schemaURL.toString()); 277 278 try { 279 return dbf.newDocumentBuilder(); 280 } catch (ParserConfigurationException e) { 281 throw new IllegalStateException( 282 "No document builder found, probably Java is misconfigured!", 283 e); 284 } 285 } 286 287 /** Creates a schema-aware SAX parser */ 288 private static SAXParser createSchemaAwareSAXParser(URL schemaURL) 289 throws SAXException { 290 SAXParserFactory spf = createNamespaceAwareSAXParserFactory(); 291 spf.setValidating(true); 292 try { 293 SAXParser parser = spf.newSAXParser(); 294 parser.setProperty(ATTRIBUTE_SCHEMA_LANGUAGE, SCHEMA_URL); 295 parser.setProperty(ATTRIBUTE_SCHEMA_SOURCE, schemaURL.toString()); 296 return parser; 297 } catch (ParserConfigurationException e) { 298 throw new IllegalStateException( 299 "No SAX parser found, probably Java is misconfigured!", e); 300 } 301 } 302 303 /** Creates a namespace-aware {@link DocumentBuilderFactory} */ 304 private static DocumentBuilderFactory createNamespaceAwareDocumentBuilderFactory() { 305 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 306 dbf.setNamespaceAware(true); 307 308 return dbf; 309 } 310 311 /** Creates a namespace-aware {@link SAXParserFactory} */ 312 private static SAXParserFactory createNamespaceAwareSAXParserFactory() { 313 SAXParserFactory spf = SAXParserFactory.newInstance(); 314 spf.setNamespaceAware(true); 315 return spf; 316 } 317 318 /** 319 * Same as {@link #parse(File, URL)} but with schema file. 320 * 321 * @throws IllegalArgumentException 322 * if the schema file could not be converted to an URL 323 */ 324 public static Document parse(File file, File schema) throws SAXException, 325 IOException { 326 try { 327 return parse(file, schema.toURI().toURL()); 328 } catch (MalformedURLException e) { 329 throw new IllegalArgumentException( 330 "Schema file could not be converted to URL: " + e); 331 } 332 } 333 334 /** 335 * Returns a string representation of the given XML document, which is 336 * "pretty printed", i.e. the tags are indented. 337 */ 338 public static String prettyPrint(Document doc) throws TransformerException { 339 URL url = XMLUtils.class.getResource("pretty.xsl"); 340 StreamSource xslSource = new StreamSource(url.toExternalForm()); 341 Transformer transformer = TransformerFactory.newInstance() 342 .newTransformer(xslSource); 343 344 DOMSource source = new DOMSource(doc); 345 StringWriter writer = new StringWriter(); 346 StreamResult result = new StreamResult(writer); 347 transformer.transform(source, result); 348 return StringUtils.normalizeLineBreaks(writer.toString()); 349 } 350 351 /** 352 * Determines the index (starting at 0) of the given element relative to 353 * other element nodes for the same parent. 354 */ 355 public static int getElementPosition(Element element) { 356 int num = -1; 357 Node node = element; 358 while (node != null) { 359 if (node.getNodeType() == Node.ELEMENT_NODE) { 360 ++num; 361 } 362 node = node.getPreviousSibling(); 363 } 364 return num; 365 } 366 367 /** 368 * Returns all children of the given element which are element named as 369 * specified. 370 */ 371 public static List<Element> getNamedChildren(Element element, 372 String elementNames) { 373 List<Element> result = new ArrayList<Element>(); 374 NodeList children = element.getChildNodes(); 375 for (int i = 0; i < children.getLength(); ++i) { 376 Node node = children.item(i); 377 if (node.getNodeType() == Node.ELEMENT_NODE 378 && node.getNodeName().equals(elementNames)) { 379 result.add((Element) node); 380 } 381 } 382 return result; 383 } 384 385 /** 386 * Returns the first child of the given element which is an element named as 387 * specified. Returns null if none are found. 388 */ 389 public static Element getNamedChild(Element element, String name) { 390 List<Element> children = XMLUtils.getNamedChildren(element, name); 391 if (children.size() > 0) { 392 return children.get(0); 393 } 394 return null; 395 } 396 397 /** 398 * Get the text content of the given element's first child that is an 399 * element named as specified. If none is found, the empty string is 400 * returned. 401 */ 402 public static String getNamedChildContent(Element parent, String name) { 403 Element element = XMLUtils.getNamedChild(parent, name); 404 if (element == null) { 405 return StringUtils.EMPTY_STRING; 406 } 407 return element.getTextContent(); 408 } 409 410 /** 411 * Extracts all ElementNodes from a NodeList and returns the result as a 412 * list. 413 * 414 * @param nodeList 415 * the NodeList to be searched for ElementNodes. 416 * @return an array containing all ElementNodes stored in the given node 417 * list or null if the input has been null. 418 */ 419 public static List<Element> elementNodes(NodeList nodeList) { 420 if (nodeList == null) { 421 return null; 422 } 423 List<Element> result = new ArrayList<Element>(); 424 int len = nodeList.getLength(); 425 for (int i = 0; i < len; ++i) { 426 Node node = nodeList.item(i); 427 if (node.getNodeType() == Node.ELEMENT_NODE) { 428 result.add((Element) node); 429 } 430 } 431 return result; 432 } 433 434 /** 435 * Get all leaf elements of an XML tree rooted at an element 436 * 437 * @param root 438 * The root element 439 * @return List of all leaf elements 440 */ 441 public static List<Element> leafElementNodes(Element root) { 442 List<Element> leafElementNodes = new ArrayList<Element>(); 443 leafElementNodes(root, leafElementNodes); 444 return leafElementNodes; 445 } 446 447 /** Add all leaf element nodes of an XML tree rooted at an element to a list */ 448 private static void leafElementNodes(Element root, 449 List<Element> leafElementNodes) { 450 List<Element> children = XMLUtils.elementNodes(root.getChildNodes()); 451 if (children.isEmpty()) { 452 leafElementNodes.add(root); 453 } else { 454 for (Element child : children) { 455 leafElementNodes(child, leafElementNodes); 456 } 457 } 458 } 459 460 /** Creates an empty XML document. */ 461 public static Document createEmptyDocument() { 462 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 463 DocumentBuilder builder; 464 try { 465 builder = factory.newDocumentBuilder(); 466 } catch (ParserConfigurationException e) { 467 throw new IllegalStateException( 468 "No document builder found, probably Java is misconfigured!", 469 e); 470 } 471 return builder.newDocument(); 472 } 473 474 /** 475 * Simple error handler for handling validation errors. This handler stores 476 * the first problem raised during parsing. 477 */ 478 private static class XMLErrorHandler implements ErrorHandler { 479 480 /** 481 * The stored exception. Value unequal <code>null</code> signals a 482 * validation problem. 483 */ 484 private SAXParseException exception; 485 486 /** {@inheritDoc} */ 487 public void error(SAXParseException exception) { 488 if (this.exception == null) { 489 this.exception = exception; 490 } 491 } 492 493 /** {@inheritDoc} */ 494 public void fatalError(SAXParseException exception) { 495 error(exception); 496 } 497 498 /** {@inheritDoc} */ 499 public void warning(SAXParseException exception) { 500 System.out.println(exception); 501 // ignore 502 } 503 } 504 }