001 // Copyright 2004, 2005 The Apache Software Foundation 002 // 003 // Licensed under the Apache License, Version 2.0 (the "License"); 004 // you may not use this file except in compliance with the License. 005 // You may obtain a copy of the License at 006 // 007 // http://www.apache.org/licenses/LICENSE-2.0 008 // 009 // Unless required by applicable law or agreed to in writing, software 010 // distributed under the License is distributed on an "AS IS" BASIS, 011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 // See the License for the specific language governing permissions and 013 // limitations under the License. 014 015 package org.apache.tapestry.util.xml; 016 017 import org.apache.commons.logging.Log; 018 import org.apache.commons.logging.LogFactory; 019 import org.apache.hivemind.ApplicationRuntimeException; 020 import org.apache.hivemind.HiveMind; 021 import org.apache.hivemind.Location; 022 import org.apache.hivemind.Resource; 023 import org.apache.hivemind.impl.LocationImpl; 024 import org.apache.tapestry.Tapestry; 025 import org.apache.tapestry.util.RegexpMatcher; 026 import org.xml.sax.*; 027 import org.xml.sax.helpers.DefaultHandler; 028 029 import javax.xml.parsers.ParserConfigurationException; 030 import javax.xml.parsers.SAXParser; 031 import javax.xml.parsers.SAXParserFactory; 032 import java.io.IOException; 033 import java.io.InputStream; 034 import java.net.URL; 035 import java.util.ArrayList; 036 import java.util.HashMap; 037 import java.util.List; 038 import java.util.Map; 039 040 /** 041 * A simplified version of org.apache.commons.digester.Digester. This version is without as 042 * many bells and whistles but has some key features needed when parsing a document (rather than a 043 * configuration file): <br> 044 * <ul> 045 * <li>Notifications for each bit of text</li> 046 * <li>Tracking of exact location within the document.</li> 047 * </ul> 048 * <p> 049 * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more 050 * coding), in that there's a one-to-one relationship between an element and a rule. 051 * <p> 052 * Based on SAX2. 053 * 054 * @author Howard Lewis Ship 055 * @since 3.0 056 */ 057 058 public class RuleDirectedParser extends DefaultHandler 059 { 060 private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class); 061 062 private static SAXParserFactory _parserFactory; 063 064 private Resource _documentLocation; 065 066 private List _ruleStack = new ArrayList(); 067 068 private List _objectStack = new ArrayList(); 069 070 private Object _documentObject; 071 072 private Locator _locator; 073 074 private int _line = -1; 075 076 private int _column = -1; 077 078 private Location _location; 079 080 private SAXParser _parser; 081 082 private RegexpMatcher _matcher; 083 084 private String _uri; 085 086 private String _localName; 087 088 private String _qName; 089 090 /** 091 * Map of {@link IRule}keyed on the local name of the element. 092 */ 093 private Map _ruleMap = new HashMap(); 094 095 /** 096 * Used to accumlate content provided by 097 * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}. 098 */ 099 100 private StringBuffer _contentBuffer = new StringBuffer(); 101 102 /** 103 * Map of paths to external entities (such as the DTD) keyed on public id. 104 */ 105 106 private Map _entities = new HashMap(); 107 108 public Object parse(Resource documentLocation) 109 { 110 if (LOG.isDebugEnabled()) 111 LOG.debug("Parsing: " + documentLocation); 112 113 try 114 { 115 _documentLocation = documentLocation; 116 117 URL url = documentLocation.getResourceURL(); 118 119 if (url == null) 120 throw new DocumentParseException(Tapestry.format("RuleDrivenParser.resource-missing", documentLocation), documentLocation); 121 122 return parse(url); 123 } 124 finally 125 { 126 _documentLocation = null; 127 _ruleStack.clear(); 128 _objectStack.clear(); 129 _documentObject = null; 130 131 _uri = null; 132 _localName = null; 133 _qName = null; 134 135 _line = -1; 136 _column = -1; 137 _location = null; 138 _locator = null; 139 140 _contentBuffer.setLength(0); 141 } 142 } 143 144 protected Object parse(URL url) 145 { 146 if (_parser == null) 147 _parser = constructParser(); 148 149 InputStream stream = null; 150 151 try 152 { 153 stream = url.openStream(); 154 } 155 catch (IOException ex) 156 { 157 throw new DocumentParseException(Tapestry.format( 158 "RuleDrivenParser.unable-to-open-resource", 159 url), _documentLocation, ex); 160 } 161 162 InputSource source = new InputSource(stream); 163 164 try 165 { 166 _parser.parse(source, this); 167 168 stream.close(); 169 } 170 catch (Exception ex) 171 { 172 throw new DocumentParseException(Tapestry.format( 173 "RuleDrivenParser.parse-error", 174 url, 175 ex.getMessage()), getLocation(), ex); 176 } 177 178 if (LOG.isDebugEnabled()) 179 LOG.debug("Document parsed as: " + _documentObject); 180 181 return _documentObject; 182 } 183 184 /** 185 * Returns an {@link Location}representing the current position within the document (depending 186 * on the parser, this may be accurate to column number level). 187 */ 188 189 public Location getLocation() 190 { 191 if (_locator == null) 192 return null; 193 194 int line = _locator.getLineNumber(); 195 int column = _locator.getColumnNumber(); 196 197 if (_line != line || _column != column) 198 { 199 _location = null; 200 _line = line; 201 _column = column; 202 } 203 204 if (_location == null) 205 _location = new LocationImpl(_documentLocation, _line, _column); 206 207 return _location; 208 } 209 210 /** 211 * Pushes an object onto the object stack. The first object pushed is the "document object", the 212 * root object returned by the parse. 213 */ 214 public void push(Object object) 215 { 216 if (_documentObject == null) 217 _documentObject = object; 218 219 push(_objectStack, object, "object stack"); 220 } 221 222 /** 223 * Returns the top object on the object stack. 224 */ 225 public Object peek() 226 { 227 return peek(_objectStack, 0); 228 } 229 230 /** 231 * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is 232 * the next-to-top object, etc. 233 */ 234 235 public Object peek(int depth) 236 { 237 return peek(_objectStack, depth); 238 } 239 240 /** 241 * Removes and returns the top object on the object stack. 242 */ 243 public Object pop() 244 { 245 return pop(_objectStack, "object stack"); 246 } 247 248 private Object pop(List list, String name) 249 { 250 Object result = list.remove(list.size() - 1); 251 252 if (LOG.isDebugEnabled()) 253 LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")"); 254 255 return result; 256 } 257 258 private Object peek(List list, int depth) 259 { 260 return list.get(list.size() - 1 - depth); 261 } 262 263 private void push(List list, Object object, String name) 264 { 265 if (LOG.isDebugEnabled()) 266 LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")"); 267 268 list.add(object); 269 } 270 271 /** 272 * Pushes a new rule onto the rule stack. 273 */ 274 275 protected void pushRule(IRule rule) 276 { 277 push(_ruleStack, rule, "rule stack"); 278 } 279 280 /** 281 * Returns the top rule on the stack. 282 */ 283 284 protected IRule peekRule() 285 { 286 return (IRule) peek(_ruleStack, 0); 287 } 288 289 protected IRule popRule() 290 { 291 return (IRule) pop(_ruleStack, "rule stack"); 292 } 293 294 public void addRule(String localElementName, IRule rule) 295 { 296 _ruleMap.put(localElementName, rule); 297 } 298 299 /** 300 * Registers a public id and corresponding input source. Generally, the source is a wrapper 301 * around an input stream to a package resource. 302 * 303 * @param publicId 304 * the public identifier to be registerred, generally the publicId of a DTD related 305 * to the document being parsed 306 * @param entityPath 307 * the resource path of the entity, typically a DTD file. Relative files names are 308 * expected to be stored in the same package as the class file, otherwise a leading 309 * slash is an absolute pathname within the classpath. 310 */ 311 312 public void registerEntity(String publicId, String entityPath) 313 { 314 if (LOG.isDebugEnabled()) 315 LOG.debug("Registering " + publicId + " as " + entityPath); 316 317 if (_entities == null) 318 _entities = new HashMap(); 319 320 _entities.put(publicId, entityPath); 321 } 322 323 protected IRule selectRule(String localName, Attributes attributes) 324 { 325 IRule rule = (IRule) _ruleMap.get(localName); 326 327 if (rule == null) 328 throw new DocumentParseException(Tapestry.format( 329 "RuleDrivenParser.no-rule-for-element", 330 localName), getLocation()); 331 332 return rule; 333 } 334 335 /** 336 * Uses the {@link Locator}to track the position in the document as a {@link Location}. This 337 * is invoked once (before the initial element is parsed) and the Locator is retained and 338 * queried as to the current file location. 339 * 340 * @see #getLocation() 341 */ 342 public void setDocumentLocator(Locator locator) 343 { 344 _locator = locator; 345 } 346 347 /** 348 * Accumulates the content in a buffer; the concatinated content is provided to the top rule 349 * just before any start or end tag. 350 */ 351 public void characters(char[] ch, int start, int length) throws SAXException 352 { 353 _contentBuffer.append(ch, start, length); 354 } 355 356 /** 357 * Pops the top rule off the stack and invokes {@link IRule#endElement(RuleDirectedParser)}. 358 */ 359 public void endElement(String uri, String localName, String qName) throws SAXException 360 { 361 fireContentRule(); 362 363 _uri = uri; 364 _localName = localName; 365 _qName = qName; 366 367 popRule().endElement(this); 368 } 369 370 /** 371 * Ignorable content is ignored. 372 */ 373 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException 374 { 375 } 376 377 /** 378 * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto 379 * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}. 380 */ 381 public void startElement(String uri, String localName, String qName, Attributes attributes) 382 throws SAXException 383 { 384 fireContentRule(); 385 386 _uri = uri; 387 _localName = localName; 388 _qName = qName; 389 390 String name = extractName(uri, localName, qName); 391 392 IRule newRule = selectRule(name, attributes); 393 394 pushRule(newRule); 395 396 newRule.startElement(this, attributes); 397 } 398 399 private String extractName(String uri, String localName, String qName) 400 { 401 return HiveMind.isBlank(localName) ? qName : localName; 402 } 403 404 /** 405 * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2 406 * parser. 407 */ 408 protected synchronized SAXParser constructParser() 409 { 410 if (_parserFactory == null) 411 { 412 _parserFactory = SAXParserFactory.newInstance(); 413 configureParserFactory(_parserFactory); 414 } 415 416 try 417 { 418 return _parserFactory.newSAXParser(); 419 } 420 catch (SAXException ex) 421 { 422 throw new ApplicationRuntimeException(ex); 423 } 424 catch (ParserConfigurationException ex) 425 { 426 throw new ApplicationRuntimeException(ex); 427 } 428 429 } 430 431 /** 432 * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is 433 * invoked. The default implementation sets validating to true and namespaceAware to false, 434 */ 435 436 protected void configureParserFactory(SAXParserFactory factory) 437 { 438 factory.setValidating(true); 439 factory.setNamespaceAware(false); 440 } 441 442 /** 443 * Throws the exception. 444 */ 445 public void error(SAXParseException ex) throws SAXException 446 { 447 fatalError(ex); 448 } 449 450 /** 451 * Throws the exception. 452 */ 453 public void fatalError(SAXParseException ex) throws SAXException 454 { 455 // Sometimes, a bad parse "corrupts" a parser so that it doesn't 456 // work properly for future parses (of valid documents), 457 // so discard it here. 458 459 _parser = null; 460 461 throw ex; 462 } 463 464 /** 465 * Throws the exception. 466 */ 467 public void warning(SAXParseException ex) throws SAXException 468 { 469 fatalError(ex); 470 } 471 472 public InputSource resolveEntity(String publicId, String systemId) throws SAXException 473 { 474 String entityPath = null; 475 476 if (LOG.isDebugEnabled()) 477 LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = " 478 + systemId); 479 480 if (_entities != null) 481 entityPath = (String) _entities.get(publicId); 482 483 if (entityPath == null) 484 { 485 if (LOG.isDebugEnabled()) 486 LOG.debug("Entity not found, using " + systemId); 487 488 return null; 489 } 490 491 InputStream stream = getClass().getResourceAsStream(entityPath); 492 493 InputSource result = new InputSource(stream); 494 495 if (result != null && LOG.isDebugEnabled()) 496 LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")"); 497 498 return result; 499 } 500 501 /** 502 * Validates that the input value matches against the specified Perl5 pattern. If valid, the 503 * method simply returns. If not a match, then an error message is generated (using the errorKey 504 * and the input value) and a {@link InvalidStringException}is thrown. 505 */ 506 507 public void validate(String value, String pattern, String errorKey) 508 { 509 if (_matcher == null) 510 _matcher = new RegexpMatcher(); 511 512 if (_matcher.matches(pattern, value)) 513 return; 514 515 throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation()); 516 } 517 518 public Resource getDocumentLocation() 519 { 520 return _documentLocation; 521 } 522 523 /** 524 * Returns the localName for the current element. 525 * 526 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 527 * java.lang.String, org.xml.sax.Attributes) 528 */ 529 public String getLocalName() 530 { 531 return _localName; 532 } 533 534 /** 535 * Returns the qualified name for the current element. 536 * 537 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 538 * java.lang.String, org.xml.sax.Attributes) 539 */ 540 public String getQName() 541 { 542 return _qName; 543 } 544 545 /** 546 * Returns the URI for the current element. 547 * 548 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 549 * java.lang.String, org.xml.sax.Attributes) 550 */ 551 public String getUri() 552 { 553 return _uri; 554 } 555 556 private void fireContentRule() 557 { 558 String content = _contentBuffer.toString(); 559 _contentBuffer.setLength(0); 560 561 if (!_ruleStack.isEmpty()) 562 peekRule().content(this, content); 563 } 564 565 }