001 // Copyright 2004, 2005 The Apache Software Foundation
002 //
003 // Licensed under the Apache License, Version 2.0 (the "License");
004 // you may not use this file except in compliance with the License.
005 // You may obtain a copy of the License at
006 //
007 // http://www.apache.org/licenses/LICENSE-2.0
008 //
009 // Unless required by applicable law or agreed to in writing, software
010 // distributed under the License is distributed on an "AS IS" BASIS,
011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 // See the License for the specific language governing permissions and
013 // limitations under the License.
014
015 package org.apache.tapestry.util.xml;
016
017 import org.apache.commons.logging.Log;
018 import org.apache.commons.logging.LogFactory;
019 import org.apache.hivemind.ApplicationRuntimeException;
020 import org.apache.hivemind.HiveMind;
021 import org.apache.hivemind.Location;
022 import org.apache.hivemind.Resource;
023 import org.apache.hivemind.impl.LocationImpl;
024 import org.apache.tapestry.Tapestry;
025 import org.apache.tapestry.util.RegexpMatcher;
026 import org.xml.sax.*;
027 import org.xml.sax.helpers.DefaultHandler;
028
029 import javax.xml.parsers.ParserConfigurationException;
030 import javax.xml.parsers.SAXParser;
031 import javax.xml.parsers.SAXParserFactory;
032 import java.io.IOException;
033 import java.io.InputStream;
034 import java.net.URL;
035 import java.util.ArrayList;
036 import java.util.HashMap;
037 import java.util.List;
038 import java.util.Map;
039
040 /**
041 * A simplified version of org.apache.commons.digester.Digester. This version is without as
042 * many bells and whistles but has some key features needed when parsing a document (rather than a
043 * configuration file): <br>
044 * <ul>
045 * <li>Notifications for each bit of text</li>
046 * <li>Tracking of exact location within the document.</li>
047 * </ul>
048 * <p>
049 * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more
050 * coding), in that there's a one-to-one relationship between an element and a rule.
051 * <p>
052 * Based on SAX2.
053 *
054 * @author Howard Lewis Ship
055 * @since 3.0
056 */
057
058 public class RuleDirectedParser extends DefaultHandler
059 {
060 private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class);
061
062 private static SAXParserFactory _parserFactory;
063
064 private Resource _documentLocation;
065
066 private List _ruleStack = new ArrayList();
067
068 private List _objectStack = new ArrayList();
069
070 private Object _documentObject;
071
072 private Locator _locator;
073
074 private int _line = -1;
075
076 private int _column = -1;
077
078 private Location _location;
079
080 private SAXParser _parser;
081
082 private RegexpMatcher _matcher;
083
084 private String _uri;
085
086 private String _localName;
087
088 private String _qName;
089
090 /**
091 * Map of {@link IRule}keyed on the local name of the element.
092 */
093 private Map _ruleMap = new HashMap();
094
095 /**
096 * Used to accumlate content provided by
097 * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}.
098 */
099
100 private StringBuffer _contentBuffer = new StringBuffer();
101
102 /**
103 * Map of paths to external entities (such as the DTD) keyed on public id.
104 */
105
106 private Map _entities = new HashMap();
107
108 public Object parse(Resource documentLocation)
109 {
110 if (LOG.isDebugEnabled())
111 LOG.debug("Parsing: " + documentLocation);
112
113 try
114 {
115 _documentLocation = documentLocation;
116
117 URL url = documentLocation.getResourceURL();
118
119 if (url == null)
120 throw new DocumentParseException(Tapestry.format("RuleDrivenParser.resource-missing", documentLocation), documentLocation);
121
122 return parse(url);
123 }
124 finally
125 {
126 _documentLocation = null;
127 _ruleStack.clear();
128 _objectStack.clear();
129 _documentObject = null;
130
131 _uri = null;
132 _localName = null;
133 _qName = null;
134
135 _line = -1;
136 _column = -1;
137 _location = null;
138 _locator = null;
139
140 _contentBuffer.setLength(0);
141 }
142 }
143
144 protected Object parse(URL url)
145 {
146 if (_parser == null)
147 _parser = constructParser();
148
149 InputStream stream = null;
150
151 try
152 {
153 stream = url.openStream();
154 }
155 catch (IOException ex)
156 {
157 throw new DocumentParseException(Tapestry.format(
158 "RuleDrivenParser.unable-to-open-resource",
159 url), _documentLocation, ex);
160 }
161
162 InputSource source = new InputSource(stream);
163
164 try
165 {
166 _parser.parse(source, this);
167
168 stream.close();
169 }
170 catch (Exception ex)
171 {
172 throw new DocumentParseException(Tapestry.format(
173 "RuleDrivenParser.parse-error",
174 url,
175 ex.getMessage()), getLocation(), ex);
176 }
177
178 if (LOG.isDebugEnabled())
179 LOG.debug("Document parsed as: " + _documentObject);
180
181 return _documentObject;
182 }
183
184 /**
185 * Returns an {@link Location}representing the current position within the document (depending
186 * on the parser, this may be accurate to column number level).
187 */
188
189 public Location getLocation()
190 {
191 if (_locator == null)
192 return null;
193
194 int line = _locator.getLineNumber();
195 int column = _locator.getColumnNumber();
196
197 if (_line != line || _column != column)
198 {
199 _location = null;
200 _line = line;
201 _column = column;
202 }
203
204 if (_location == null)
205 _location = new LocationImpl(_documentLocation, _line, _column);
206
207 return _location;
208 }
209
210 /**
211 * Pushes an object onto the object stack. The first object pushed is the "document object", the
212 * root object returned by the parse.
213 */
214 public void push(Object object)
215 {
216 if (_documentObject == null)
217 _documentObject = object;
218
219 push(_objectStack, object, "object stack");
220 }
221
222 /**
223 * Returns the top object on the object stack.
224 */
225 public Object peek()
226 {
227 return peek(_objectStack, 0);
228 }
229
230 /**
231 * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is
232 * the next-to-top object, etc.
233 */
234
235 public Object peek(int depth)
236 {
237 return peek(_objectStack, depth);
238 }
239
240 /**
241 * Removes and returns the top object on the object stack.
242 */
243 public Object pop()
244 {
245 return pop(_objectStack, "object stack");
246 }
247
248 private Object pop(List list, String name)
249 {
250 Object result = list.remove(list.size() - 1);
251
252 if (LOG.isDebugEnabled())
253 LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")");
254
255 return result;
256 }
257
258 private Object peek(List list, int depth)
259 {
260 return list.get(list.size() - 1 - depth);
261 }
262
263 private void push(List list, Object object, String name)
264 {
265 if (LOG.isDebugEnabled())
266 LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")");
267
268 list.add(object);
269 }
270
271 /**
272 * Pushes a new rule onto the rule stack.
273 */
274
275 protected void pushRule(IRule rule)
276 {
277 push(_ruleStack, rule, "rule stack");
278 }
279
280 /**
281 * Returns the top rule on the stack.
282 */
283
284 protected IRule peekRule()
285 {
286 return (IRule) peek(_ruleStack, 0);
287 }
288
289 protected IRule popRule()
290 {
291 return (IRule) pop(_ruleStack, "rule stack");
292 }
293
294 public void addRule(String localElementName, IRule rule)
295 {
296 _ruleMap.put(localElementName, rule);
297 }
298
299 /**
300 * Registers a public id and corresponding input source. Generally, the source is a wrapper
301 * around an input stream to a package resource.
302 *
303 * @param publicId
304 * the public identifier to be registerred, generally the publicId of a DTD related
305 * to the document being parsed
306 * @param entityPath
307 * the resource path of the entity, typically a DTD file. Relative files names are
308 * expected to be stored in the same package as the class file, otherwise a leading
309 * slash is an absolute pathname within the classpath.
310 */
311
312 public void registerEntity(String publicId, String entityPath)
313 {
314 if (LOG.isDebugEnabled())
315 LOG.debug("Registering " + publicId + " as " + entityPath);
316
317 if (_entities == null)
318 _entities = new HashMap();
319
320 _entities.put(publicId, entityPath);
321 }
322
323 protected IRule selectRule(String localName, Attributes attributes)
324 {
325 IRule rule = (IRule) _ruleMap.get(localName);
326
327 if (rule == null)
328 throw new DocumentParseException(Tapestry.format(
329 "RuleDrivenParser.no-rule-for-element",
330 localName), getLocation());
331
332 return rule;
333 }
334
335 /**
336 * Uses the {@link Locator}to track the position in the document as a {@link Location}. This
337 * is invoked once (before the initial element is parsed) and the Locator is retained and
338 * queried as to the current file location.
339 *
340 * @see #getLocation()
341 */
342 public void setDocumentLocator(Locator locator)
343 {
344 _locator = locator;
345 }
346
347 /**
348 * Accumulates the content in a buffer; the concatinated content is provided to the top rule
349 * just before any start or end tag.
350 */
351 public void characters(char[] ch, int start, int length) throws SAXException
352 {
353 _contentBuffer.append(ch, start, length);
354 }
355
356 /**
357 * Pops the top rule off the stack and invokes {@link IRule#endElement(RuleDirectedParser)}.
358 */
359 public void endElement(String uri, String localName, String qName) throws SAXException
360 {
361 fireContentRule();
362
363 _uri = uri;
364 _localName = localName;
365 _qName = qName;
366
367 popRule().endElement(this);
368 }
369
370 /**
371 * Ignorable content is ignored.
372 */
373 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
374 {
375 }
376
377 /**
378 * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto
379 * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}.
380 */
381 public void startElement(String uri, String localName, String qName, Attributes attributes)
382 throws SAXException
383 {
384 fireContentRule();
385
386 _uri = uri;
387 _localName = localName;
388 _qName = qName;
389
390 String name = extractName(uri, localName, qName);
391
392 IRule newRule = selectRule(name, attributes);
393
394 pushRule(newRule);
395
396 newRule.startElement(this, attributes);
397 }
398
399 private String extractName(String uri, String localName, String qName)
400 {
401 return HiveMind.isBlank(localName) ? qName : localName;
402 }
403
404 /**
405 * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2
406 * parser.
407 */
408 protected synchronized SAXParser constructParser()
409 {
410 if (_parserFactory == null)
411 {
412 _parserFactory = SAXParserFactory.newInstance();
413 configureParserFactory(_parserFactory);
414 }
415
416 try
417 {
418 return _parserFactory.newSAXParser();
419 }
420 catch (SAXException ex)
421 {
422 throw new ApplicationRuntimeException(ex);
423 }
424 catch (ParserConfigurationException ex)
425 {
426 throw new ApplicationRuntimeException(ex);
427 }
428
429 }
430
431 /**
432 * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is
433 * invoked. The default implementation sets validating to true and namespaceAware to false,
434 */
435
436 protected void configureParserFactory(SAXParserFactory factory)
437 {
438 factory.setValidating(true);
439 factory.setNamespaceAware(false);
440 }
441
442 /**
443 * Throws the exception.
444 */
445 public void error(SAXParseException ex) throws SAXException
446 {
447 fatalError(ex);
448 }
449
450 /**
451 * Throws the exception.
452 */
453 public void fatalError(SAXParseException ex) throws SAXException
454 {
455 // Sometimes, a bad parse "corrupts" a parser so that it doesn't
456 // work properly for future parses (of valid documents),
457 // so discard it here.
458
459 _parser = null;
460
461 throw ex;
462 }
463
464 /**
465 * Throws the exception.
466 */
467 public void warning(SAXParseException ex) throws SAXException
468 {
469 fatalError(ex);
470 }
471
472 public InputSource resolveEntity(String publicId, String systemId) throws SAXException
473 {
474 String entityPath = null;
475
476 if (LOG.isDebugEnabled())
477 LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = "
478 + systemId);
479
480 if (_entities != null)
481 entityPath = (String) _entities.get(publicId);
482
483 if (entityPath == null)
484 {
485 if (LOG.isDebugEnabled())
486 LOG.debug("Entity not found, using " + systemId);
487
488 return null;
489 }
490
491 InputStream stream = getClass().getResourceAsStream(entityPath);
492
493 InputSource result = new InputSource(stream);
494
495 if (result != null && LOG.isDebugEnabled())
496 LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")");
497
498 return result;
499 }
500
501 /**
502 * Validates that the input value matches against the specified Perl5 pattern. If valid, the
503 * method simply returns. If not a match, then an error message is generated (using the errorKey
504 * and the input value) and a {@link InvalidStringException}is thrown.
505 */
506
507 public void validate(String value, String pattern, String errorKey)
508 {
509 if (_matcher == null)
510 _matcher = new RegexpMatcher();
511
512 if (_matcher.matches(pattern, value))
513 return;
514
515 throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation());
516 }
517
518 public Resource getDocumentLocation()
519 {
520 return _documentLocation;
521 }
522
523 /**
524 * Returns the localName for the current element.
525 *
526 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
527 * java.lang.String, org.xml.sax.Attributes)
528 */
529 public String getLocalName()
530 {
531 return _localName;
532 }
533
534 /**
535 * Returns the qualified name for the current element.
536 *
537 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
538 * java.lang.String, org.xml.sax.Attributes)
539 */
540 public String getQName()
541 {
542 return _qName;
543 }
544
545 /**
546 * Returns the URI for the current element.
547 *
548 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
549 * java.lang.String, org.xml.sax.Attributes)
550 */
551 public String getUri()
552 {
553 return _uri;
554 }
555
556 private void fireContentRule()
557 {
558 String content = _contentBuffer.toString();
559 _contentBuffer.setLength(0);
560
561 if (!_ruleStack.isEmpty())
562 peekRule().content(this, content);
563 }
564
565 }