001    // Copyright 2004, 2005 The Apache Software Foundation
002    //
003    // Licensed under the Apache License, Version 2.0 (the "License");
004    // you may not use this file except in compliance with the License.
005    // You may obtain a copy of the License at
006    //
007    //     http://www.apache.org/licenses/LICENSE-2.0
008    //
009    // Unless required by applicable law or agreed to in writing, software
010    // distributed under the License is distributed on an "AS IS" BASIS,
011    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012    // See the License for the specific language governing permissions and
013    // limitations under the License.
014    
015    package org.apache.tapestry.util.xml;
016    
017    import org.apache.commons.logging.Log;
018    import org.apache.commons.logging.LogFactory;
019    import org.apache.hivemind.ApplicationRuntimeException;
020    import org.apache.hivemind.HiveMind;
021    import org.apache.hivemind.Location;
022    import org.apache.hivemind.Resource;
023    import org.apache.hivemind.impl.LocationImpl;
024    import org.apache.tapestry.Tapestry;
025    import org.apache.tapestry.util.RegexpMatcher;
026    import org.xml.sax.*;
027    import org.xml.sax.helpers.DefaultHandler;
028    
029    import javax.xml.parsers.ParserConfigurationException;
030    import javax.xml.parsers.SAXParser;
031    import javax.xml.parsers.SAXParserFactory;
032    import java.io.IOException;
033    import java.io.InputStream;
034    import java.net.URL;
035    import java.util.ArrayList;
036    import java.util.HashMap;
037    import java.util.List;
038    import java.util.Map;
039    
040    /**
041     * A simplified version of org.apache.commons.digester.Digester. This version is without as
042     * many bells and whistles but has some key features needed when parsing a document (rather than a
043     * configuration file): <br>
044     * <ul>
045     * <li>Notifications for each bit of text</li>
046     * <li>Tracking of exact location within the document.</li>
047     * </ul>
048     * <p>
049     * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more
050     * coding), in that there's a one-to-one relationship between an element and a rule.
051     * <p>
052     * Based on SAX2.
053     *
054     * @author Howard Lewis Ship
055     * @since 3.0
056     */
057    
058    public class RuleDirectedParser extends DefaultHandler
059    {
060        private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class);
061    
062        private static SAXParserFactory _parserFactory;
063    
064        private Resource _documentLocation;
065    
066        private List _ruleStack = new ArrayList();
067    
068        private List _objectStack = new ArrayList();
069    
070        private Object _documentObject;
071    
072        private Locator _locator;
073    
074        private int _line = -1;
075    
076        private int _column = -1;
077    
078        private Location _location;
079    
080        private SAXParser _parser;
081    
082        private RegexpMatcher _matcher;
083    
084        private String _uri;
085    
086        private String _localName;
087    
088        private String _qName;
089    
090        /**
091         * Map of {@link IRule}keyed on the local name of the element.
092         */
093        private Map _ruleMap = new HashMap();
094    
095        /**
096         * Used to accumlate content provided by
097         * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}.
098         */
099    
100        private StringBuffer _contentBuffer = new StringBuffer();
101    
102        /**
103         * Map of paths to external entities (such as the DTD) keyed on public id.
104         */
105    
106        private Map _entities = new HashMap();
107    
108        public Object parse(Resource documentLocation)
109        {
110            if (LOG.isDebugEnabled())
111                LOG.debug("Parsing: " + documentLocation);
112    
113            try
114            {
115                _documentLocation = documentLocation;
116    
117                URL url = documentLocation.getResourceURL();
118    
119                if (url == null)
120                    throw new DocumentParseException(Tapestry.format("RuleDrivenParser.resource-missing", documentLocation), documentLocation);
121    
122                return parse(url);
123            }
124            finally
125            {
126                _documentLocation = null;
127                _ruleStack.clear();
128                _objectStack.clear();
129                _documentObject = null;
130    
131                _uri = null;
132                _localName = null;
133                _qName = null;
134    
135                _line = -1;
136                _column = -1;
137                _location = null;
138                _locator = null;
139    
140                _contentBuffer.setLength(0);
141            }
142        }
143    
144        protected Object parse(URL url)
145        {
146            if (_parser == null)
147                _parser = constructParser();
148    
149            InputStream stream = null;
150    
151            try
152            {
153                stream = url.openStream();
154            }
155            catch (IOException ex)
156            {
157                throw new DocumentParseException(Tapestry.format(
158                  "RuleDrivenParser.unable-to-open-resource",
159                  url), _documentLocation, ex);
160            }
161    
162            InputSource source = new InputSource(stream);
163    
164            try
165            {
166                _parser.parse(source, this);
167    
168                stream.close();
169            }
170            catch (Exception ex)
171            {
172                throw new DocumentParseException(Tapestry.format(
173                  "RuleDrivenParser.parse-error",
174                  url,
175                  ex.getMessage()), getLocation(), ex);
176            }
177    
178            if (LOG.isDebugEnabled())
179                LOG.debug("Document parsed as: " + _documentObject);
180    
181            return _documentObject;
182        }
183    
184        /**
185         * Returns an {@link Location}representing the current position within the document (depending
186         * on the parser, this may be accurate to column number level).
187         */
188    
189        public Location getLocation()
190        {
191            if (_locator == null)
192                return null;
193    
194            int line = _locator.getLineNumber();
195            int column = _locator.getColumnNumber();
196    
197            if (_line != line || _column != column)
198            {
199                _location = null;
200                _line = line;
201                _column = column;
202            }
203    
204            if (_location == null)
205                _location = new LocationImpl(_documentLocation, _line, _column);
206    
207            return _location;
208        }
209    
210        /**
211         * Pushes an object onto the object stack. The first object pushed is the "document object", the
212         * root object returned by the parse.
213         */
214        public void push(Object object)
215        {
216            if (_documentObject == null)
217                _documentObject = object;
218    
219            push(_objectStack, object, "object stack");
220        }
221    
222        /**
223         * Returns the top object on the object stack.
224         */
225        public Object peek()
226        {
227            return peek(_objectStack, 0);
228        }
229    
230        /**
231         * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is
232         * the next-to-top object, etc.
233         */
234    
235        public Object peek(int depth)
236        {
237            return peek(_objectStack, depth);
238        }
239    
240        /**
241         * Removes and returns the top object on the object stack.
242         */
243        public Object pop()
244        {
245            return pop(_objectStack, "object stack");
246        }
247    
248        private Object pop(List list, String name)
249        {
250            Object result = list.remove(list.size() - 1);
251    
252            if (LOG.isDebugEnabled())
253                LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")");
254    
255            return result;
256        }
257    
258        private Object peek(List list, int depth)
259        {
260            return list.get(list.size() - 1 - depth);
261        }
262    
263        private void push(List list, Object object, String name)
264        {
265            if (LOG.isDebugEnabled())
266                LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")");
267    
268            list.add(object);
269        }
270    
271        /**
272         * Pushes a new rule onto the rule stack.
273         */
274    
275        protected void pushRule(IRule rule)
276        {
277            push(_ruleStack, rule, "rule stack");
278        }
279    
280        /**
281         * Returns the top rule on the stack.
282         */
283    
284        protected IRule peekRule()
285        {
286            return (IRule) peek(_ruleStack, 0);
287        }
288    
289        protected IRule popRule()
290        {
291            return (IRule) pop(_ruleStack, "rule stack");
292        }
293    
294        public void addRule(String localElementName, IRule rule)
295        {
296            _ruleMap.put(localElementName, rule);
297        }
298    
299        /**
300         * Registers a public id and corresponding input source. Generally, the source is a wrapper
301         * around an input stream to a package resource.
302         *
303         * @param publicId
304         *            the public identifier to be registerred, generally the publicId of a DTD related
305         *            to the document being parsed
306         * @param entityPath
307         *            the resource path of the entity, typically a DTD file. Relative files names are
308         *            expected to be stored in the same package as the class file, otherwise a leading
309         *            slash is an absolute pathname within the classpath.
310         */
311    
312        public void registerEntity(String publicId, String entityPath)
313        {
314            if (LOG.isDebugEnabled())
315                LOG.debug("Registering " + publicId + " as " + entityPath);
316    
317            if (_entities == null)
318                _entities = new HashMap();
319    
320            _entities.put(publicId, entityPath);
321        }
322    
323        protected IRule selectRule(String localName, Attributes attributes)
324        {
325            IRule rule = (IRule) _ruleMap.get(localName);
326    
327            if (rule == null)
328                throw new DocumentParseException(Tapestry.format(
329                  "RuleDrivenParser.no-rule-for-element",
330                  localName), getLocation());
331    
332            return rule;
333        }
334    
335        /**
336         * Uses the {@link Locator}to track the position in the document as a {@link Location}. This
337         * is invoked once (before the initial element is parsed) and the Locator is retained and
338         * queried as to the current file location.
339         *
340         * @see #getLocation()
341         */
342        public void setDocumentLocator(Locator locator)
343        {
344            _locator = locator;
345        }
346    
347        /**
348         * Accumulates the content in a buffer; the concatinated content is provided to the top rule
349         * just before any start or end tag.
350         */
351        public void characters(char[] ch, int start, int length) throws SAXException
352        {
353            _contentBuffer.append(ch, start, length);
354        }
355    
356        /**
357         * Pops the top rule off the stack and invokes {@link IRule#endElement(RuleDirectedParser)}.
358         */
359        public void endElement(String uri, String localName, String qName) throws SAXException
360        {
361            fireContentRule();
362    
363            _uri = uri;
364            _localName = localName;
365            _qName = qName;
366    
367            popRule().endElement(this);
368        }
369    
370        /**
371         * Ignorable content is ignored.
372         */
373        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
374        {
375        }
376    
377        /**
378         * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto
379         * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}.
380         */
381        public void startElement(String uri, String localName, String qName, Attributes attributes)
382          throws SAXException
383        {
384            fireContentRule();
385    
386            _uri = uri;
387            _localName = localName;
388            _qName = qName;
389    
390            String name = extractName(uri, localName, qName);
391    
392            IRule newRule = selectRule(name, attributes);
393    
394            pushRule(newRule);
395    
396            newRule.startElement(this, attributes);
397        }
398    
399        private String extractName(String uri, String localName, String qName)
400        {
401            return HiveMind.isBlank(localName) ? qName : localName;
402        }
403    
404        /**
405         * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2
406         * parser.
407         */
408        protected synchronized SAXParser constructParser()
409        {
410            if (_parserFactory == null)
411            {
412                _parserFactory = SAXParserFactory.newInstance();
413                configureParserFactory(_parserFactory);
414            }
415    
416            try
417            {
418                return _parserFactory.newSAXParser();
419            }
420            catch (SAXException ex)
421            {
422                throw new ApplicationRuntimeException(ex);
423            }
424            catch (ParserConfigurationException ex)
425            {
426                throw new ApplicationRuntimeException(ex);
427            }
428    
429        }
430    
431        /**
432         * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is
433         * invoked. The default implementation sets validating to true and namespaceAware to false,
434         */
435    
436        protected void configureParserFactory(SAXParserFactory factory)
437        {
438            factory.setValidating(true);
439            factory.setNamespaceAware(false);
440        }
441    
442        /**
443         * Throws the exception.
444         */
445        public void error(SAXParseException ex) throws SAXException
446        {
447            fatalError(ex);
448        }
449    
450        /**
451         * Throws the exception.
452         */
453        public void fatalError(SAXParseException ex) throws SAXException
454        {
455            // Sometimes, a bad parse "corrupts" a parser so that it doesn't
456            // work properly for future parses (of valid documents),
457            // so discard it here.
458    
459            _parser = null;
460    
461            throw ex;
462        }
463    
464        /**
465         * Throws the exception.
466         */
467        public void warning(SAXParseException ex) throws SAXException
468        {
469            fatalError(ex);
470        }
471    
472        public InputSource resolveEntity(String publicId, String systemId) throws SAXException
473        {
474            String entityPath = null;
475    
476            if (LOG.isDebugEnabled())
477                LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = "
478                          + systemId);
479    
480            if (_entities != null)
481                entityPath = (String) _entities.get(publicId);
482    
483            if (entityPath == null)
484            {
485                if (LOG.isDebugEnabled())
486                    LOG.debug("Entity not found, using " + systemId);
487    
488                return null;
489            }
490    
491            InputStream stream = getClass().getResourceAsStream(entityPath);
492    
493            InputSource result = new InputSource(stream);
494    
495            if (result != null && LOG.isDebugEnabled())
496                LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")");
497    
498            return result;
499        }
500    
501        /**
502         * Validates that the input value matches against the specified Perl5 pattern. If valid, the
503         * method simply returns. If not a match, then an error message is generated (using the errorKey
504         * and the input value) and a {@link InvalidStringException}is thrown.
505         */
506    
507        public void validate(String value, String pattern, String errorKey)
508        {
509            if (_matcher == null)
510                _matcher = new RegexpMatcher();
511    
512            if (_matcher.matches(pattern, value))
513                return;
514    
515            throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation());
516        }
517    
518        public Resource getDocumentLocation()
519        {
520            return _documentLocation;
521        }
522    
523        /**
524         * Returns the localName for the current element.
525         *
526         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
527         *      java.lang.String, org.xml.sax.Attributes)
528         */
529        public String getLocalName()
530        {
531            return _localName;
532        }
533    
534        /**
535         * Returns the qualified name for the current element.
536         *
537         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
538         *      java.lang.String, org.xml.sax.Attributes)
539         */
540        public String getQName()
541        {
542            return _qName;
543        }
544    
545        /**
546         * Returns the URI for the current element.
547         *
548         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
549         *      java.lang.String, org.xml.sax.Attributes)
550         */
551        public String getUri()
552        {
553            return _uri;
554        }
555    
556        private void fireContentRule()
557        {
558            String content = _contentBuffer.toString();
559            _contentBuffer.setLength(0);
560    
561            if (!_ruleStack.isEmpty())
562                peekRule().content(this, content);
563        }
564    
565    }