001    // Copyright 2004, 2005 The Apache Software Foundation
002    //
003    // Licensed under the Apache License, Version 2.0 (the "License");
004    // you may not use this file except in compliance with the License.
005    // You may obtain a copy of the License at
006    //
007    //     http://www.apache.org/licenses/LICENSE-2.0
008    //
009    // Unless required by applicable law or agreed to in writing, software
010    // distributed under the License is distributed on an "AS IS" BASIS,
011    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012    // See the License for the specific language governing permissions and
013    // limitations under the License.
014    
015    package org.apache.tapestry.parse;
016    
017    import java.util.ArrayList;
018    import java.util.Collections;
019    import java.util.HashMap;
020    import java.util.Iterator;
021    import java.util.List;
022    import java.util.Map;
023    
024    import org.apache.hivemind.ApplicationRuntimeException;
025    import org.apache.hivemind.Location;
026    import org.apache.hivemind.Resource;
027    import org.apache.hivemind.impl.LocationImpl;
028    import org.apache.oro.text.regex.MalformedPatternException;
029    import org.apache.oro.text.regex.MatchResult;
030    import org.apache.oro.text.regex.Pattern;
031    import org.apache.oro.text.regex.PatternMatcher;
032    import org.apache.oro.text.regex.Perl5Compiler;
033    import org.apache.oro.text.regex.Perl5Matcher;
034    import org.apache.tapestry.util.IdAllocator;
035    
036    /**
037     * Parses Tapestry templates, breaking them into a series of
038     * {@link org.apache.tapestry.parse.TemplateToken tokens}. Although often referred to as an "HTML
039     * template", there is no real requirement that the template be HTML. This parser can handle any
040     * reasonable SGML derived markup (including XML), but specifically works around the ambiguities of
041     * HTML reasonably.
042     * <p>
043     * Deployed as the tapestry.parse.TemplateParser service, using the threaded model.
044     * <p>
045     * Dynamic markup in Tapestry attempts to be invisible. Components are arbitrary tags containing a
046     * <code>jwcid</code> attribute. Such components must be well balanced (have a matching close tag,
047     * or end the tag with "<code>/&gt;</code>").
048     * <p>
049     * Generally, the id specified in the template is matched against a component defined in the
050     * specification. However, implicit components are also possible. The jwcid attribute uses the
051     * syntax "<code>@Type</code>" for implicit components. Type is the component type, and may include a library id
052     *       prefix. Such a component is anonymous (but is given a unique id).
053     *       <p>
054     *       (The unique ids assigned start with a dollar sign, which is normally not allowed for
055     *       component ids ... this helps to make them stand out and assures that they do not conflict
056     *       with user-defined component ids. These ids tend to propagate into URLs and become HTML
057     *       element names and even JavaScript variable names ... the dollar sign is acceptable in these
058     *       contexts as well).
059     *       <p>
060     *       An implicit component may also be given a name using the syntax "
061     *       <code>componentId:@Type</code>". Such a component should <b>not </b> be defined in the
062     *       specification, but may still be accessed via
063     *       {@link org.apache.tapestry.IComponent#getComponent(String)}.
064     *       <p>
065     *       Both defined and implicit components may have additional attributes defined, simply by
066     *       including them in the template. They set formal or informal parameters of the component to
067     *       static strings.
068     *       {@link org.apache.tapestry.spec.IComponentSpecification#getAllowInformalParameters()}, if
069     *       false, will cause such attributes to be simply ignored. For defined components, conflicting
070     *       values defined in the template are ignored.
071     *       <p>
072     *       Attributes in component tags will become formal and informal parameters of the
073     *       corresponding component. Most attributes will be
074     *       <p>
075     *       The parser removes the body of some tags (when the corresponding component doesn't
076     *       {@link org.apache.tapestry.spec.IComponentSpecification#getAllowBody() allow a body}), and
077     *       allows portions of the template to be completely removed.
078     *       <p>
079     *       The parser does a pretty thorough lexical analysis of the template, and reports a great
080     *       number of errors, including improper nesting of tags.
081     *       <p>
082     *       The parser supports <em>invisible localization</em>: The parser recognizes HTML of the
083     *       form: <code>&lt;span key="<i>value</i>"&gt; ... &lt;/span&gt;</code> and converts them
084     *       into a {@link TokenType#LOCALIZATION} token. You may also specify a <code>raw</code>
085     *       attribute ... if the value is <code>true</code>, then the localized value is sent to the
086     *       client without filtering, which is appropriate if the value has any markup that should not
087     *       be escaped.
088     * @author Howard Lewis Ship, Geoff Longman
089     */
090    
091    public class TemplateParser implements ITemplateParser
092    {
093        /**
094         * The attribute, checked for in &lt;span&gt; tags, that signfies that the span is being used as
095         * an invisible localization.
096         * 
097         * @since 2.0.4
098         */
099    
100        public static final String LOCALIZATION_KEY_ATTRIBUTE_NAME = "key";
101    
102        /**
103         * Used with {@link #LOCALIZATION_KEY_ATTRIBUTE_NAME} to indicate a string that should be
104         * rendered "raw" (without escaping HTML). If not specified, defaults to "false". The value must
105         * equal "true" (caselessly).
106         * 
107         * @since 2.3
108         */
109    
110        public static final String RAW_ATTRIBUTE_NAME = "raw";
111        
112        public static final String PROPERTY_NAME_PATTERN = "_?[a-zA-Z]\\w*";
113        
114        /**
115         * Pattern used to recognize ordinary components (defined in the specification).
116         * 
117         * @since 3.0
118         */
119    
120        public static final String SIMPLE_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")$";
121        
122        /**
123         * Pattern used to recognize implicit components (whose type is defined in the template).
124         * Subgroup 1 is the id (which may be null) and subgroup 2 is the type (which may be qualified
125         * with a library prefix). Subgroup 4 is the library id, Subgroup 5 is the simple component
126         * type, which may (as of 4.0) have slashes to delinate folders containing the component.
127         * 
128         * @since 3.0
129         */
130    
131        public static final String IMPLICIT_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")?@((("
132                + PROPERTY_NAME_PATTERN + "):)?((" + PROPERTY_NAME_PATTERN + "/)*"
133                + PROPERTY_NAME_PATTERN + "))$";
134        
135        /**
136         * A "magic" component id that causes the tag with the id and its entire body to be ignored
137         * during parsing.
138         */
139    
140        private static final String REMOVE_ID = "$remove$";
141    
142        /**
143         * A "magic" component id that causes the tag to represent the true content of the template. Any
144         * content prior to the tag is discarded, and any content after the tag is ignored. The tag
145         * itself is not included.
146         */
147    
148        private static final String CONTENT_ID = "$content$";
149    
150        private static final int IMPLICIT_ID_PATTERN_ID_GROUP = 1;
151    
152        private static final int IMPLICIT_ID_PATTERN_TYPE_GROUP = 2;
153    
154        private static final int IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP = 4;
155    
156        private static final int IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP = 5;
157    
158        private static final char[] COMMENT_START = new char[]
159                                                             { '<', '!', '-', '-' };
160    
161        private static final char[] COMMENT_END = new char[]
162                                                           { '-', '-', '>' };
163    
164        private static final char[] CLOSE_TAG = new char[]
165                                                         { '<', '/' };
166        
167        private static final int WAIT_FOR_ATTRIBUTE_NAME = 0;
168    
169        private static final int COLLECT_ATTRIBUTE_NAME = 1;
170    
171        private static final int ADVANCE_PAST_EQUALS = 2;
172    
173        private static final int WAIT_FOR_ATTRIBUTE_VALUE = 3;
174    
175        private static final int COLLECT_QUOTED_VALUE = 4;
176    
177        private static final int COLLECT_UNQUOTED_VALUE = 5;
178        
179        /**
180         * Conversions needed by {@link #convertEntitiesToPlain(String)}.
181         */
182    
183        private static final String[] CONVERSIONS =
184        { "&lt;", "<", "&gt;", ">", "&quot;", "\"", "&amp;", "&" };
185        
186        /**
187         * Attribute name used to identify components.
188         * 
189         * @since 4.0
190         */
191    
192        private String _componentAttributeName;
193        
194        private Pattern _simpleIdPattern;
195    
196        private Pattern _implicitIdPattern;
197    
198        private PatternMatcher _patternMatcher;
199    
200        private IdAllocator _idAllocator = new IdAllocator();
201    
202        private ITemplateParserDelegate _delegate;
203    
204        /**
205         * Identifies the template being parsed; used with error messages.
206         */
207    
208        private Resource _resourceLocation;
209    
210        /**
211         * Shared instance of {@link Location} used by all {@link TextToken} instances in the template.
212         */
213    
214        private Location _templateLocation;
215    
216        /**
217         * Location with in the resource for the current line.
218         */
219    
220        private Location _currentLocation;
221    
222        /**
223         * Local reference to the template data that is to be parsed.
224         */
225    
226        private char[] _templateData;
227    
228        /**
229         * List of Tag.
230         */
231    
232        private List _stack = new ArrayList();
233    
234        /**
235         * 
236         * @author hls
237         */
238        private static class Tag
239        {
240            // The element, i.e., <jwc> or virtually any other element (via jwcid attribute)
241            String _tagName;
242    
243            // If true, the tag is a placeholder for a dynamic element
244            boolean _component;
245    
246            // If true, the body of the tag is being ignored, and the
247            // ignore flag is cleared when the close tag is reached
248            boolean _ignoringBody;
249    
250            // If true, then the entire tag (and its body) is being ignored
251            boolean _removeTag;
252    
253            // If true, then the tag must have a balanced closing tag.
254            // This is always true for components.
255            boolean _mustBalance;
256    
257            // The line on which the start tag exists
258            int _line;
259    
260            // If true, then the parse ends when the closing tag is found.
261            boolean _content;
262    
263            Tag(String tagName, int line)
264            {
265                _tagName = tagName;
266                _line = line;
267            }
268    
269            boolean match(String matchTagName)
270            {
271                return _tagName.equalsIgnoreCase(matchTagName);
272            }
273        }
274    
275        /**
276         * List of {@link TemplateToken}, this forms the ultimate response.
277         */
278    
279        private List _tokens = new ArrayList();
280    
281        /**
282         * The location of the 'cursor' within the template data. The advance() method moves this
283         * forward.
284         */
285    
286        private int _cursor;
287    
288        /**
289         * The start of the current block of static text, or -1 if no block is active.
290         */
291    
292        private int _blockStart;
293    
294        /**
295         * The current line number; tracked by advance(). Starts at 1.
296         */
297    
298        private int _line;
299    
300        /**
301         * Set to true when the body of a tag is being ignored. This is typically used to skip over the
302         * body of a tag when its corresponding component doesn't allow a body, or whe the special jwcid
303         * of $remove$ is used.
304         */
305    
306        private boolean _ignoring;
307    
308        /**
309         * A {@link Map}of {@link String}s, used to store attributes collected while parsing a tag.
310         */
311    
312        private Map _attributes = new HashMap();
313    
314        /**
315         * A factory used to create template tokens.
316         */
317    
318        private TemplateTokenFactory _factory;
319    
320        public TemplateParser()
321        {
322            Perl5Compiler compiler = new Perl5Compiler();
323    
324            try
325            {
326                _simpleIdPattern = compiler.compile(SIMPLE_ID_PATTERN);
327                _implicitIdPattern = compiler.compile(IMPLICIT_ID_PATTERN);
328            }
329            catch (MalformedPatternException ex)
330            {
331                throw new ApplicationRuntimeException(ex);
332            }
333    
334            _patternMatcher = new Perl5Matcher();
335        }
336    
337        /**
338         * Parses the template data into an array of {@link TemplateToken}s.
339         * <p>
340         * The parser is <i>decidedly </i> not threadsafe, so care should be taken that only a single
341         * thread accesses it.
342         * 
343         * @param templateData
344         *            the HTML template to parse. Some tokens will hold a reference to this array.
345         * @param delegate
346         *            object that "knows" about defined components
347         * @param resourceLocation
348         *            a description of where the template originated from, used with error messages.
349         */
350    
351        public TemplateToken[] parse(char[] templateData, ITemplateParserDelegate delegate, 
352                Resource resourceLocation) throws TemplateParseException
353        {
354            try
355            {
356                beforeParse(templateData, delegate, resourceLocation);
357                
358                parse();
359    
360                return (TemplateToken[]) _tokens.toArray(new TemplateToken[_tokens.size()]);
361            }
362            finally
363            {
364                afterParse();
365            }
366        }
367    
368        /**
369         * perform default initialization of the parser.
370         */
371    
372        protected void beforeParse(char[] templateData, ITemplateParserDelegate delegate, Resource resourceLocation)
373        {
374            _templateData = templateData;
375            _resourceLocation = resourceLocation;
376            _templateLocation = new LocationImpl(resourceLocation);
377            _delegate = delegate;
378            _ignoring = false;
379            _line = 1;
380            _componentAttributeName = delegate.getComponentAttributeName();
381        }
382    
383        /**
384         * Perform default cleanup after parsing completes.
385         */
386    
387        protected void afterParse()
388        {
389            _delegate = null;
390            _templateData = null;
391            _resourceLocation = null;
392            _templateLocation = null;
393            _currentLocation = null;
394            _stack.clear();
395            _tokens.clear();
396            _attributes.clear();
397            _idAllocator.clear();
398        }
399    
400        /**
401         * Used by the parser to report problems in the parse. Parsing <b>must </b> stop when a problem
402         * is reported.
403         * <p>
404         * The default implementation simply throws an exception that contains the message and location
405         * parameters.
406         * <p>
407         * Subclasses may override but <b>must </b> ensure they throw the required exception.
408         * 
409         * @param message
410         * @param location
411         * @param line
412         *            ignored by the default impl
413         * @param cursor
414         *            ignored by the default impl
415         * @throws TemplateParseException
416         *             always thrown in order to terminate the parse.
417         */
418    
419        protected void templateParseProblem(String message, Location location, int line, int cursor)
420                throws TemplateParseException
421        {
422            throw new TemplateParseException(message, location);
423        }
424    
425        /**
426         * Used by the parser to report tapestry runtime specific problems in the parse. Parsing <b>must
427         * </b> stop when a problem is reported.
428         * <p>
429         * The default implementation simply rethrows the exception.
430         * <p>
431         * Subclasses may override but <b>must </b> ensure they rethrow the exception.
432         * 
433         * @param exception
434         * @param line
435         *            ignored by the default impl
436         * @param cursor
437         *            ignored by the default impl
438         * @throws ApplicationRuntimeException
439         *             always rethrown in order to terminate the parse.
440         */
441    
442        protected void templateParseProblem(ApplicationRuntimeException exception, int line, int cursor)
443        {
444            throw exception;
445        }
446    
447        /**
448         * Give subclasses access to the parse results.
449         */
450        protected List getTokens()
451        {
452            if (_tokens == null)
453                return Collections.EMPTY_LIST;
454    
455            return _tokens;
456        }
457    
458        /**
459         * Checks to see if the next few characters match a given pattern.
460         */
461    
462        private boolean lookahead(char[] match)
463        {
464            try
465            {
466                for (int i = 0; i < match.length; i++)
467                {
468                    if (_templateData[_cursor + i] != match[i])
469                        return false;
470                }
471    
472                // Every character matched.
473    
474                return true;
475            }
476            catch (IndexOutOfBoundsException ex)
477            {
478                return false;
479            }
480        }
481    
482        protected void parse() throws TemplateParseException
483        {
484            _cursor = 0;
485            _blockStart = -1;
486            int length = _templateData.length;
487            
488            while (_cursor < length)
489            {
490                if (_templateData[_cursor] != '<')
491                {
492                    if (_blockStart < 0 && !_ignoring)
493                        _blockStart = _cursor;
494                    
495                    advance();
496                    continue;
497                }
498                
499                // OK, start of something.
500                
501                if (lookahead(CLOSE_TAG))
502                {
503                    closeTag();
504                    continue;
505                }
506    
507                if (lookahead(COMMENT_START))
508                {
509                    skipComment();
510                    continue;
511                }
512                
513                // The start of some tag.
514                
515                startTag();
516            }
517            
518            // Usually there's some text at the end of the template (after the last closing tag) that
519            // should
520            // be added. Often the last few tags are static tags so we definately
521            // need to end the text block.
522            
523            addTextToken(_templateData.length - 1);
524        }
525    
526        /**
527         * Advance forward in the document until the end of the comment is reached. In addition, skip
528         * any whitespace following the comment.
529         */
530    
531        private void skipComment() throws TemplateParseException
532        {
533            int length = _templateData.length;
534            int startLine = _line;
535    
536            if (_blockStart < 0 && !_ignoring)
537                _blockStart = _cursor;
538    
539            while (true)
540            {
541                if (_cursor >= length)
542                    templateParseProblem(ParseMessages.commentNotEnded(startLine), new LocationImpl(
543                            _resourceLocation, startLine), startLine, _cursor);
544    
545                if (lookahead(COMMENT_END))
546                    break;
547    
548                // Not the end of the comment, advance over it.
549    
550                advance();
551            }
552    
553            _cursor += COMMENT_END.length;
554            advanceOverWhitespace();
555        }
556    
557        private void addTextToken(int end)
558        {
559            // No active block to add to.
560    
561            if (_blockStart < 0)
562                return;
563    
564            if (_blockStart <= end)
565            {
566                // This seems odd, shouldn't the location be the current location? I guess
567                // no errors are ever reported for a text token.
568    
569                TemplateToken token = _factory.createTextToken(
570                        _templateData,
571                        _blockStart,
572                        end,
573                        _templateLocation);
574    
575                _tokens.add(token);
576            }
577    
578            _blockStart = -1;
579        }
580    
581        private void startTag() throws TemplateParseException
582        {
583            int cursorStart = _cursor;
584            int length = _templateData.length;
585            String tagName = null;
586            boolean endOfTag = false;
587            boolean emptyTag = false;
588            int startLine = _line;
589            Location startLocation = new LocationImpl(_resourceLocation, startLine);
590    
591            tagBeginEvent(startLine, _cursor);
592    
593            advance();
594    
595            // Collect the element type
596    
597            while (_cursor < length)
598            {
599                char ch = _templateData[_cursor];
600    
601                if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
602                {
603                    tagName = new String(_templateData, cursorStart + 1, _cursor - cursorStart - 1);
604    
605                    break;
606                }
607    
608                advance();
609            }
610    
611            String attributeName = null;
612            int attributeNameStart = -1;
613            int attributeValueStart = -1;
614            int state = WAIT_FOR_ATTRIBUTE_NAME;
615            char quoteChar = 0;
616    
617            _attributes.clear();
618    
619            // Collect each attribute
620    
621            while (!endOfTag)
622            {
623                if (_cursor >= length)
624                {
625                    String message = (tagName == null) ? ParseMessages.unclosedUnknownTag(startLine)
626                            : ParseMessages.unclosedTag(tagName, startLine);
627    
628                    templateParseProblem(message, startLocation, startLine, cursorStart);
629                }
630    
631                char ch = _templateData[_cursor];
632    
633                switch (state)
634                {
635                    case WAIT_FOR_ATTRIBUTE_NAME:
636    
637                        // Ignore whitespace before the next attribute name, while
638                        // looking for the end of the current tag.
639    
640                        if (ch == '/')
641                        {
642                            emptyTag = true;
643                            advance();
644                            break;
645                        }
646    
647                        if (ch == '>')
648                        {
649                            endOfTag = true;
650                            break;
651                        }
652    
653                        if (Character.isWhitespace(ch))
654                        {
655                            advance();
656                            break;
657                        }
658    
659                        // Found non-whitespace, assume its the attribute name.
660                        // Note: could use a check here for non-alpha.
661    
662                        attributeNameStart = _cursor;
663                        state = COLLECT_ATTRIBUTE_NAME;
664                        advance();
665                        break;
666    
667                    case COLLECT_ATTRIBUTE_NAME:
668    
669                        // Looking for end of attribute name.
670    
671                        if (ch == '=' || ch == '/' || ch == '>' || Character.isWhitespace(ch))
672                        {
673                            attributeName = new String(_templateData, attributeNameStart, _cursor
674                                    - attributeNameStart);
675    
676                            state = ADVANCE_PAST_EQUALS;
677                            break;
678                        }
679    
680                        // Part of the attribute name
681    
682                        advance();
683                        break;
684    
685                    case ADVANCE_PAST_EQUALS:
686    
687                        // Looking for the '=' sign. May hit the end of the tag, or (for bare
688                        // attributes),
689                        // the next attribute name.
690    
691                        if (ch == '/' || ch == '>')
692                        {
693                            // A bare attribute, which is not interesting to
694                            // us.
695    
696                            state = WAIT_FOR_ATTRIBUTE_NAME;
697                            break;
698                        }
699    
700                        if (Character.isWhitespace(ch))
701                        {
702                            advance();
703                            break;
704                        }
705    
706                        if (ch == '=')
707                        {
708                            state = WAIT_FOR_ATTRIBUTE_VALUE;
709                            quoteChar = 0;
710                            attributeValueStart = -1;
711                            advance();
712                            break;
713                        }
714    
715                        // Otherwise, an HTML style "bare" attribute (such as <select multiple>).
716                        // We aren't interested in those (we're just looking for the id or jwcid
717                        // attribute).
718    
719                        state = WAIT_FOR_ATTRIBUTE_NAME;
720                        break;
721    
722                    case WAIT_FOR_ATTRIBUTE_VALUE:
723    
724                        if (ch == '/' || ch == '>')
725                            templateParseProblem(ParseMessages.missingAttributeValue(
726                                    tagName,
727                                    _line,
728                                    attributeName), getCurrentLocation(), _line, _cursor);
729    
730                        // Ignore whitespace between '=' and the attribute value. Also, look
731                        // for initial quote.
732    
733                        if (Character.isWhitespace(ch))
734                        {
735                            advance();
736                            break;
737                        }
738    
739                        if (ch == '\'' || ch == '"')
740                        {
741                            quoteChar = ch;
742    
743                            state = COLLECT_QUOTED_VALUE;
744                            advance();
745                            attributeValueStart = _cursor;
746                            attributeBeginEvent(attributeName, _line, attributeValueStart);
747                            break;
748                        }
749    
750                        // Not whitespace or quote, must be start of unquoted attribute.
751    
752                        state = COLLECT_UNQUOTED_VALUE;
753                        attributeValueStart = _cursor;
754                        attributeBeginEvent(attributeName, _line, attributeValueStart);
755                        break;
756    
757                    case COLLECT_QUOTED_VALUE:
758    
759                        // Start collecting the quoted attribute value. Stop at the matching quote
760                        // character,
761                        // unless bare, in which case, stop at the next whitespace.
762    
763                        if (ch == quoteChar)
764                        {
765                            String attributeValue = new String(_templateData, attributeValueStart,
766                                    _cursor - attributeValueStart);
767    
768                            attributeEndEvent(_cursor);
769    
770                            addAttributeIfUnique(tagName, attributeName, attributeValue);
771    
772                            // Advance over the quote.
773                            advance();
774                            state = WAIT_FOR_ATTRIBUTE_NAME;
775                            break;
776                        }
777    
778                        advance();
779                        break;
780    
781                    case COLLECT_UNQUOTED_VALUE:
782    
783                        // An unquoted attribute value ends with whitespace
784                        // or the end of the enclosing tag.
785    
786                        if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
787                        {
788                            String attributeValue = new String(_templateData, attributeValueStart,
789                                    _cursor - attributeValueStart);
790    
791                            attributeEndEvent(_cursor);
792                            addAttributeIfUnique(tagName, attributeName, attributeValue);
793    
794                            state = WAIT_FOR_ATTRIBUTE_NAME;
795                            break;
796                        }
797    
798                        advance();
799                        break;
800                }
801            }
802    
803            tagEndEvent(_cursor);
804    
805            // Check for invisible localizations
806            
807            String localizationKey = findValueCaselessly(LOCALIZATION_KEY_ATTRIBUTE_NAME, _attributes);
808            String jwcId = findValueCaselessly(_componentAttributeName, _attributes);
809            
810            if (localizationKey != null && jwcId == null)
811            {
812                if (_ignoring)
813                    templateParseProblem(
814                            ParseMessages.componentMayNotBeIgnored(tagName, startLine),
815                            startLocation,
816                            startLine,
817                            cursorStart);
818                
819                // If the tag isn't empty, then create a Tag instance to ignore the
820                // body of the tag.
821                
822                if (!emptyTag)
823                {
824                    Tag tag = new Tag(tagName, startLine);
825                    
826                    tag._component = false;
827                    tag._removeTag = false;
828                    tag._ignoringBody = true;
829                    tag._mustBalance = true;
830                    
831                    _stack.add(tag);
832                    
833                    // Start ignoring content until the close tag.
834                    
835                    _ignoring = true;
836                }
837                else
838                {
839                    // Cursor is at the closing carat, advance over it.
840                    advance();
841                    // TAPESTRY-359: *don't* skip whitespace advanceOverWhitespace()
842                }
843                
844                // End any open block.
845                
846                addTextToken(cursorStart - 1);
847                
848                boolean raw = checkBoolean(RAW_ATTRIBUTE_NAME, _attributes);
849                
850                Map attributes = filter(_attributes, new String[] { LOCALIZATION_KEY_ATTRIBUTE_NAME, RAW_ATTRIBUTE_NAME });
851                
852                TemplateToken token = _factory.createLocalizationToken(
853                        tagName,
854                        localizationKey,
855                        raw,
856                        attributes,
857                        startLocation);
858                
859                _tokens.add(token);
860                
861                return;
862            }
863            
864            if (jwcId != null)
865            {
866                processComponentStart(tagName, jwcId, emptyTag, startLine, cursorStart, startLocation);
867                return;
868            }
869    
870            // A static tag (not a tag without a jwcid attribute).
871            // We need to record this so that we can match close tags later.
872    
873            if (!emptyTag)
874            {
875                Tag tag = new Tag(tagName, startLine);
876                _stack.add(tag);
877            }
878    
879            // If there wasn't an active block, then start one.
880    
881            if (_blockStart < 0 && !_ignoring)
882                _blockStart = cursorStart;
883    
884            advance();
885        }
886    
887        /**
888         * @throws TemplateParseException
889         * @since 4.0
890         */
891    
892        private void addAttributeIfUnique(String tagName, String attributeName, String attributeValue)
893                throws TemplateParseException
894        {
895    
896            if (_attributes.containsKey(attributeName))
897                templateParseProblem(
898                        ParseMessages.duplicateTagAttribute(tagName, _line, attributeName),
899                        getCurrentLocation(),
900                        _line,
901                        _cursor);
902    
903            _attributes.put(attributeName, attributeValue);
904        }
905    
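/*
 * Note: this description applies to processComponentStart(), which is declared further
 * below (after the event hook methods): it processes a tag that is the open tag for a
 * component (but also handles the $remove$ and $content$ tags).
 */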
910    
911        /**
912         * Notify that the beginning of a tag has been detected.
913         * <p>
914         * Default implementation does nothing.
915         */
916        protected void tagBeginEvent(int startLine, int cursorPosition)
917        {
918        }
919    
920        /**
921         * Notify that the end of the current tag has been detected.
922         * <p>
923         * Default implementation does nothing.
924         */
925        protected void tagEndEvent(int cursorPosition)
926        {
927        }
928    
929        /**
930         * Notify that the beginning of an attribute value has been detected.
931         * <p>
932         * Default implementation does nothing.
933         */
934        protected void attributeBeginEvent(String attributeName, int startLine, int cursorPosition)
935        {
936        }
937    
938        /**
939         * Notify that the end of the current attribute value has been detected.
940         * <p>
941         * Default implementation does nothing.
942         */
943        protected void attributeEndEvent(int cursorPosition)
944        {
945        }
946    
947        private void processComponentStart(String tagName, String jwcId, boolean emptyTag,
948                int startLine, int cursorStart, Location startLocation) throws TemplateParseException
949        {
950            String componentId = jwcId;
951            if (componentId.equalsIgnoreCase(CONTENT_ID))
952            {
953                processContentTag(tagName, startLine, cursorStart, emptyTag);
954    
955                return;
956            }
957    
958            boolean isRemoveId = componentId.equalsIgnoreCase(REMOVE_ID);
959    
960            if (_ignoring && !isRemoveId)
961                templateParseProblem(
962                        ParseMessages.componentMayNotBeIgnored(tagName, startLine),
963                        startLocation,
964                        startLine,
965                        cursorStart);
966    
967            String type = null;
968            boolean allowBody = false;
969    
970            if (_patternMatcher.matches(componentId, _implicitIdPattern))
971            {
972                MatchResult match = _patternMatcher.getMatch();
973    
974                componentId = match.group(IMPLICIT_ID_PATTERN_ID_GROUP);
975                type = match.group(IMPLICIT_ID_PATTERN_TYPE_GROUP);
976    
977                String libraryId = match.group(IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP);
978                String simpleType = match.group(IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP);
979    
980                // If (and this is typical) no actual component id was specified,
981                // then generate one on the fly.
982                // The allocated id for anonymous components is
983                // based on the simple (unprefixed) type, but starts
984                // with a leading dollar sign to ensure no conflicts
985                // with user defined component ids (which don't allow dollar signs
986                // in the id).
987                // New for 4.0: the component type may included slashes ('/'), but these
988                // are not valid identifiers, so we convert them to '$'.
989    
990                if (componentId == null)
991                    componentId = _idAllocator.allocateId("$" + simpleType.replace('/', '$'));
992    
993                try
994                {
995                    allowBody = _delegate.getAllowBody(libraryId, simpleType, startLocation);
996                }
997                catch (ApplicationRuntimeException e)
998                {
999                    // give subclasses a chance to handle and rethrow
1000                    templateParseProblem(e, startLine, cursorStart);
1001                }
1002    
1003            }
1004            else
1005            {
1006                if (!isRemoveId)
1007                {
1008                    if (!_patternMatcher.matches(componentId, _simpleIdPattern))
1009                        templateParseProblem(
1010                                ParseMessages.componentIdInvalid(tagName, startLine, componentId),
1011                                startLocation,
1012                                startLine,
1013                                cursorStart);
1014    
1015                    if (!_delegate.getKnownComponent(componentId))
1016                        templateParseProblem(
1017                                ParseMessages.unknownComponentId(tagName, startLine, componentId),
1018                                startLocation,
1019                                startLine,
1020                                cursorStart);
1021    
1022                    try
1023                    {
1024                        allowBody = _delegate.getAllowBody(componentId, startLocation);
1025                    }
1026                    catch (ApplicationRuntimeException e)
1027                    {
1028                        // give subclasses a chance to handle and rethrow
1029                        templateParseProblem(e, startLine, cursorStart);
1030                    }
1031                }
1032            }
1033    
1034            // Ignore the body if we're removing the entire tag,
1035            // of if the corresponding component doesn't allow
1036            // a body.
1037    
1038            boolean ignoreBody = !emptyTag && (isRemoveId || !allowBody);
1039    
1040            if (_ignoring && ignoreBody)
1041                templateParseProblem(ParseMessages.nestedIgnore(tagName, startLine), new LocationImpl(
1042                        _resourceLocation, startLine), startLine, cursorStart);
1043    
1044            if (!emptyTag)
1045                pushNewTag(tagName, startLine, isRemoveId, ignoreBody);
1046    
1047            // End any open block.
1048    
1049            addTextToken(cursorStart - 1);
1050    
1051            if (!isRemoveId)
1052            {
1053                addOpenToken(tagName, componentId, type, startLocation);
1054    
1055                if (emptyTag)
1056                    _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1057            }
1058    
1059            advance();
1060        }
1061    
1062        private void pushNewTag(String tagName, int startLine, boolean isRemoveId, boolean ignoreBody)
1063        {
1064            Tag tag = new Tag(tagName, startLine);
1065    
1066            tag._component = !isRemoveId;
1067            tag._removeTag = isRemoveId;
1068    
1069            tag._ignoringBody = ignoreBody;
1070    
1071            _ignoring = tag._ignoringBody;
1072    
1073            tag._mustBalance = true;
1074    
1075            _stack.add(tag);
1076        }
1077    
1078        private void processContentTag(String tagName, int startLine, int cursorStart, boolean emptyTag)
1079                throws TemplateParseException
1080        {
1081            if (_ignoring)
1082                templateParseProblem(
1083                        ParseMessages.contentBlockMayNotBeIgnored(tagName, startLine),
1084                        new LocationImpl(_resourceLocation, startLine),
1085                        startLine,
1086                        cursorStart);
1087    
1088            if (emptyTag)
1089                templateParseProblem(
1090                        ParseMessages.contentBlockMayNotBeEmpty(tagName, startLine),
1091                        new LocationImpl(_resourceLocation, startLine),
1092                        startLine,
1093                        cursorStart);
1094    
1095            _tokens.clear();
1096            _blockStart = -1;
1097    
1098            Tag tag = new Tag(tagName, startLine);
1099    
1100            tag._mustBalance = true;
1101            tag._content = true;
1102    
1103            _stack.clear();
1104            _stack.add(tag);
1105    
1106            advance();
1107        }
1108    
1109        private void addOpenToken(String tagName, String jwcId, String type, Location location)
1110        {
1111            OpenToken token = _factory.createOpenToken(tagName, jwcId, type, location);
1112            _tokens.add(token);
1113    
1114            if (_attributes.isEmpty())
1115                return;
1116    
1117            Iterator i = _attributes.entrySet().iterator();
1118            while (i.hasNext())
1119            {
1120                Map.Entry entry = (Map.Entry) i.next();
1121    
1122                String key = (String) entry.getKey();
1123    
1124                if (key.equalsIgnoreCase(_componentAttributeName))
1125                    continue;
1126    
1127                String value = (String) entry.getValue();
1128    
1129                addAttributeToToken(token, key, value);
1130            }
1131        }
1132    
1133        /**
1134         * Adds the attribute to the token (identifying prefixes and whatnot is now done downstream).
1135         * 
1136         * @since 3.0
1137         */
1138    
1139        private void addAttributeToToken(OpenToken token, String name, String attributeValue)
1140        {
1141            token.addAttribute(name, convertEntitiesToPlain(attributeValue));
1142        }
1143    
1144        /**
1145         * Invoked to handle a closing tag, i.e., &lt;/foo&gt;. When a tag closes, it will match against
1146         * a tag on the open tag start. Preferably the top tag on the stack (if everything is well
1147         * balanced), but this is HTML, not XML, so many tags won't balance.
1148         * <p>
1149         * Once the matching tag is located, the question is ... is the tag dynamic or static? If
1150         * static, then the current text block is extended to include this close tag. If dynamic, then
1151         * the current text block is ended (before the '&lt;' that starts the tag) and a close token is
1152         * added.
1153         * <p>
1154         * In either case, the matching static element and anything above it is removed, and the cursor
1155         * is left on the character following the '&gt;'.
1156         */
1157    
1158        private void closeTag() throws TemplateParseException
1159        {
1160            int cursorStart = _cursor;
1161            int length = _templateData.length;
1162            int startLine = _line;
1163    
1164            Location startLocation = getCurrentLocation();
1165    
1166            _cursor += CLOSE_TAG.length;
1167    
1168            int tagStart = _cursor;
1169    
1170            while (true)
1171            {
1172                if (_cursor >= length)
1173                    templateParseProblem(
1174                            ParseMessages.incompleteCloseTag(startLine),
1175                            startLocation,
1176                            startLine,
1177                            cursorStart);
1178    
1179                char ch = _templateData[_cursor];
1180    
1181                if (ch == '>')
1182                    break;
1183    
1184                advance();
1185            }
1186    
1187            String tagName = new String(_templateData, tagStart, _cursor - tagStart);
1188    
1189            int stackPos = _stack.size() - 1;
1190            Tag tag = null;
1191    
1192            while (stackPos >= 0)
1193            {
1194                tag = (Tag) _stack.get(stackPos);
1195    
1196                if (tag.match(tagName))
1197                    break;
1198    
1199                if (tag._mustBalance)
1200                    templateParseProblem(ParseMessages.improperlyNestedCloseTag(
1201                            tagName,
1202                            startLine,
1203                            tag._tagName,
1204                            tag._line), startLocation, startLine, cursorStart);
1205    
1206                stackPos--;
1207            }
1208    
1209            if (stackPos < 0)
1210                templateParseProblem(
1211                        ParseMessages.unmatchedCloseTag(tagName, startLine),
1212                        startLocation,
1213                        startLine,
1214                        cursorStart);
1215    
1216            // Special case for the content tag
1217    
1218            if (tag._content)
1219            {
1220                addTextToken(cursorStart - 1);
1221    
1222                // Advance the cursor right to the end.
1223    
1224                _cursor = length;
1225                _stack.clear();
1226                return;
1227            }
1228    
1229            // When a component closes, add a CLOSE tag.
1230            if (tag._component)
1231            {
1232                addTextToken(cursorStart - 1);
1233    
1234                _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1235            }
1236            else
1237            {
1238                // The close of a static tag. Unless removing the tag
1239                // entirely, make sure the block tag is part of a text block.
1240    
1241                if (_blockStart < 0 && !tag._removeTag && !_ignoring)
1242                    _blockStart = cursorStart;
1243            }
1244    
1245            // Remove all elements at stackPos or above.
1246    
1247            for (int i = _stack.size() - 1; i >= stackPos; i--)
1248                _stack.remove(i);
1249    
1250            // Advance cursor past '>'
1251    
1252            advance();
1253    
1254            // If editting out the tag (i.e., $remove$) then kill any whitespace.
1255            // For components that simply don't contain a body, removeTag will
1256            // be false.
1257    
1258            if (tag._removeTag)
1259                advanceOverWhitespace();
1260    
1261            // If we were ignoring the body of the tag, then clear the ignoring
1262            // flag, since we're out of the body.
1263    
1264            if (tag._ignoringBody)
1265                _ignoring = false;
1266        }
1267    
1268        /**
1269         * Advances the cursor to the next character. If the end-of-line is reached, then increments the
1270         * line counter.
1271         */
1272    
1273        private void advance()
1274        {
1275            int length = _templateData.length;
1276    
1277            if (_cursor >= length)
1278                return;
1279            
1280            char ch = _templateData[_cursor];
1281            
1282            _cursor++;
1283            
1284            if (ch == '\n')
1285            {
1286                _line++;
1287                _currentLocation = null;
1288                return;
1289            }
1290            
1291            // A \r, or a \r\n also counts as a new line.
1292            
1293            if (ch == '\r')
1294            {
1295                _line++;
1296                _currentLocation = null;
1297                
1298                if (_cursor < length && _templateData[_cursor] == '\n')
1299                    _cursor++;
1300                
1301                return;
1302            }
1303            
1304            // Not an end-of-line character.
1305        }
1306    
1307        private void advanceOverWhitespace()
1308        {
1309            int length = _templateData.length;
1310    
1311            while (_cursor < length)
1312            {
1313                char ch = _templateData[_cursor];
1314                if (!Character.isWhitespace(ch))
1315                    return;
1316    
1317                advance();
1318            }
1319        }
1320    
1321        /**
1322         * Returns a new Map that is a copy of the input Map with some key/value pairs removed. A list
1323         * of keys is passed in and matching keys (caseless comparison) from the input Map are excluded
1324         * from the output map. May return null (rather than return an empty Map).
1325         */
1326    
1327        private Map filter(Map input, String[] removeKeys)
1328        {
1329            if (input == null || input.isEmpty())
1330                return null;
1331    
1332            Map result = null;
1333    
1334            Iterator i = input.entrySet().iterator();
1335    
1336            nextkey: while (i.hasNext())
1337            {
1338                Map.Entry entry = (Map.Entry) i.next();
1339    
1340                String key = (String) entry.getKey();
1341    
1342                for (int j = 0; j < removeKeys.length; j++)
1343                {
1344                    if (key.equalsIgnoreCase(removeKeys[j]))
1345                        continue nextkey;
1346                }
1347    
1348                if (result == null)
1349                    result = new HashMap(input.size());
1350    
1351                result.put(key, entry.getValue());
1352            }
1353    
1354            return result;
1355        }
1356    
1357        /**
1358         * Searches a Map for given key, caselessly. The Map is expected to consist of Strings for keys
1359         * and values. Returns the value for the first key found that matches (caselessly) the input
1360         * key. Returns null if no value found.
1361         */
1362    
1363        protected String findValueCaselessly(String key, Map map)
1364        {
1365            String result = (String) map.get(key);
1366    
1367            if (result != null)
1368                return result;
1369    
1370            Iterator i = map.entrySet().iterator();
1371            while (i.hasNext())
1372            {
1373                Map.Entry entry = (Map.Entry) i.next();
1374    
1375                String entryKey = (String) entry.getKey();
1376    
1377                if (entryKey.equalsIgnoreCase(key))
1378                    return (String) entry.getValue();
1379            }
1380    
1381            return null;
1382        }
1383    
1384        /**
1385         * Provided a raw input string that has been recognized to be an expression, this removes excess
1386         * white space and converts &amp;amp;;, &amp;quot;; &amp;lt;; and &amp;gt;; to their normal
1387         * character values (otherwise its impossible to specify those values in expressions in the
1388         * template).
1389         */
1390    
1391        private String convertEntitiesToPlain(String input)
1392        {
1393            int inputLength = input.length();
1394    
1395            StringBuffer buffer = new StringBuffer(inputLength);
1396    
1397            int cursor = 0;
1398    
1399            outer: while (cursor < inputLength)
1400            {
1401                for (int i = 0; i < CONVERSIONS.length; i += 2)
1402                {
1403                    String entity = CONVERSIONS[i];
1404                    int entityLength = entity.length();
1405                    String value = CONVERSIONS[i + 1];
1406    
1407                    if (cursor + entityLength > inputLength)
1408                        continue;
1409    
1410                    if (input.substring(cursor, cursor + entityLength).equals(entity))
1411                    {
1412                        buffer.append(value);
1413                        cursor += entityLength;
1414                        continue outer;
1415                    }
1416                }
1417    
1418                buffer.append(input.charAt(cursor));
1419                cursor++;
1420            }
1421    
1422            return buffer.toString().trim();
1423        }
1424    
1425        /**
1426         * Returns true if the map contains the given key (caseless search) and the value is "true"
1427         * (caseless comparison).
1428         */
1429    
1430        private boolean checkBoolean(String key, Map map)
1431        {
1432            String value = findValueCaselessly(key, map);
1433    
1434            if (value == null)
1435                return false;
1436    
1437            return value.equalsIgnoreCase("true");
1438        }
1439    
1440        /**
1441         * Gets the current location within the file. This allows the location to be created only as
1442         * needed, and multiple objects on the same line can share the same Location instance.
1443         * 
1444         * @since 3.0
1445         */
1446    
1447        protected Location getCurrentLocation()
1448        {
1449            if (_currentLocation == null)
1450                _currentLocation = new LocationImpl(_resourceLocation, _line);
1451    
1452            return _currentLocation;
1453        }
1454    
1455        public void setFactory(TemplateTokenFactory factory)
1456        {
1457            _factory = factory;
1458        }
1459    
1460    }