001 // Copyright 2004, 2005 The Apache Software Foundation
002 //
003 // Licensed under the Apache License, Version 2.0 (the "License");
004 // you may not use this file except in compliance with the License.
005 // You may obtain a copy of the License at
006 //
007 // http://www.apache.org/licenses/LICENSE-2.0
008 //
009 // Unless required by applicable law or agreed to in writing, software
010 // distributed under the License is distributed on an "AS IS" BASIS,
011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 // See the License for the specific language governing permissions and
013 // limitations under the License.
014
015 package org.apache.tapestry.parse;
016
017 import java.util.ArrayList;
018 import java.util.Collections;
019 import java.util.HashMap;
020 import java.util.Iterator;
021 import java.util.List;
022 import java.util.Map;
023
024 import org.apache.hivemind.ApplicationRuntimeException;
025 import org.apache.hivemind.Location;
026 import org.apache.hivemind.Resource;
027 import org.apache.hivemind.impl.LocationImpl;
028 import org.apache.oro.text.regex.MalformedPatternException;
029 import org.apache.oro.text.regex.MatchResult;
030 import org.apache.oro.text.regex.Pattern;
031 import org.apache.oro.text.regex.PatternMatcher;
032 import org.apache.oro.text.regex.Perl5Compiler;
033 import org.apache.oro.text.regex.Perl5Matcher;
034 import org.apache.tapestry.util.IdAllocator;
035
036 /**
037 * Parses Tapestry templates, breaking them into a series of
038 * {@link org.apache.tapestry.parse.TemplateToken tokens}. Although often referred to as an "HTML
039 * template", there is no real requirement that the template be HTML. This parser can handle any
040 * reasonable SGML derived markup (including XML), but specifically works around the ambiguities of
041 * HTML reasonably.
042 * <p>
043 * Deployed as the tapestry.parse.TemplateParser service, using the threaded model.
044 * <p>
045 * Dynamic markup in Tapestry attempts to be invisible. Components are arbitrary tags containing a
046 * <code>jwcid</code> attribute. Such components must be well balanced (have a matching close tag,
047 * or end the tag with "<code>/&gt;</code>").
048 * <p>
049 * Generally, the id specified in the template is matched against a component defined in the
050 * specification. However, implicit components are also possible. The jwcid attribute uses the
051 * syntax "<code>@Type</code>" for implicit components. Type is the component type, and may include a library id
052 * prefix. Such a component is anonymous (but is given a unique id).
053 * <p>
054 * (The unique ids assigned start with a dollar sign, which is normally not allowed for
055 * component ids ... this helps to make them stand out and assures that they do not conflict
056 * with user-defined component ids. These ids tend to propagate into URLs and become HTML
057 * element names and even JavaScript variable names ... the dollar sign is acceptable in these
058 * contexts as well).
059 * <p>
060 * An implicit component may also be given a name using the syntax "
061 * <code>componentId:@Type</code>". Such a component should <b>not </b> be defined in the
062 * specification, but may still be accessed via
063 * {@link org.apache.tapestry.IComponent#getComponent(String)}.
064 * <p>
065 * Both defined and implicit components may have additional attributes defined, simply by
066 * including them in the template. They set formal or informal parameters of the component to
067 * static strings.
068 * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowInformalParameters()}, if
069 * false, will cause such attributes to be simply ignored. For defined components, conflicting
070 * values defined in the template are ignored.
071 * <p>
072 * Attributes in component tags will become formal and informal parameters of the
073 * corresponding component. Most attributes will be
074 * <p>
075 * The parser removes the body of some tags (when the corresponding component doesn't
076 * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowBody() allow a body}), and
077 * allows portions of the template to be completely removed.
078 * <p>
079 * The parser does a pretty thorough lexical analysis of the template, and reports a great
080 * number of errors, including improper nesting of tags.
081 * <p>
082 * The parser supports <em>invisible localization</em>: The parser recognizes HTML of the
083 * form: <code>&lt;span key="<i>value</i>"&gt; ... &lt;/span&gt;</code> and converts them
084 * into a {@link TokenType#LOCALIZATION} token. You may also specify a <code>raw</code>
085 * attribute ... if the value is <code>true</code>, then the localized value is sent to the
086 * client without filtering, which is appropriate if the value has any markup that should not
087 * be escaped.
088 * @author Howard Lewis Ship, Geoff Longman
089 */
090
091 public class TemplateParser implements ITemplateParser
092 {
093 /**
094 * The attribute, checked for in <span> tags, that signfies that the span is being used as
095 * an invisible localization.
096 *
097 * @since 2.0.4
098 */
099
100 public static final String LOCALIZATION_KEY_ATTRIBUTE_NAME = "key";
101
102 /**
103 * Used with {@link #LOCALIZATION_KEY_ATTRIBUTE_NAME} to indicate a string that should be
104 * rendered "raw" (without escaping HTML). If not specified, defaults to "false". The value must
105 * equal "true" (caselessly).
106 *
107 * @since 2.3
108 */
109
110 public static final String RAW_ATTRIBUTE_NAME = "raw";
111
112 public static final String PROPERTY_NAME_PATTERN = "_?[a-zA-Z]\\w*";
113
114 /**
115 * Pattern used to recognize ordinary components (defined in the specification).
116 *
117 * @since 3.0
118 */
119
120 public static final String SIMPLE_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")$";
121
122 /**
123 * Pattern used to recognize implicit components (whose type is defined in the template).
124 * Subgroup 1 is the id (which may be null) and subgroup 2 is the type (which may be qualified
125 * with a library prefix). Subgroup 4 is the library id, Subgroup 5 is the simple component
126 * type, which may (as of 4.0) have slashes to delinate folders containing the component.
127 *
128 * @since 3.0
129 */
130
131 public static final String IMPLICIT_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")?@((("
132 + PROPERTY_NAME_PATTERN + "):)?((" + PROPERTY_NAME_PATTERN + "/)*"
133 + PROPERTY_NAME_PATTERN + "))$";
134
135 /**
136 * A "magic" component id that causes the tag with the id and its entire body to be ignored
137 * during parsing.
138 */
139
140 private static final String REMOVE_ID = "$remove$";
141
142 /**
143 * A "magic" component id that causes the tag to represent the true content of the template. Any
144 * content prior to the tag is discarded, and any content after the tag is ignored. The tag
145 * itself is not included.
146 */
147
148 private static final String CONTENT_ID = "$content$";
149
150 private static final int IMPLICIT_ID_PATTERN_ID_GROUP = 1;
151
152 private static final int IMPLICIT_ID_PATTERN_TYPE_GROUP = 2;
153
154 private static final int IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP = 4;
155
156 private static final int IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP = 5;
157
158 private static final char[] COMMENT_START = new char[]
159 { '<', '!', '-', '-' };
160
161 private static final char[] COMMENT_END = new char[]
162 { '-', '-', '>' };
163
164 private static final char[] CLOSE_TAG = new char[]
165 { '<', '/' };
166
167 private static final int WAIT_FOR_ATTRIBUTE_NAME = 0;
168
169 private static final int COLLECT_ATTRIBUTE_NAME = 1;
170
171 private static final int ADVANCE_PAST_EQUALS = 2;
172
173 private static final int WAIT_FOR_ATTRIBUTE_VALUE = 3;
174
175 private static final int COLLECT_QUOTED_VALUE = 4;
176
177 private static final int COLLECT_UNQUOTED_VALUE = 5;
178
179 /**
180 * Conversions needed by {@link #convertEntitiesToPlain(String)}.
181 */
182
183 private static final String[] CONVERSIONS =
184 { "<", "<", ">", ">", """, "\"", "&", "&" };
185
186 /**
187 * Attribute name used to identify components.
188 *
189 * @since 4.0
190 */
191
192 private String _componentAttributeName;
193
194 private Pattern _simpleIdPattern;
195
196 private Pattern _implicitIdPattern;
197
198 private PatternMatcher _patternMatcher;
199
200 private IdAllocator _idAllocator = new IdAllocator();
201
202 private ITemplateParserDelegate _delegate;
203
204 /**
205 * Identifies the template being parsed; used with error messages.
206 */
207
208 private Resource _resourceLocation;
209
210 /**
211 * Shared instance of {@link Location} used by all {@link TextToken} instances in the template.
212 */
213
214 private Location _templateLocation;
215
216 /**
217 * Location with in the resource for the current line.
218 */
219
220 private Location _currentLocation;
221
222 /**
223 * Local reference to the template data that is to be parsed.
224 */
225
226 private char[] _templateData;
227
228 /**
229 * List of Tag.
230 */
231
232 private List _stack = new ArrayList();
233
234 /**
235 *
236 * @author hls
237 */
238 private static class Tag
239 {
240 // The element, i.e., <jwc> or virtually any other element (via jwcid attribute)
241 String _tagName;
242
243 // If true, the tag is a placeholder for a dynamic element
244 boolean _component;
245
246 // If true, the body of the tag is being ignored, and the
247 // ignore flag is cleared when the close tag is reached
248 boolean _ignoringBody;
249
250 // If true, then the entire tag (and its body) is being ignored
251 boolean _removeTag;
252
253 // If true, then the tag must have a balanced closing tag.
254 // This is always true for components.
255 boolean _mustBalance;
256
257 // The line on which the start tag exists
258 int _line;
259
260 // If true, then the parse ends when the closing tag is found.
261 boolean _content;
262
263 Tag(String tagName, int line)
264 {
265 _tagName = tagName;
266 _line = line;
267 }
268
269 boolean match(String matchTagName)
270 {
271 return _tagName.equalsIgnoreCase(matchTagName);
272 }
273 }
274
275 /**
276 * List of {@link TemplateToken}, this forms the ultimate response.
277 */
278
279 private List _tokens = new ArrayList();
280
281 /**
282 * The location of the 'cursor' within the template data. The advance() method moves this
283 * forward.
284 */
285
286 private int _cursor;
287
288 /**
289 * The start of the current block of static text, or -1 if no block is active.
290 */
291
292 private int _blockStart;
293
294 /**
295 * The current line number; tracked by advance(). Starts at 1.
296 */
297
298 private int _line;
299
300 /**
301 * Set to true when the body of a tag is being ignored. This is typically used to skip over the
302 * body of a tag when its corresponding component doesn't allow a body, or whe the special jwcid
303 * of $remove$ is used.
304 */
305
306 private boolean _ignoring;
307
308 /**
309 * A {@link Map}of {@link String}s, used to store attributes collected while parsing a tag.
310 */
311
312 private Map _attributes = new HashMap();
313
314 /**
315 * A factory used to create template tokens.
316 */
317
318 private TemplateTokenFactory _factory;
319
320 public TemplateParser()
321 {
322 Perl5Compiler compiler = new Perl5Compiler();
323
324 try
325 {
326 _simpleIdPattern = compiler.compile(SIMPLE_ID_PATTERN);
327 _implicitIdPattern = compiler.compile(IMPLICIT_ID_PATTERN);
328 }
329 catch (MalformedPatternException ex)
330 {
331 throw new ApplicationRuntimeException(ex);
332 }
333
334 _patternMatcher = new Perl5Matcher();
335 }
336
337 /**
338 * Parses the template data into an array of {@link TemplateToken}s.
339 * <p>
340 * The parser is <i>decidedly </i> not threadsafe, so care should be taken that only a single
341 * thread accesses it.
342 *
343 * @param templateData
344 * the HTML template to parse. Some tokens will hold a reference to this array.
345 * @param delegate
346 * object that "knows" about defined components
347 * @param resourceLocation
348 * a description of where the template originated from, used with error messages.
349 */
350
351 public TemplateToken[] parse(char[] templateData, ITemplateParserDelegate delegate,
352 Resource resourceLocation) throws TemplateParseException
353 {
354 try
355 {
356 beforeParse(templateData, delegate, resourceLocation);
357
358 parse();
359
360 return (TemplateToken[]) _tokens.toArray(new TemplateToken[_tokens.size()]);
361 }
362 finally
363 {
364 afterParse();
365 }
366 }
367
368 /**
369 * perform default initialization of the parser.
370 */
371
372 protected void beforeParse(char[] templateData, ITemplateParserDelegate delegate, Resource resourceLocation)
373 {
374 _templateData = templateData;
375 _resourceLocation = resourceLocation;
376 _templateLocation = new LocationImpl(resourceLocation);
377 _delegate = delegate;
378 _ignoring = false;
379 _line = 1;
380 _componentAttributeName = delegate.getComponentAttributeName();
381 }
382
383 /**
384 * Perform default cleanup after parsing completes.
385 */
386
387 protected void afterParse()
388 {
389 _delegate = null;
390 _templateData = null;
391 _resourceLocation = null;
392 _templateLocation = null;
393 _currentLocation = null;
394 _stack.clear();
395 _tokens.clear();
396 _attributes.clear();
397 _idAllocator.clear();
398 }
399
400 /**
401 * Used by the parser to report problems in the parse. Parsing <b>must </b> stop when a problem
402 * is reported.
403 * <p>
404 * The default implementation simply throws an exception that contains the message and location
405 * parameters.
406 * <p>
407 * Subclasses may override but <b>must </b> ensure they throw the required exception.
408 *
409 * @param message
410 * @param location
411 * @param line
412 * ignored by the default impl
413 * @param cursor
414 * ignored by the default impl
415 * @throws TemplateParseException
416 * always thrown in order to terminate the parse.
417 */
418
419 protected void templateParseProblem(String message, Location location, int line, int cursor)
420 throws TemplateParseException
421 {
422 throw new TemplateParseException(message, location);
423 }
424
425 /**
426 * Used by the parser to report tapestry runtime specific problems in the parse. Parsing <b>must
427 * </b> stop when a problem is reported.
428 * <p>
429 * The default implementation simply rethrows the exception.
430 * <p>
431 * Subclasses may override but <b>must </b> ensure they rethrow the exception.
432 *
433 * @param exception
434 * @param line
435 * ignored by the default impl
436 * @param cursor
437 * ignored by the default impl
438 * @throws ApplicationRuntimeException
439 * always rethrown in order to terminate the parse.
440 */
441
442 protected void templateParseProblem(ApplicationRuntimeException exception, int line, int cursor)
443 {
444 throw exception;
445 }
446
447 /**
448 * Give subclasses access to the parse results.
449 */
450 protected List getTokens()
451 {
452 if (_tokens == null)
453 return Collections.EMPTY_LIST;
454
455 return _tokens;
456 }
457
458 /**
459 * Checks to see if the next few characters match a given pattern.
460 */
461
462 private boolean lookahead(char[] match)
463 {
464 try
465 {
466 for (int i = 0; i < match.length; i++)
467 {
468 if (_templateData[_cursor + i] != match[i])
469 return false;
470 }
471
472 // Every character matched.
473
474 return true;
475 }
476 catch (IndexOutOfBoundsException ex)
477 {
478 return false;
479 }
480 }
481
482 protected void parse() throws TemplateParseException
483 {
484 _cursor = 0;
485 _blockStart = -1;
486 int length = _templateData.length;
487
488 while (_cursor < length)
489 {
490 if (_templateData[_cursor] != '<')
491 {
492 if (_blockStart < 0 && !_ignoring)
493 _blockStart = _cursor;
494
495 advance();
496 continue;
497 }
498
499 // OK, start of something.
500
501 if (lookahead(CLOSE_TAG))
502 {
503 closeTag();
504 continue;
505 }
506
507 if (lookahead(COMMENT_START))
508 {
509 skipComment();
510 continue;
511 }
512
513 // The start of some tag.
514
515 startTag();
516 }
517
518 // Usually there's some text at the end of the template (after the last closing tag) that
519 // should
520 // be added. Often the last few tags are static tags so we definately
521 // need to end the text block.
522
523 addTextToken(_templateData.length - 1);
524 }
525
526 /**
527 * Advance forward in the document until the end of the comment is reached. In addition, skip
528 * any whitespace following the comment.
529 */
530
531 private void skipComment() throws TemplateParseException
532 {
533 int length = _templateData.length;
534 int startLine = _line;
535
536 if (_blockStart < 0 && !_ignoring)
537 _blockStart = _cursor;
538
539 while (true)
540 {
541 if (_cursor >= length)
542 templateParseProblem(ParseMessages.commentNotEnded(startLine), new LocationImpl(
543 _resourceLocation, startLine), startLine, _cursor);
544
545 if (lookahead(COMMENT_END))
546 break;
547
548 // Not the end of the comment, advance over it.
549
550 advance();
551 }
552
553 _cursor += COMMENT_END.length;
554 advanceOverWhitespace();
555 }
556
557 private void addTextToken(int end)
558 {
559 // No active block to add to.
560
561 if (_blockStart < 0)
562 return;
563
564 if (_blockStart <= end)
565 {
566 // This seems odd, shouldn't the location be the current location? I guess
567 // no errors are ever reported for a text token.
568
569 TemplateToken token = _factory.createTextToken(
570 _templateData,
571 _blockStart,
572 end,
573 _templateLocation);
574
575 _tokens.add(token);
576 }
577
578 _blockStart = -1;
579 }
580
581 private void startTag() throws TemplateParseException
582 {
583 int cursorStart = _cursor;
584 int length = _templateData.length;
585 String tagName = null;
586 boolean endOfTag = false;
587 boolean emptyTag = false;
588 int startLine = _line;
589 Location startLocation = new LocationImpl(_resourceLocation, startLine);
590
591 tagBeginEvent(startLine, _cursor);
592
593 advance();
594
595 // Collect the element type
596
597 while (_cursor < length)
598 {
599 char ch = _templateData[_cursor];
600
601 if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
602 {
603 tagName = new String(_templateData, cursorStart + 1, _cursor - cursorStart - 1);
604
605 break;
606 }
607
608 advance();
609 }
610
611 String attributeName = null;
612 int attributeNameStart = -1;
613 int attributeValueStart = -1;
614 int state = WAIT_FOR_ATTRIBUTE_NAME;
615 char quoteChar = 0;
616
617 _attributes.clear();
618
619 // Collect each attribute
620
621 while (!endOfTag)
622 {
623 if (_cursor >= length)
624 {
625 String message = (tagName == null) ? ParseMessages.unclosedUnknownTag(startLine)
626 : ParseMessages.unclosedTag(tagName, startLine);
627
628 templateParseProblem(message, startLocation, startLine, cursorStart);
629 }
630
631 char ch = _templateData[_cursor];
632
633 switch (state)
634 {
635 case WAIT_FOR_ATTRIBUTE_NAME:
636
637 // Ignore whitespace before the next attribute name, while
638 // looking for the end of the current tag.
639
640 if (ch == '/')
641 {
642 emptyTag = true;
643 advance();
644 break;
645 }
646
647 if (ch == '>')
648 {
649 endOfTag = true;
650 break;
651 }
652
653 if (Character.isWhitespace(ch))
654 {
655 advance();
656 break;
657 }
658
659 // Found non-whitespace, assume its the attribute name.
660 // Note: could use a check here for non-alpha.
661
662 attributeNameStart = _cursor;
663 state = COLLECT_ATTRIBUTE_NAME;
664 advance();
665 break;
666
667 case COLLECT_ATTRIBUTE_NAME:
668
669 // Looking for end of attribute name.
670
671 if (ch == '=' || ch == '/' || ch == '>' || Character.isWhitespace(ch))
672 {
673 attributeName = new String(_templateData, attributeNameStart, _cursor
674 - attributeNameStart);
675
676 state = ADVANCE_PAST_EQUALS;
677 break;
678 }
679
680 // Part of the attribute name
681
682 advance();
683 break;
684
685 case ADVANCE_PAST_EQUALS:
686
687 // Looking for the '=' sign. May hit the end of the tag, or (for bare
688 // attributes),
689 // the next attribute name.
690
691 if (ch == '/' || ch == '>')
692 {
693 // A bare attribute, which is not interesting to
694 // us.
695
696 state = WAIT_FOR_ATTRIBUTE_NAME;
697 break;
698 }
699
700 if (Character.isWhitespace(ch))
701 {
702 advance();
703 break;
704 }
705
706 if (ch == '=')
707 {
708 state = WAIT_FOR_ATTRIBUTE_VALUE;
709 quoteChar = 0;
710 attributeValueStart = -1;
711 advance();
712 break;
713 }
714
715 // Otherwise, an HTML style "bare" attribute (such as <select multiple>).
716 // We aren't interested in those (we're just looking for the id or jwcid
717 // attribute).
718
719 state = WAIT_FOR_ATTRIBUTE_NAME;
720 break;
721
722 case WAIT_FOR_ATTRIBUTE_VALUE:
723
724 if (ch == '/' || ch == '>')
725 templateParseProblem(ParseMessages.missingAttributeValue(
726 tagName,
727 _line,
728 attributeName), getCurrentLocation(), _line, _cursor);
729
730 // Ignore whitespace between '=' and the attribute value. Also, look
731 // for initial quote.
732
733 if (Character.isWhitespace(ch))
734 {
735 advance();
736 break;
737 }
738
739 if (ch == '\'' || ch == '"')
740 {
741 quoteChar = ch;
742
743 state = COLLECT_QUOTED_VALUE;
744 advance();
745 attributeValueStart = _cursor;
746 attributeBeginEvent(attributeName, _line, attributeValueStart);
747 break;
748 }
749
750 // Not whitespace or quote, must be start of unquoted attribute.
751
752 state = COLLECT_UNQUOTED_VALUE;
753 attributeValueStart = _cursor;
754 attributeBeginEvent(attributeName, _line, attributeValueStart);
755 break;
756
757 case COLLECT_QUOTED_VALUE:
758
759 // Start collecting the quoted attribute value. Stop at the matching quote
760 // character,
761 // unless bare, in which case, stop at the next whitespace.
762
763 if (ch == quoteChar)
764 {
765 String attributeValue = new String(_templateData, attributeValueStart,
766 _cursor - attributeValueStart);
767
768 attributeEndEvent(_cursor);
769
770 addAttributeIfUnique(tagName, attributeName, attributeValue);
771
772 // Advance over the quote.
773 advance();
774 state = WAIT_FOR_ATTRIBUTE_NAME;
775 break;
776 }
777
778 advance();
779 break;
780
781 case COLLECT_UNQUOTED_VALUE:
782
783 // An unquoted attribute value ends with whitespace
784 // or the end of the enclosing tag.
785
786 if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
787 {
788 String attributeValue = new String(_templateData, attributeValueStart,
789 _cursor - attributeValueStart);
790
791 attributeEndEvent(_cursor);
792 addAttributeIfUnique(tagName, attributeName, attributeValue);
793
794 state = WAIT_FOR_ATTRIBUTE_NAME;
795 break;
796 }
797
798 advance();
799 break;
800 }
801 }
802
803 tagEndEvent(_cursor);
804
805 // Check for invisible localizations
806
807 String localizationKey = findValueCaselessly(LOCALIZATION_KEY_ATTRIBUTE_NAME, _attributes);
808 String jwcId = findValueCaselessly(_componentAttributeName, _attributes);
809
810 if (localizationKey != null && jwcId == null)
811 {
812 if (_ignoring)
813 templateParseProblem(
814 ParseMessages.componentMayNotBeIgnored(tagName, startLine),
815 startLocation,
816 startLine,
817 cursorStart);
818
819 // If the tag isn't empty, then create a Tag instance to ignore the
820 // body of the tag.
821
822 if (!emptyTag)
823 {
824 Tag tag = new Tag(tagName, startLine);
825
826 tag._component = false;
827 tag._removeTag = false;
828 tag._ignoringBody = true;
829 tag._mustBalance = true;
830
831 _stack.add(tag);
832
833 // Start ignoring content until the close tag.
834
835 _ignoring = true;
836 }
837 else
838 {
839 // Cursor is at the closing carat, advance over it.
840 advance();
841 // TAPESTRY-359: *don't* skip whitespace advanceOverWhitespace()
842 }
843
844 // End any open block.
845
846 addTextToken(cursorStart - 1);
847
848 boolean raw = checkBoolean(RAW_ATTRIBUTE_NAME, _attributes);
849
850 Map attributes = filter(_attributes, new String[] { LOCALIZATION_KEY_ATTRIBUTE_NAME, RAW_ATTRIBUTE_NAME });
851
852 TemplateToken token = _factory.createLocalizationToken(
853 tagName,
854 localizationKey,
855 raw,
856 attributes,
857 startLocation);
858
859 _tokens.add(token);
860
861 return;
862 }
863
864 if (jwcId != null)
865 {
866 processComponentStart(tagName, jwcId, emptyTag, startLine, cursorStart, startLocation);
867 return;
868 }
869
870 // A static tag (not a tag without a jwcid attribute).
871 // We need to record this so that we can match close tags later.
872
873 if (!emptyTag)
874 {
875 Tag tag = new Tag(tagName, startLine);
876 _stack.add(tag);
877 }
878
879 // If there wasn't an active block, then start one.
880
881 if (_blockStart < 0 && !_ignoring)
882 _blockStart = cursorStart;
883
884 advance();
885 }
886
887 /**
888 * @throws TemplateParseException
889 * @since 4.0
890 */
891
892 private void addAttributeIfUnique(String tagName, String attributeName, String attributeValue)
893 throws TemplateParseException
894 {
895
896 if (_attributes.containsKey(attributeName))
897 templateParseProblem(
898 ParseMessages.duplicateTagAttribute(tagName, _line, attributeName),
899 getCurrentLocation(),
900 _line,
901 _cursor);
902
903 _attributes.put(attributeName, attributeValue);
904 }
905
906 /*
907 * Note: processComponentStart(), declared after the event notification methods below, processes a
908 * tag that is the open tag for a component (but also handles the $remove$ and $content$ tags).
909 */
910
911 /**
912 * Notify that the beginning of a tag has been detected.
913 * <p>
914 * Default implementation does nothing.
915 */
916 protected void tagBeginEvent(int startLine, int cursorPosition)
917 {
918 }
919
920 /**
921 * Notify that the end of the current tag has been detected.
922 * <p>
923 * Default implementation does nothing.
924 */
925 protected void tagEndEvent(int cursorPosition)
926 {
927 }
928
929 /**
930 * Notify that the beginning of an attribute value has been detected.
931 * <p>
932 * Default implementation does nothing.
933 */
934 protected void attributeBeginEvent(String attributeName, int startLine, int cursorPosition)
935 {
936 }
937
938 /**
939 * Notify that the end of the current attribute value has been detected.
940 * <p>
941 * Default implementation does nothing.
942 */
943 protected void attributeEndEvent(int cursorPosition)
944 {
945 }
946
947 private void processComponentStart(String tagName, String jwcId, boolean emptyTag,
948 int startLine, int cursorStart, Location startLocation) throws TemplateParseException
949 {
950 String componentId = jwcId;
951 if (componentId.equalsIgnoreCase(CONTENT_ID))
952 {
953 processContentTag(tagName, startLine, cursorStart, emptyTag);
954
955 return;
956 }
957
958 boolean isRemoveId = componentId.equalsIgnoreCase(REMOVE_ID);
959
960 if (_ignoring && !isRemoveId)
961 templateParseProblem(
962 ParseMessages.componentMayNotBeIgnored(tagName, startLine),
963 startLocation,
964 startLine,
965 cursorStart);
966
967 String type = null;
968 boolean allowBody = false;
969
970 if (_patternMatcher.matches(componentId, _implicitIdPattern))
971 {
972 MatchResult match = _patternMatcher.getMatch();
973
974 componentId = match.group(IMPLICIT_ID_PATTERN_ID_GROUP);
975 type = match.group(IMPLICIT_ID_PATTERN_TYPE_GROUP);
976
977 String libraryId = match.group(IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP);
978 String simpleType = match.group(IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP);
979
980 // If (and this is typical) no actual component id was specified,
981 // then generate one on the fly.
982 // The allocated id for anonymous components is
983 // based on the simple (unprefixed) type, but starts
984 // with a leading dollar sign to ensure no conflicts
985 // with user defined component ids (which don't allow dollar signs
986 // in the id).
987 // New for 4.0: the component type may included slashes ('/'), but these
988 // are not valid identifiers, so we convert them to '$'.
989
990 if (componentId == null)
991 componentId = _idAllocator.allocateId("$" + simpleType.replace('/', '$'));
992
993 try
994 {
995 allowBody = _delegate.getAllowBody(libraryId, simpleType, startLocation);
996 }
997 catch (ApplicationRuntimeException e)
998 {
999 // give subclasses a chance to handle and rethrow
1000 templateParseProblem(e, startLine, cursorStart);
1001 }
1002
1003 }
1004 else
1005 {
1006 if (!isRemoveId)
1007 {
1008 if (!_patternMatcher.matches(componentId, _simpleIdPattern))
1009 templateParseProblem(
1010 ParseMessages.componentIdInvalid(tagName, startLine, componentId),
1011 startLocation,
1012 startLine,
1013 cursorStart);
1014
1015 if (!_delegate.getKnownComponent(componentId))
1016 templateParseProblem(
1017 ParseMessages.unknownComponentId(tagName, startLine, componentId),
1018 startLocation,
1019 startLine,
1020 cursorStart);
1021
1022 try
1023 {
1024 allowBody = _delegate.getAllowBody(componentId, startLocation);
1025 }
1026 catch (ApplicationRuntimeException e)
1027 {
1028 // give subclasses a chance to handle and rethrow
1029 templateParseProblem(e, startLine, cursorStart);
1030 }
1031 }
1032 }
1033
1034 // Ignore the body if we're removing the entire tag,
1035 // of if the corresponding component doesn't allow
1036 // a body.
1037
1038 boolean ignoreBody = !emptyTag && (isRemoveId || !allowBody);
1039
1040 if (_ignoring && ignoreBody)
1041 templateParseProblem(ParseMessages.nestedIgnore(tagName, startLine), new LocationImpl(
1042 _resourceLocation, startLine), startLine, cursorStart);
1043
1044 if (!emptyTag)
1045 pushNewTag(tagName, startLine, isRemoveId, ignoreBody);
1046
1047 // End any open block.
1048
1049 addTextToken(cursorStart - 1);
1050
1051 if (!isRemoveId)
1052 {
1053 addOpenToken(tagName, componentId, type, startLocation);
1054
1055 if (emptyTag)
1056 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1057 }
1058
1059 advance();
1060 }
1061
1062 private void pushNewTag(String tagName, int startLine, boolean isRemoveId, boolean ignoreBody)
1063 {
1064 Tag tag = new Tag(tagName, startLine);
1065
1066 tag._component = !isRemoveId;
1067 tag._removeTag = isRemoveId;
1068
1069 tag._ignoringBody = ignoreBody;
1070
1071 _ignoring = tag._ignoringBody;
1072
1073 tag._mustBalance = true;
1074
1075 _stack.add(tag);
1076 }
1077
1078 private void processContentTag(String tagName, int startLine, int cursorStart, boolean emptyTag)
1079 throws TemplateParseException
1080 {
1081 if (_ignoring)
1082 templateParseProblem(
1083 ParseMessages.contentBlockMayNotBeIgnored(tagName, startLine),
1084 new LocationImpl(_resourceLocation, startLine),
1085 startLine,
1086 cursorStart);
1087
1088 if (emptyTag)
1089 templateParseProblem(
1090 ParseMessages.contentBlockMayNotBeEmpty(tagName, startLine),
1091 new LocationImpl(_resourceLocation, startLine),
1092 startLine,
1093 cursorStart);
1094
1095 _tokens.clear();
1096 _blockStart = -1;
1097
1098 Tag tag = new Tag(tagName, startLine);
1099
1100 tag._mustBalance = true;
1101 tag._content = true;
1102
1103 _stack.clear();
1104 _stack.add(tag);
1105
1106 advance();
1107 }
1108
1109 private void addOpenToken(String tagName, String jwcId, String type, Location location)
1110 {
1111 OpenToken token = _factory.createOpenToken(tagName, jwcId, type, location);
1112 _tokens.add(token);
1113
1114 if (_attributes.isEmpty())
1115 return;
1116
1117 Iterator i = _attributes.entrySet().iterator();
1118 while (i.hasNext())
1119 {
1120 Map.Entry entry = (Map.Entry) i.next();
1121
1122 String key = (String) entry.getKey();
1123
1124 if (key.equalsIgnoreCase(_componentAttributeName))
1125 continue;
1126
1127 String value = (String) entry.getValue();
1128
1129 addAttributeToToken(token, key, value);
1130 }
1131 }
1132
1133 /**
1134 * Adds the attribute to the token (identifying prefixes and whatnot is now done downstream).
1135 *
1136 * @since 3.0
1137 */
1138
1139 private void addAttributeToToken(OpenToken token, String name, String attributeValue)
1140 {
1141 token.addAttribute(name, convertEntitiesToPlain(attributeValue));
1142 }
1143
1144 /**
1145 * Invoked to handle a closing tag, i.e., </foo>. When a tag closes, it will match against
1146 * a tag on the open tag start. Preferably the top tag on the stack (if everything is well
1147 * balanced), but this is HTML, not XML, so many tags won't balance.
1148 * <p>
1149 * Once the matching tag is located, the question is ... is the tag dynamic or static? If
1150 * static, then the current text block is extended to include this close tag. If dynamic, then
1151 * the current text block is ended (before the '<' that starts the tag) and a close token is
1152 * added.
1153 * <p>
1154 * In either case, the matching static element and anything above it is removed, and the cursor
1155 * is left on the character following the '>'.
1156 */
1157
1158 private void closeTag() throws TemplateParseException
1159 {
1160 int cursorStart = _cursor;
1161 int length = _templateData.length;
1162 int startLine = _line;
1163
1164 Location startLocation = getCurrentLocation();
1165
1166 _cursor += CLOSE_TAG.length;
1167
1168 int tagStart = _cursor;
1169
1170 while (true)
1171 {
1172 if (_cursor >= length)
1173 templateParseProblem(
1174 ParseMessages.incompleteCloseTag(startLine),
1175 startLocation,
1176 startLine,
1177 cursorStart);
1178
1179 char ch = _templateData[_cursor];
1180
1181 if (ch == '>')
1182 break;
1183
1184 advance();
1185 }
1186
1187 String tagName = new String(_templateData, tagStart, _cursor - tagStart);
1188
1189 int stackPos = _stack.size() - 1;
1190 Tag tag = null;
1191
1192 while (stackPos >= 0)
1193 {
1194 tag = (Tag) _stack.get(stackPos);
1195
1196 if (tag.match(tagName))
1197 break;
1198
1199 if (tag._mustBalance)
1200 templateParseProblem(ParseMessages.improperlyNestedCloseTag(
1201 tagName,
1202 startLine,
1203 tag._tagName,
1204 tag._line), startLocation, startLine, cursorStart);
1205
1206 stackPos--;
1207 }
1208
1209 if (stackPos < 0)
1210 templateParseProblem(
1211 ParseMessages.unmatchedCloseTag(tagName, startLine),
1212 startLocation,
1213 startLine,
1214 cursorStart);
1215
1216 // Special case for the content tag
1217
1218 if (tag._content)
1219 {
1220 addTextToken(cursorStart - 1);
1221
1222 // Advance the cursor right to the end.
1223
1224 _cursor = length;
1225 _stack.clear();
1226 return;
1227 }
1228
1229 // When a component closes, add a CLOSE tag.
1230 if (tag._component)
1231 {
1232 addTextToken(cursorStart - 1);
1233
1234 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1235 }
1236 else
1237 {
1238 // The close of a static tag. Unless removing the tag
1239 // entirely, make sure the block tag is part of a text block.
1240
1241 if (_blockStart < 0 && !tag._removeTag && !_ignoring)
1242 _blockStart = cursorStart;
1243 }
1244
1245 // Remove all elements at stackPos or above.
1246
1247 for (int i = _stack.size() - 1; i >= stackPos; i--)
1248 _stack.remove(i);
1249
1250 // Advance cursor past '>'
1251
1252 advance();
1253
1254 // If editting out the tag (i.e., $remove$) then kill any whitespace.
1255 // For components that simply don't contain a body, removeTag will
1256 // be false.
1257
1258 if (tag._removeTag)
1259 advanceOverWhitespace();
1260
1261 // If we were ignoring the body of the tag, then clear the ignoring
1262 // flag, since we're out of the body.
1263
1264 if (tag._ignoringBody)
1265 _ignoring = false;
1266 }
1267
1268 /**
1269 * Advances the cursor to the next character. If the end-of-line is reached, then increments the
1270 * line counter.
1271 */
1272
1273 private void advance()
1274 {
1275 int length = _templateData.length;
1276
1277 if (_cursor >= length)
1278 return;
1279
1280 char ch = _templateData[_cursor];
1281
1282 _cursor++;
1283
1284 if (ch == '\n')
1285 {
1286 _line++;
1287 _currentLocation = null;
1288 return;
1289 }
1290
1291 // A \r, or a \r\n also counts as a new line.
1292
1293 if (ch == '\r')
1294 {
1295 _line++;
1296 _currentLocation = null;
1297
1298 if (_cursor < length && _templateData[_cursor] == '\n')
1299 _cursor++;
1300
1301 return;
1302 }
1303
1304 // Not an end-of-line character.
1305 }
1306
1307 private void advanceOverWhitespace()
1308 {
1309 int length = _templateData.length;
1310
1311 while (_cursor < length)
1312 {
1313 char ch = _templateData[_cursor];
1314 if (!Character.isWhitespace(ch))
1315 return;
1316
1317 advance();
1318 }
1319 }
1320
1321 /**
1322 * Returns a new Map that is a copy of the input Map with some key/value pairs removed. A list
1323 * of keys is passed in and matching keys (caseless comparison) from the input Map are excluded
1324 * from the output map. May return null (rather than return an empty Map).
1325 */
1326
1327 private Map filter(Map input, String[] removeKeys)
1328 {
1329 if (input == null || input.isEmpty())
1330 return null;
1331
1332 Map result = null;
1333
1334 Iterator i = input.entrySet().iterator();
1335
1336 nextkey: while (i.hasNext())
1337 {
1338 Map.Entry entry = (Map.Entry) i.next();
1339
1340 String key = (String) entry.getKey();
1341
1342 for (int j = 0; j < removeKeys.length; j++)
1343 {
1344 if (key.equalsIgnoreCase(removeKeys[j]))
1345 continue nextkey;
1346 }
1347
1348 if (result == null)
1349 result = new HashMap(input.size());
1350
1351 result.put(key, entry.getValue());
1352 }
1353
1354 return result;
1355 }
1356
1357 /**
1358 * Searches a Map for given key, caselessly. The Map is expected to consist of Strings for keys
1359 * and values. Returns the value for the first key found that matches (caselessly) the input
1360 * key. Returns null if no value found.
1361 */
1362
1363 protected String findValueCaselessly(String key, Map map)
1364 {
1365 String result = (String) map.get(key);
1366
1367 if (result != null)
1368 return result;
1369
1370 Iterator i = map.entrySet().iterator();
1371 while (i.hasNext())
1372 {
1373 Map.Entry entry = (Map.Entry) i.next();
1374
1375 String entryKey = (String) entry.getKey();
1376
1377 if (entryKey.equalsIgnoreCase(key))
1378 return (String) entry.getValue();
1379 }
1380
1381 return null;
1382 }
1383
1384 /**
1385 * Provided a raw input string that has been recognized to be an expression, this removes excess
1386 * white space and converts &amp;amp;, &amp;quot;, &amp;lt; and &amp;gt; to their normal
1387 * character values (otherwise it's impossible to specify those values in expressions in the
1388 * template).
1389 */
1390
1391 private String convertEntitiesToPlain(String input)
1392 {
1393 int inputLength = input.length();
1394
1395 StringBuffer buffer = new StringBuffer(inputLength);
1396
1397 int cursor = 0;
1398
1399 outer: while (cursor < inputLength)
1400 {
1401 for (int i = 0; i < CONVERSIONS.length; i += 2)
1402 {
1403 String entity = CONVERSIONS[i];
1404 int entityLength = entity.length();
1405 String value = CONVERSIONS[i + 1];
1406
1407 if (cursor + entityLength > inputLength)
1408 continue;
1409
1410 if (input.substring(cursor, cursor + entityLength).equals(entity))
1411 {
1412 buffer.append(value);
1413 cursor += entityLength;
1414 continue outer;
1415 }
1416 }
1417
1418 buffer.append(input.charAt(cursor));
1419 cursor++;
1420 }
1421
1422 return buffer.toString().trim();
1423 }
1424
1425 /**
1426 * Returns true if the map contains the given key (caseless search) and the value is "true"
1427 * (caseless comparison).
1428 */
1429
1430 private boolean checkBoolean(String key, Map map)
1431 {
1432 String value = findValueCaselessly(key, map);
1433
1434 if (value == null)
1435 return false;
1436
1437 return value.equalsIgnoreCase("true");
1438 }
1439
1440 /**
1441 * Gets the current location within the file. This allows the location to be created only as
1442 * needed, and multiple objects on the same line can share the same Location instance.
1443 *
1444 * @since 3.0
1445 */
1446
1447 protected Location getCurrentLocation()
1448 {
1449 if (_currentLocation == null)
1450 _currentLocation = new LocationImpl(_resourceLocation, _line);
1451
1452 return _currentLocation;
1453 }
1454
1455 public void setFactory(TemplateTokenFactory factory)
1456 {
1457 _factory = factory;
1458 }
1459
1460 }