001 // Copyright 2004, 2005 The Apache Software Foundation 002 // 003 // Licensed under the Apache License, Version 2.0 (the "License"); 004 // you may not use this file except in compliance with the License. 005 // You may obtain a copy of the License at 006 // 007 // http://www.apache.org/licenses/LICENSE-2.0 008 // 009 // Unless required by applicable law or agreed to in writing, software 010 // distributed under the License is distributed on an "AS IS" BASIS, 011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 // See the License for the specific language governing permissions and 013 // limitations under the License. 014 015 package org.apache.tapestry.parse; 016 017 import java.util.ArrayList; 018 import java.util.Collections; 019 import java.util.HashMap; 020 import java.util.Iterator; 021 import java.util.List; 022 import java.util.Map; 023 024 import org.apache.hivemind.ApplicationRuntimeException; 025 import org.apache.hivemind.Location; 026 import org.apache.hivemind.Resource; 027 import org.apache.hivemind.impl.LocationImpl; 028 import org.apache.oro.text.regex.MalformedPatternException; 029 import org.apache.oro.text.regex.MatchResult; 030 import org.apache.oro.text.regex.Pattern; 031 import org.apache.oro.text.regex.PatternMatcher; 032 import org.apache.oro.text.regex.Perl5Compiler; 033 import org.apache.oro.text.regex.Perl5Matcher; 034 import org.apache.tapestry.util.IdAllocator; 035 036 /** 037 * Parses Tapestry templates, breaking them into a series of 038 * {@link org.apache.tapestry.parse.TemplateToken tokens}. Although often referred to as an "HTML 039 * template", there is no real requirement that the template be HTML. This parser can handle any 040 * reasonable SGML derived markup (including XML), but specifically works around the ambiguities of 041 * HTML reasonably. 042 * <p> 043 * Deployed as the tapestry.parse.TemplateParser service, using the threaded model. 
 * <p>
 * Dynamic markup in Tapestry attempts to be invisible. Components are arbitrary tags containing a
 * <code>jwcid</code> attribute. Such components must be well balanced (have a matching close tag,
 * or end the tag with "<code>/&gt;</code>").
 * <p>
 * Generally, the id specified in the template is matched against a component defined in the
 * specification. However, implicit components are also possible. The jwcid attribute uses the
 * syntax "<code>@Type</code>" for implicit components. Type is the component type, and may
 * include a library id prefix. Such a component is anonymous (but is given a unique id).
 * <p>
 * (The unique ids assigned start with a dollar sign, which is normally not allowed for
 * component ids ... this helps to make them stand out and assures that they do not conflict
 * with user-defined component ids. These ids tend to propagate into URLs and become HTML
 * element names and even JavaScript variable names ... the dollar sign is acceptable in these
 * contexts as well).
 * <p>
 * Implicit components may also be given a name using the syntax
 * "<code>componentId:@Type</code>". Such a component should <b>not</b> be defined in the
 * specification, but may still be accessed via
 * {@link org.apache.tapestry.IComponent#getComponent(String)}.
 * <p>
 * Both defined and implicit components may have additional attributes defined, simply by
 * including them in the template. They set formal or informal parameters of the component to
 * static strings.
 * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowInformalParameters()}, if
 * false, will cause such attributes to be simply ignored. For defined components, conflicting
 * values defined in the template are ignored.
 * <p>
 * Attributes in component tags will become formal and informal parameters of the
 * corresponding component.
* <p>
 * The parser removes the body of some tags (when the corresponding component doesn't
 * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowBody() allow a body}), and
 * allows portions of the template to be completely removed.
 * <p>
 * The parser does a pretty thorough lexical analysis of the template, and reports a great
 * number of errors, including improper nesting of tags.
 * <p>
 * The parser supports <em>invisible localization</em>: The parser recognizes HTML of the
 * form: <code>&lt;span key="<i>value</i>"&gt; ... &lt;/span&gt;</code> and converts it
 * into a {@link TokenType#LOCALIZATION} token. You may also specify a <code>raw</code>
 * attribute ... if the value is <code>true</code>, then the localized value is sent to the
 * client without filtering, which is appropriate if the value has any markup that should not
 * be escaped.
 *
 * @author Howard Lewis Ship, Geoff Longman
 */

public class TemplateParser implements ITemplateParser
{
    /**
     * The attribute, checked for in &lt;span&gt; tags, that signifies that the span is being used
     * as an invisible localization.
     *
     * @since 2.0.4
     */

    public static final String LOCALIZATION_KEY_ATTRIBUTE_NAME = "key";

    /**
     * Used with {@link #LOCALIZATION_KEY_ATTRIBUTE_NAME} to indicate a string that should be
     * rendered "raw" (without escaping HTML). If not specified, defaults to "false". The value must
     * equal "true" (caselessly).
     *
     * @since 2.3
     */

    public static final String RAW_ATTRIBUTE_NAME = "raw";

    /**
     * Regular expression fragment for a single name: an optional leading underscore, then a
     * letter, then any number of word characters. It is the building block of
     * {@link #SIMPLE_ID_PATTERN} and {@link #IMPLICIT_ID_PATTERN}.
     */

    public static final String PROPERTY_NAME_PATTERN = "_?[a-zA-Z]\\w*";

    /**
     * Pattern used to recognize ordinary components (defined in the specification).
*
     * The whole id must be a single {@link #PROPERTY_NAME_PATTERN name}; subgroup 1 is that name.
     *
     * @since 3.0
     */

    public static final String SIMPLE_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")$";

    /**
     * Pattern used to recognize implicit components (whose type is defined in the template).
     * Subgroup 1 is the id (which may be null) and subgroup 2 is the type (which may be qualified
     * with a library prefix). Subgroup 4 is the library id, Subgroup 5 is the simple component
     * type, which may (as of 4.0) have slashes to delineate folders containing the component.
     * <p>
     * The overall shape is <code>[id]@[libraryId:][folder/]*Type</code>.
     *
     * @since 3.0
     */

    public static final String IMPLICIT_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")?@((("
            + PROPERTY_NAME_PATTERN + "):)?((" + PROPERTY_NAME_PATTERN + "/)*"
            + PROPERTY_NAME_PATTERN + "))$";

    /**
     * A "magic" component id that causes the tag with the id and its entire body to be ignored
     * during parsing. Compared caselessly against the component attribute value.
     */

    private static final String REMOVE_ID = "$remove$";

    /**
     * A "magic" component id that causes the tag to represent the true content of the template. Any
     * content prior to the tag is discarded, and any content after the tag is ignored. The tag
     * itself is not included.
146 */ 147 148 private static final String CONTENT_ID = "$content$"; 149 150 private static final int IMPLICIT_ID_PATTERN_ID_GROUP = 1; 151 152 private static final int IMPLICIT_ID_PATTERN_TYPE_GROUP = 2; 153 154 private static final int IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP = 4; 155 156 private static final int IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP = 5; 157 158 private static final char[] COMMENT_START = new char[] 159 { '<', '!', '-', '-' }; 160 161 private static final char[] COMMENT_END = new char[] 162 { '-', '-', '>' }; 163 164 private static final char[] CLOSE_TAG = new char[] 165 { '<', '/' }; 166 167 private static final int WAIT_FOR_ATTRIBUTE_NAME = 0; 168 169 private static final int COLLECT_ATTRIBUTE_NAME = 1; 170 171 private static final int ADVANCE_PAST_EQUALS = 2; 172 173 private static final int WAIT_FOR_ATTRIBUTE_VALUE = 3; 174 175 private static final int COLLECT_QUOTED_VALUE = 4; 176 177 private static final int COLLECT_UNQUOTED_VALUE = 5; 178 179 /** 180 * Conversions needed by {@link #convertEntitiesToPlain(String)}. 181 */ 182 183 private static final String[] CONVERSIONS = 184 { "<", "<", ">", ">", """, "\"", "&", "&" }; 185 186 /** 187 * Attribute name used to identify components. 188 * 189 * @since 4.0 190 */ 191 192 private String _componentAttributeName; 193 194 private Pattern _simpleIdPattern; 195 196 private Pattern _implicitIdPattern; 197 198 private PatternMatcher _patternMatcher; 199 200 private IdAllocator _idAllocator = new IdAllocator(); 201 202 private ITemplateParserDelegate _delegate; 203 204 /** 205 * Identifies the template being parsed; used with error messages. 206 */ 207 208 private Resource _resourceLocation; 209 210 /** 211 * Shared instance of {@link Location} used by all {@link TextToken} instances in the template. 212 */ 213 214 private Location _templateLocation; 215 216 /** 217 * Location with in the resource for the current line. 
218 */ 219 220 private Location _currentLocation; 221 222 /** 223 * Local reference to the template data that is to be parsed. 224 */ 225 226 private char[] _templateData; 227 228 /** 229 * List of Tag. 230 */ 231 232 private List _stack = new ArrayList(); 233 234 /** 235 * 236 * @author hls 237 */ 238 private static class Tag 239 { 240 // The element, i.e., <jwc> or virtually any other element (via jwcid attribute) 241 String _tagName; 242 243 // If true, the tag is a placeholder for a dynamic element 244 boolean _component; 245 246 // If true, the body of the tag is being ignored, and the 247 // ignore flag is cleared when the close tag is reached 248 boolean _ignoringBody; 249 250 // If true, then the entire tag (and its body) is being ignored 251 boolean _removeTag; 252 253 // If true, then the tag must have a balanced closing tag. 254 // This is always true for components. 255 boolean _mustBalance; 256 257 // The line on which the start tag exists 258 int _line; 259 260 // If true, then the parse ends when the closing tag is found. 261 boolean _content; 262 263 Tag(String tagName, int line) 264 { 265 _tagName = tagName; 266 _line = line; 267 } 268 269 boolean match(String matchTagName) 270 { 271 return _tagName.equalsIgnoreCase(matchTagName); 272 } 273 } 274 275 /** 276 * List of {@link TemplateToken}, this forms the ultimate response. 277 */ 278 279 private List _tokens = new ArrayList(); 280 281 /** 282 * The location of the 'cursor' within the template data. The advance() method moves this 283 * forward. 284 */ 285 286 private int _cursor; 287 288 /** 289 * The start of the current block of static text, or -1 if no block is active. 290 */ 291 292 private int _blockStart; 293 294 /** 295 * The current line number; tracked by advance(). Starts at 1. 296 */ 297 298 private int _line; 299 300 /** 301 * Set to true when the body of a tag is being ignored. 
This is typically used to skip over the 302 * body of a tag when its corresponding component doesn't allow a body, or whe the special jwcid 303 * of $remove$ is used. 304 */ 305 306 private boolean _ignoring; 307 308 /** 309 * A {@link Map}of {@link String}s, used to store attributes collected while parsing a tag. 310 */ 311 312 private Map _attributes = new HashMap(); 313 314 /** 315 * A factory used to create template tokens. 316 */ 317 318 private TemplateTokenFactory _factory; 319 320 public TemplateParser() 321 { 322 Perl5Compiler compiler = new Perl5Compiler(); 323 324 try 325 { 326 _simpleIdPattern = compiler.compile(SIMPLE_ID_PATTERN); 327 _implicitIdPattern = compiler.compile(IMPLICIT_ID_PATTERN); 328 } 329 catch (MalformedPatternException ex) 330 { 331 throw new ApplicationRuntimeException(ex); 332 } 333 334 _patternMatcher = new Perl5Matcher(); 335 } 336 337 /** 338 * Parses the template data into an array of {@link TemplateToken}s. 339 * <p> 340 * The parser is <i>decidedly </i> not threadsafe, so care should be taken that only a single 341 * thread accesses it. 342 * 343 * @param templateData 344 * the HTML template to parse. Some tokens will hold a reference to this array. 345 * @param delegate 346 * object that "knows" about defined components 347 * @param resourceLocation 348 * a description of where the template originated from, used with error messages. 349 */ 350 351 public TemplateToken[] parse(char[] templateData, ITemplateParserDelegate delegate, 352 Resource resourceLocation) throws TemplateParseException 353 { 354 try 355 { 356 beforeParse(templateData, delegate, resourceLocation); 357 358 parse(); 359 360 return (TemplateToken[]) _tokens.toArray(new TemplateToken[_tokens.size()]); 361 } 362 finally 363 { 364 afterParse(); 365 } 366 } 367 368 /** 369 * perform default initialization of the parser. 
370 */ 371 372 protected void beforeParse(char[] templateData, ITemplateParserDelegate delegate, Resource resourceLocation) 373 { 374 _templateData = templateData; 375 _resourceLocation = resourceLocation; 376 _templateLocation = new LocationImpl(resourceLocation); 377 _delegate = delegate; 378 _ignoring = false; 379 _line = 1; 380 _componentAttributeName = delegate.getComponentAttributeName(); 381 } 382 383 /** 384 * Perform default cleanup after parsing completes. 385 */ 386 387 protected void afterParse() 388 { 389 _delegate = null; 390 _templateData = null; 391 _resourceLocation = null; 392 _templateLocation = null; 393 _currentLocation = null; 394 _stack.clear(); 395 _tokens.clear(); 396 _attributes.clear(); 397 _idAllocator.clear(); 398 } 399 400 /** 401 * Used by the parser to report problems in the parse. Parsing <b>must </b> stop when a problem 402 * is reported. 403 * <p> 404 * The default implementation simply throws an exception that contains the message and location 405 * parameters. 406 * <p> 407 * Subclasses may override but <b>must </b> ensure they throw the required exception. 408 * 409 * @param message 410 * @param location 411 * @param line 412 * ignored by the default impl 413 * @param cursor 414 * ignored by the default impl 415 * @throws TemplateParseException 416 * always thrown in order to terminate the parse. 417 */ 418 419 protected void templateParseProblem(String message, Location location, int line, int cursor) 420 throws TemplateParseException 421 { 422 throw new TemplateParseException(message, location); 423 } 424 425 /** 426 * Used by the parser to report tapestry runtime specific problems in the parse. Parsing <b>must 427 * </b> stop when a problem is reported. 428 * <p> 429 * The default implementation simply rethrows the exception. 430 * <p> 431 * Subclasses may override but <b>must </b> ensure they rethrow the exception. 
432 * 433 * @param exception 434 * @param line 435 * ignored by the default impl 436 * @param cursor 437 * ignored by the default impl 438 * @throws ApplicationRuntimeException 439 * always rethrown in order to terminate the parse. 440 */ 441 442 protected void templateParseProblem(ApplicationRuntimeException exception, int line, int cursor) 443 { 444 throw exception; 445 } 446 447 /** 448 * Give subclasses access to the parse results. 449 */ 450 protected List getTokens() 451 { 452 if (_tokens == null) 453 return Collections.EMPTY_LIST; 454 455 return _tokens; 456 } 457 458 /** 459 * Checks to see if the next few characters match a given pattern. 460 */ 461 462 private boolean lookahead(char[] match) 463 { 464 try 465 { 466 for (int i = 0; i < match.length; i++) 467 { 468 if (_templateData[_cursor + i] != match[i]) 469 return false; 470 } 471 472 // Every character matched. 473 474 return true; 475 } 476 catch (IndexOutOfBoundsException ex) 477 { 478 return false; 479 } 480 } 481 482 protected void parse() throws TemplateParseException 483 { 484 _cursor = 0; 485 _blockStart = -1; 486 int length = _templateData.length; 487 488 while (_cursor < length) 489 { 490 if (_templateData[_cursor] != '<') 491 { 492 if (_blockStart < 0 && !_ignoring) 493 _blockStart = _cursor; 494 495 advance(); 496 continue; 497 } 498 499 // OK, start of something. 500 501 if (lookahead(CLOSE_TAG)) 502 { 503 closeTag(); 504 continue; 505 } 506 507 if (lookahead(COMMENT_START)) 508 { 509 skipComment(); 510 continue; 511 } 512 513 // The start of some tag. 514 515 startTag(); 516 } 517 518 // Usually there's some text at the end of the template (after the last closing tag) that 519 // should 520 // be added. Often the last few tags are static tags so we definately 521 // need to end the text block. 522 523 addTextToken(_templateData.length - 1); 524 } 525 526 /** 527 * Advance forward in the document until the end of the comment is reached. 
* In addition, skip
     * any whitespace following the comment.
     * <p>
     * Unless content is currently being ignored, the comment becomes part of the current block
     * of static text (a block is started at the comment if none is active).
     *
     * @throws TemplateParseException
     *             if the template ends before the comment is closed
     */

    private void skipComment() throws TemplateParseException
    {
        int length = _templateData.length;
        int startLine = _line;

        if (_blockStart < 0 && !_ignoring)
            _blockStart = _cursor;

        while (true)
        {
            if (_cursor >= length)
                templateParseProblem(ParseMessages.commentNotEnded(startLine), new LocationImpl(
                        _resourceLocation, startLine), startLine, _cursor);

            if (lookahead(COMMENT_END))
                break;

            // Not the end of the comment, advance over it.

            advance();
        }

        // Step over the comment terminator itself.

        _cursor += COMMENT_END.length;
        advanceOverWhitespace();
    }

    /**
     * Ends the active block of static text, if any, adding a text token for the characters from
     * the block start through <code>end</code> (inclusive). No token is added when the block
     * would be empty, but the block is closed either way.
     *
     * @param end
     *            index of the last character of the block
     */
    private void addTextToken(int end)
    {
        // No active block to add to.

        if (_blockStart < 0)
            return;

        if (_blockStart <= end)
        {
            // This seems odd, shouldn't the location be the current location? I guess
            // no errors are ever reported for a text token.

            TemplateToken token = _factory.createTextToken(
                    _templateData,
                    _blockStart,
                    end,
                    _templateLocation);

            _tokens.add(token);
        }

        _blockStart = -1;
    }

    /**
     * Handles a start tag; on entry the cursor is on the opening '&lt;'. Collects the tag name
     * and then the attributes using a small state machine, and finally classifies the tag as an
     * invisible localization, a component (or $remove$ / $content$ tag), or a static tag.
     *
     * @throws TemplateParseException
     *             if the tag is not closed, an attribute value is missing, an attribute is
     *             duplicated, or a dynamic tag appears inside ignored content
     */
    private void startTag() throws TemplateParseException
    {
        int cursorStart = _cursor;
        int length = _templateData.length;
        String tagName = null;
        boolean endOfTag = false;
        boolean emptyTag = false;
        int startLine = _line;
        Location startLocation = new LocationImpl(_resourceLocation, startLine);

        tagBeginEvent(startLine, _cursor);

        // Step over the '<'.

        advance();

        // Collect the element type

        while (_cursor < length)
        {
            char ch = _templateData[_cursor];

            if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
            {
                tagName = new String(_templateData, cursorStart + 1, _cursor - cursorStart - 1);

                break;
            }

            advance();
        }

        String attributeName = null;
        int attributeNameStart = -1;
        int attributeValueStart = -1;
        int state = WAIT_FOR_ATTRIBUTE_NAME;
        char quoteChar = 0;

        _attributes.clear();

        // Collect each attribute

        while (!endOfTag)
        {
            // Ran out of template before the closing '>'; tagName is still null if even the
            // tag name was never terminated.

            if (_cursor >= length)
            {
                String message = (tagName == null) ? ParseMessages.unclosedUnknownTag(startLine)
                        : ParseMessages.unclosedTag(tagName, startLine);

                templateParseProblem(message, startLocation, startLine, cursorStart);
            }

            char ch = _templateData[_cursor];

            switch (state)
            {
                case WAIT_FOR_ATTRIBUTE_NAME:

                    // Ignore whitespace before the next attribute name, while
                    // looking for the end of the current tag.

                    if (ch == '/')
                    {
                        emptyTag = true;
                        advance();
                        break;
                    }

                    // The cursor is deliberately left on the '>'.

                    if (ch == '>')
                    {
                        endOfTag = true;
                        break;
                    }

                    if (Character.isWhitespace(ch))
                    {
                        advance();
                        break;
                    }

                    // Found non-whitespace, assume it's the attribute name.
                    // Note: could use a check here for non-alpha.

                    attributeNameStart = _cursor;
                    state = COLLECT_ATTRIBUTE_NAME;
                    advance();
                    break;

                case COLLECT_ATTRIBUTE_NAME:

                    // Looking for end of attribute name. The terminating character is not
                    // consumed; the next state looks at it again.

                    if (ch == '=' || ch == '/' || ch == '>' || Character.isWhitespace(ch))
                    {
                        attributeName = new String(_templateData, attributeNameStart, _cursor
                                - attributeNameStart);

                        state = ADVANCE_PAST_EQUALS;
                        break;
                    }

                    // Part of the attribute name

                    advance();
                    break;

                case ADVANCE_PAST_EQUALS:

                    // Looking for the '=' sign. May hit the end of the tag, or (for bare
                    // attributes), the next attribute name.

                    if (ch == '/' || ch == '>')
                    {
                        // A bare attribute, which is not interesting to
                        // us.

                        state = WAIT_FOR_ATTRIBUTE_NAME;
                        break;
                    }

                    if (Character.isWhitespace(ch))
                    {
                        advance();
                        break;
                    }

                    if (ch == '=')
                    {
                        state = WAIT_FOR_ATTRIBUTE_VALUE;
                        quoteChar = 0;
                        attributeValueStart = -1;
                        advance();
                        break;
                    }

                    // Otherwise, an HTML style "bare" attribute (such as <select multiple>).
                    // We aren't interested in those (we're just looking for the id or jwcid
                    // attribute).

                    state = WAIT_FOR_ATTRIBUTE_NAME;
                    break;

                case WAIT_FOR_ATTRIBUTE_VALUE:

                    if (ch == '/' || ch == '>')
                        templateParseProblem(ParseMessages.missingAttributeValue(
                                tagName,
                                _line,
                                attributeName), getCurrentLocation(), _line, _cursor);

                    // Ignore whitespace between '=' and the attribute value. Also, look
                    // for initial quote.

                    if (Character.isWhitespace(ch))
                    {
                        advance();
                        break;
                    }

                    if (ch == '\'' || ch == '"')
                    {
                        quoteChar = ch;

                        // The value starts after the opening quote.

                        state = COLLECT_QUOTED_VALUE;
                        advance();
                        attributeValueStart = _cursor;
                        attributeBeginEvent(attributeName, _line, attributeValueStart);
                        break;
                    }

                    // Not whitespace or quote, must be start of unquoted attribute.

                    state = COLLECT_UNQUOTED_VALUE;
                    attributeValueStart = _cursor;
                    attributeBeginEvent(attributeName, _line, attributeValueStart);
                    break;

                case COLLECT_QUOTED_VALUE:

                    // Collecting the quoted attribute value. Stop at the matching quote
                    // character.

                    if (ch == quoteChar)
                    {
                        String attributeValue = new String(_templateData, attributeValueStart,
                                _cursor - attributeValueStart);

                        attributeEndEvent(_cursor);

                        addAttributeIfUnique(tagName, attributeName, attributeValue);

                        // Advance over the quote.
                        advance();
                        state = WAIT_FOR_ATTRIBUTE_NAME;
                        break;
                    }

                    advance();
                    break;

                case COLLECT_UNQUOTED_VALUE:

                    // An unquoted attribute value ends with whitespace
                    // or the end of the enclosing tag.

                    if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
                    {
                        String attributeValue = new String(_templateData, attributeValueStart,
                                _cursor - attributeValueStart);

                        attributeEndEvent(_cursor);
                        addAttributeIfUnique(tagName, attributeName, attributeValue);

                        state = WAIT_FOR_ATTRIBUTE_NAME;
                        break;
                    }

                    advance();
                    break;
            }
        }

        tagEndEvent(_cursor);

        // Check for invisible localizations: a tag with the localization key attribute but
        // without the component attribute.

        String localizationKey = findValueCaselessly(LOCALIZATION_KEY_ATTRIBUTE_NAME, _attributes);
        String jwcId = findValueCaselessly(_componentAttributeName, _attributes);

        if (localizationKey != null && jwcId == null)
        {
            if (_ignoring)
                templateParseProblem(
                        ParseMessages.componentMayNotBeIgnored(tagName, startLine),
                        startLocation,
                        startLine,
                        cursorStart);

            // If the tag isn't empty, then create a Tag instance to ignore the
            // body of the tag.

            if (!emptyTag)
            {
                Tag tag = new Tag(tagName, startLine);

                tag._component = false;
                tag._removeTag = false;
                tag._ignoringBody = true;
                tag._mustBalance = true;

                _stack.add(tag);

                // Start ignoring content until the close tag.

                _ignoring = true;
            }
            else
            {
                // Cursor is at the closing '>', advance over it.
                advance();
                // TAPESTRY-359: *don't* skip whitespace advanceOverWhitespace()
            }

            // End any open block.

            addTextToken(cursorStart - 1);

            boolean raw = checkBoolean(RAW_ATTRIBUTE_NAME, _attributes);

            // The key and raw attributes are passed to the token explicitly, so they are
            // excluded from the remaining attributes.

            Map attributes = filter(_attributes, new String[] { LOCALIZATION_KEY_ATTRIBUTE_NAME, RAW_ATTRIBUTE_NAME });

            TemplateToken token = _factory.createLocalizationToken(
                    tagName,
                    localizationKey,
                    raw,
                    attributes,
                    startLocation);

            _tokens.add(token);

            return;
        }

        if (jwcId != null)
        {
            processComponentStart(tagName, jwcId, emptyTag, startLine, cursorStart, startLocation);
            return;
        }

        // A static tag (a tag without a jwcid attribute).
        // We need to record this so that we can match close tags later.

        if (!emptyTag)
        {
            Tag tag = new Tag(tagName, startLine);
            _stack.add(tag);
        }

        // If there wasn't an active block, then start one.

        if (_blockStart < 0 && !_ignoring)
            _blockStart = cursorStart;

        // Step over the closing '>'.

        advance();
    }

    /**
     * Records an attribute of the tag being parsed, reporting a parse problem if an attribute
     * with exactly the same name was already collected for this tag.
     *
     * @param tagName
     *            name of the tag being parsed, used in the error message
     * @param attributeName
     *            name of the attribute, as written in the template
     * @param attributeValue
     *            value of the attribute, without any enclosing quotes
     * @throws TemplateParseException
     *             if the attribute is a duplicate
     * @since 4.0
     */

    private void addAttributeIfUnique(String tagName, String attributeName, String attributeValue)
            throws TemplateParseException
    {

        if (_attributes.containsKey(attributeName))
            templateParseProblem(
                    ParseMessages.duplicateTagAttribute(tagName, _line, attributeName),
                    getCurrentLocation(),
                    _line,
                    _cursor);

        _attributes.put(attributeName, attributeValue);
    }

    // The following description is misplaced here; it describes processComponentStart():
    // "Processes a tag that is the open tag for a component (but also handles the $remove$ and
    // $content$ tags)."

    /**
     * Notify that the beginning of a tag has been detected.
     * <p>
     * Default implementation does nothing.
     *
     * @param startLine
     *            line on which the tag starts
     * @param cursorPosition
     *            index of the tag's opening '&lt;' in the template data
     */
    protected void tagBeginEvent(int startLine, int cursorPosition)
    {
    }

    /**
     * Notify that the end of the current tag has been detected.
     * <p>
     * Default implementation does nothing.
924 */ 925 protected void tagEndEvent(int cursorPosition) 926 { 927 } 928 929 /** 930 * Notify that the beginning of an attribute value has been detected. 931 * <p> 932 * Default implementation does nothing. 933 */ 934 protected void attributeBeginEvent(String attributeName, int startLine, int cursorPosition) 935 { 936 } 937 938 /** 939 * Notify that the end of the current attribute value has been detected. 940 * <p> 941 * Default implementation does nothing. 942 */ 943 protected void attributeEndEvent(int cursorPosition) 944 { 945 } 946 947 private void processComponentStart(String tagName, String jwcId, boolean emptyTag, 948 int startLine, int cursorStart, Location startLocation) throws TemplateParseException 949 { 950 String componentId = jwcId; 951 if (componentId.equalsIgnoreCase(CONTENT_ID)) 952 { 953 processContentTag(tagName, startLine, cursorStart, emptyTag); 954 955 return; 956 } 957 958 boolean isRemoveId = componentId.equalsIgnoreCase(REMOVE_ID); 959 960 if (_ignoring && !isRemoveId) 961 templateParseProblem( 962 ParseMessages.componentMayNotBeIgnored(tagName, startLine), 963 startLocation, 964 startLine, 965 cursorStart); 966 967 String type = null; 968 boolean allowBody = false; 969 970 if (_patternMatcher.matches(componentId, _implicitIdPattern)) 971 { 972 MatchResult match = _patternMatcher.getMatch(); 973 974 componentId = match.group(IMPLICIT_ID_PATTERN_ID_GROUP); 975 type = match.group(IMPLICIT_ID_PATTERN_TYPE_GROUP); 976 977 String libraryId = match.group(IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP); 978 String simpleType = match.group(IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP); 979 980 // If (and this is typical) no actual component id was specified, 981 // then generate one on the fly. 982 // The allocated id for anonymous components is 983 // based on the simple (unprefixed) type, but starts 984 // with a leading dollar sign to ensure no conflicts 985 // with user defined component ids (which don't allow dollar signs 986 // in the id). 
987 // New for 4.0: the component type may included slashes ('/'), but these 988 // are not valid identifiers, so we convert them to '$'. 989 990 if (componentId == null) 991 componentId = _idAllocator.allocateId("$" + simpleType.replace('/', '$')); 992 993 try 994 { 995 allowBody = _delegate.getAllowBody(libraryId, simpleType, startLocation); 996 } 997 catch (ApplicationRuntimeException e) 998 { 999 // give subclasses a chance to handle and rethrow 1000 templateParseProblem(e, startLine, cursorStart); 1001 } 1002 1003 } 1004 else 1005 { 1006 if (!isRemoveId) 1007 { 1008 if (!_patternMatcher.matches(componentId, _simpleIdPattern)) 1009 templateParseProblem( 1010 ParseMessages.componentIdInvalid(tagName, startLine, componentId), 1011 startLocation, 1012 startLine, 1013 cursorStart); 1014 1015 if (!_delegate.getKnownComponent(componentId)) 1016 templateParseProblem( 1017 ParseMessages.unknownComponentId(tagName, startLine, componentId), 1018 startLocation, 1019 startLine, 1020 cursorStart); 1021 1022 try 1023 { 1024 allowBody = _delegate.getAllowBody(componentId, startLocation); 1025 } 1026 catch (ApplicationRuntimeException e) 1027 { 1028 // give subclasses a chance to handle and rethrow 1029 templateParseProblem(e, startLine, cursorStart); 1030 } 1031 } 1032 } 1033 1034 // Ignore the body if we're removing the entire tag, 1035 // of if the corresponding component doesn't allow 1036 // a body. 1037 1038 boolean ignoreBody = !emptyTag && (isRemoveId || !allowBody); 1039 1040 if (_ignoring && ignoreBody) 1041 templateParseProblem(ParseMessages.nestedIgnore(tagName, startLine), new LocationImpl( 1042 _resourceLocation, startLine), startLine, cursorStart); 1043 1044 if (!emptyTag) 1045 pushNewTag(tagName, startLine, isRemoveId, ignoreBody); 1046 1047 // End any open block. 
1048 1049 addTextToken(cursorStart - 1); 1050 1051 if (!isRemoveId) 1052 { 1053 addOpenToken(tagName, componentId, type, startLocation); 1054 1055 if (emptyTag) 1056 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation())); 1057 } 1058 1059 advance(); 1060 } 1061 1062 private void pushNewTag(String tagName, int startLine, boolean isRemoveId, boolean ignoreBody) 1063 { 1064 Tag tag = new Tag(tagName, startLine); 1065 1066 tag._component = !isRemoveId; 1067 tag._removeTag = isRemoveId; 1068 1069 tag._ignoringBody = ignoreBody; 1070 1071 _ignoring = tag._ignoringBody; 1072 1073 tag._mustBalance = true; 1074 1075 _stack.add(tag); 1076 } 1077 1078 private void processContentTag(String tagName, int startLine, int cursorStart, boolean emptyTag) 1079 throws TemplateParseException 1080 { 1081 if (_ignoring) 1082 templateParseProblem( 1083 ParseMessages.contentBlockMayNotBeIgnored(tagName, startLine), 1084 new LocationImpl(_resourceLocation, startLine), 1085 startLine, 1086 cursorStart); 1087 1088 if (emptyTag) 1089 templateParseProblem( 1090 ParseMessages.contentBlockMayNotBeEmpty(tagName, startLine), 1091 new LocationImpl(_resourceLocation, startLine), 1092 startLine, 1093 cursorStart); 1094 1095 _tokens.clear(); 1096 _blockStart = -1; 1097 1098 Tag tag = new Tag(tagName, startLine); 1099 1100 tag._mustBalance = true; 1101 tag._content = true; 1102 1103 _stack.clear(); 1104 _stack.add(tag); 1105 1106 advance(); 1107 } 1108 1109 private void addOpenToken(String tagName, String jwcId, String type, Location location) 1110 { 1111 OpenToken token = _factory.createOpenToken(tagName, jwcId, type, location); 1112 _tokens.add(token); 1113 1114 if (_attributes.isEmpty()) 1115 return; 1116 1117 Iterator i = _attributes.entrySet().iterator(); 1118 while (i.hasNext()) 1119 { 1120 Map.Entry entry = (Map.Entry) i.next(); 1121 1122 String key = (String) entry.getKey(); 1123 1124 if (key.equalsIgnoreCase(_componentAttributeName)) 1125 continue; 1126 1127 String value = 
(String) entry.getValue(); 1128 1129 addAttributeToToken(token, key, value); 1130 } 1131 } 1132 1133 /** 1134 * Adds the attribute to the token (identifying prefixes and whatnot is now done downstream). 1135 * 1136 * @since 3.0 1137 */ 1138 1139 private void addAttributeToToken(OpenToken token, String name, String attributeValue) 1140 { 1141 token.addAttribute(name, convertEntitiesToPlain(attributeValue)); 1142 } 1143 1144 /** 1145 * Invoked to handle a closing tag, i.e., </foo>. When a tag closes, it will match against 1146 * a tag on the open tag start. Preferably the top tag on the stack (if everything is well 1147 * balanced), but this is HTML, not XML, so many tags won't balance. 1148 * <p> 1149 * Once the matching tag is located, the question is ... is the tag dynamic or static? If 1150 * static, then the current text block is extended to include this close tag. If dynamic, then 1151 * the current text block is ended (before the '<' that starts the tag) and a close token is 1152 * added. 1153 * <p> 1154 * In either case, the matching static element and anything above it is removed, and the cursor 1155 * is left on the character following the '>'. 
1156 */ 1157 1158 private void closeTag() throws TemplateParseException 1159 { 1160 int cursorStart = _cursor; 1161 int length = _templateData.length; 1162 int startLine = _line; 1163 1164 Location startLocation = getCurrentLocation(); 1165 1166 _cursor += CLOSE_TAG.length; 1167 1168 int tagStart = _cursor; 1169 1170 while (true) 1171 { 1172 if (_cursor >= length) 1173 templateParseProblem( 1174 ParseMessages.incompleteCloseTag(startLine), 1175 startLocation, 1176 startLine, 1177 cursorStart); 1178 1179 char ch = _templateData[_cursor]; 1180 1181 if (ch == '>') 1182 break; 1183 1184 advance(); 1185 } 1186 1187 String tagName = new String(_templateData, tagStart, _cursor - tagStart); 1188 1189 int stackPos = _stack.size() - 1; 1190 Tag tag = null; 1191 1192 while (stackPos >= 0) 1193 { 1194 tag = (Tag) _stack.get(stackPos); 1195 1196 if (tag.match(tagName)) 1197 break; 1198 1199 if (tag._mustBalance) 1200 templateParseProblem(ParseMessages.improperlyNestedCloseTag( 1201 tagName, 1202 startLine, 1203 tag._tagName, 1204 tag._line), startLocation, startLine, cursorStart); 1205 1206 stackPos--; 1207 } 1208 1209 if (stackPos < 0) 1210 templateParseProblem( 1211 ParseMessages.unmatchedCloseTag(tagName, startLine), 1212 startLocation, 1213 startLine, 1214 cursorStart); 1215 1216 // Special case for the content tag 1217 1218 if (tag._content) 1219 { 1220 addTextToken(cursorStart - 1); 1221 1222 // Advance the cursor right to the end. 1223 1224 _cursor = length; 1225 _stack.clear(); 1226 return; 1227 } 1228 1229 // When a component closes, add a CLOSE tag. 1230 if (tag._component) 1231 { 1232 addTextToken(cursorStart - 1); 1233 1234 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation())); 1235 } 1236 else 1237 { 1238 // The close of a static tag. Unless removing the tag 1239 // entirely, make sure the block tag is part of a text block. 
1240 1241 if (_blockStart < 0 && !tag._removeTag && !_ignoring) 1242 _blockStart = cursorStart; 1243 } 1244 1245 // Remove all elements at stackPos or above. 1246 1247 for (int i = _stack.size() - 1; i >= stackPos; i--) 1248 _stack.remove(i); 1249 1250 // Advance cursor past '>' 1251 1252 advance(); 1253 1254 // If editting out the tag (i.e., $remove$) then kill any whitespace. 1255 // For components that simply don't contain a body, removeTag will 1256 // be false. 1257 1258 if (tag._removeTag) 1259 advanceOverWhitespace(); 1260 1261 // If we were ignoring the body of the tag, then clear the ignoring 1262 // flag, since we're out of the body. 1263 1264 if (tag._ignoringBody) 1265 _ignoring = false; 1266 } 1267 1268 /** 1269 * Advances the cursor to the next character. If the end-of-line is reached, then increments the 1270 * line counter. 1271 */ 1272 1273 private void advance() 1274 { 1275 int length = _templateData.length; 1276 1277 if (_cursor >= length) 1278 return; 1279 1280 char ch = _templateData[_cursor]; 1281 1282 _cursor++; 1283 1284 if (ch == '\n') 1285 { 1286 _line++; 1287 _currentLocation = null; 1288 return; 1289 } 1290 1291 // A \r, or a \r\n also counts as a new line. 1292 1293 if (ch == '\r') 1294 { 1295 _line++; 1296 _currentLocation = null; 1297 1298 if (_cursor < length && _templateData[_cursor] == '\n') 1299 _cursor++; 1300 1301 return; 1302 } 1303 1304 // Not an end-of-line character. 1305 } 1306 1307 private void advanceOverWhitespace() 1308 { 1309 int length = _templateData.length; 1310 1311 while (_cursor < length) 1312 { 1313 char ch = _templateData[_cursor]; 1314 if (!Character.isWhitespace(ch)) 1315 return; 1316 1317 advance(); 1318 } 1319 } 1320 1321 /** 1322 * Returns a new Map that is a copy of the input Map with some key/value pairs removed. A list 1323 * of keys is passed in and matching keys (caseless comparison) from the input Map are excluded 1324 * from the output map. May return null (rather than return an empty Map). 
1325 */ 1326 1327 private Map filter(Map input, String[] removeKeys) 1328 { 1329 if (input == null || input.isEmpty()) 1330 return null; 1331 1332 Map result = null; 1333 1334 Iterator i = input.entrySet().iterator(); 1335 1336 nextkey: while (i.hasNext()) 1337 { 1338 Map.Entry entry = (Map.Entry) i.next(); 1339 1340 String key = (String) entry.getKey(); 1341 1342 for (int j = 0; j < removeKeys.length; j++) 1343 { 1344 if (key.equalsIgnoreCase(removeKeys[j])) 1345 continue nextkey; 1346 } 1347 1348 if (result == null) 1349 result = new HashMap(input.size()); 1350 1351 result.put(key, entry.getValue()); 1352 } 1353 1354 return result; 1355 } 1356 1357 /** 1358 * Searches a Map for given key, caselessly. The Map is expected to consist of Strings for keys 1359 * and values. Returns the value for the first key found that matches (caselessly) the input 1360 * key. Returns null if no value found. 1361 */ 1362 1363 protected String findValueCaselessly(String key, Map map) 1364 { 1365 String result = (String) map.get(key); 1366 1367 if (result != null) 1368 return result; 1369 1370 Iterator i = map.entrySet().iterator(); 1371 while (i.hasNext()) 1372 { 1373 Map.Entry entry = (Map.Entry) i.next(); 1374 1375 String entryKey = (String) entry.getKey(); 1376 1377 if (entryKey.equalsIgnoreCase(key)) 1378 return (String) entry.getValue(); 1379 } 1380 1381 return null; 1382 } 1383 1384 /** 1385 * Provided a raw input string that has been recognized to be an expression, this removes excess 1386 * white space and converts &amp;;, &quot;; &lt;; and &gt;; to their normal 1387 * character values (otherwise its impossible to specify those values in expressions in the 1388 * template). 
1389 */ 1390 1391 private String convertEntitiesToPlain(String input) 1392 { 1393 int inputLength = input.length(); 1394 1395 StringBuffer buffer = new StringBuffer(inputLength); 1396 1397 int cursor = 0; 1398 1399 outer: while (cursor < inputLength) 1400 { 1401 for (int i = 0; i < CONVERSIONS.length; i += 2) 1402 { 1403 String entity = CONVERSIONS[i]; 1404 int entityLength = entity.length(); 1405 String value = CONVERSIONS[i + 1]; 1406 1407 if (cursor + entityLength > inputLength) 1408 continue; 1409 1410 if (input.substring(cursor, cursor + entityLength).equals(entity)) 1411 { 1412 buffer.append(value); 1413 cursor += entityLength; 1414 continue outer; 1415 } 1416 } 1417 1418 buffer.append(input.charAt(cursor)); 1419 cursor++; 1420 } 1421 1422 return buffer.toString().trim(); 1423 } 1424 1425 /** 1426 * Returns true if the map contains the given key (caseless search) and the value is "true" 1427 * (caseless comparison). 1428 */ 1429 1430 private boolean checkBoolean(String key, Map map) 1431 { 1432 String value = findValueCaselessly(key, map); 1433 1434 if (value == null) 1435 return false; 1436 1437 return value.equalsIgnoreCase("true"); 1438 } 1439 1440 /** 1441 * Gets the current location within the file. This allows the location to be created only as 1442 * needed, and multiple objects on the same line can share the same Location instance. 1443 * 1444 * @since 3.0 1445 */ 1446 1447 protected Location getCurrentLocation() 1448 { 1449 if (_currentLocation == null) 1450 _currentLocation = new LocationImpl(_resourceLocation, _line); 1451 1452 return _currentLocation; 1453 } 1454 1455 public void setFactory(TemplateTokenFactory factory) 1456 { 1457 _factory = factory; 1458 } 1459 1460 }