001 package org.apache.tapestry.json; 002 003 /* 004 Copyright (c) 2002 JSON.org 005 006 Permission is hereby granted, free of charge, to any person obtaining a copy 007 of this software and associated documentation files (the "Software"), to deal 008 in the Software without restriction, including without limitation the rights 009 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 010 copies of the Software, and to permit persons to whom the Software is 011 furnished to do so, subject to the following conditions: 012 013 The above copyright notice and this permission notice shall be included in all 014 copies or substantial portions of the Software. 015 016 The Software shall be used for Good, not Evil. 017 018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 020 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 021 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 022 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 023 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 024 SOFTWARE. 025 */ 026 027 import java.text.ParseException; 028 029 /** 030 * The XMLTokener extends the JSONTokener to provide additional methods 031 * for the parsing of XML texts. 032 * @author JSON.org 033 * @version 0.1 034 */ 035 public class XMLTokener extends JSONTokener { 036 037 038 /** The table of entity values. It initially contains Character values for 039 * amp, apos, gt, lt, quot. 040 */ 041 public static final java.util.HashMap entity; 042 043 static { 044 entity = new java.util.HashMap(8); 045 entity.put("amp", XML.AMP); 046 entity.put("apos", XML.APOS); 047 entity.put("gt", XML.GT); 048 entity.put("lt", XML.LT); 049 entity.put("quot", XML.QUOT); 050 } 051 052 /** 053 * Construct an XMLTokener from a string. 054 * @param s A source string. 055 */ 056 public XMLTokener(String s) { 057 super(s); 058 } 059 060 061 /** 062 * Get the next XML outer token, trimming whitespace. There are two kinds 063 * of tokens: the '<' character which begins a markup tag, and the content 064 * text between markup tags. 065 * 066 * @return A string, or a '<' Character, or null if there is no more 067 * source text. 068 * @throws ParseException 069 */ 070 public Object nextContent() throws ParseException { 071 char c; 072 StringBuffer sb; 073 do { 074 c = next(); 075 } while (Character.isWhitespace(c)); 076 if (c == 0) { 077 return null; 078 } 079 if (c == '<') { 080 return XML.LT; 081 } 082 sb = new StringBuffer(); 083 while (true) { 084 if (c == '<' || c == 0) { 085 back(); 086 return sb.toString().trim(); 087 } 088 if (c == '&') { 089 sb.append(nextEntity(c)); 090 } else { 091 sb.append(c); 092 } 093 c = next(); 094 } 095 } 096 097 098 /** 099 * Return the next entity. These entities are translated to Characters: 100 * & ' > < " 101 * @param a An ampersand character. 102 * @return A Character or an entity String if the entity is not recognized. 103 * @throws ParseException Missing ';' in XML entity 104 */ 105 public Object nextEntity(char a) throws ParseException { 106 StringBuffer sb = new StringBuffer(); 107 while (true) { 108 char c = next(); 109 if (Character.isLetter(c)) { 110 sb.append(Character.toLowerCase(c)); 111 } else if (c == ';') { 112 break; 113 } else { 114 throw syntaxError("Missing ';' in XML entity: &" + sb); 115 } 116 } 117 String s = sb.toString(); 118 Object e = entity.get(s); 119 return e != null ? e : a + s + ";"; 120 } 121 122 123 /** 124 * Returns the next XML meta token. This is used for skipping over <!...> 125 * and <?...?> structures. 126 * @return Syntax characters (< > / = ! ?) are returned as Character, and 127 * strings and names are returned as Boolean. We don't care what the 128 * values actually are. 129 * @throws ParseException 130 */ 131 public Object nextMeta() throws ParseException { 132 char c; 133 char q; 134 do { 135 c = next(); 136 } while (Character.isWhitespace(c)); 137 switch (c) { 138 case 0: 139 throw syntaxError("Misshaped meta tag."); 140 case '<': 141 return XML.LT; 142 case '>': 143 return XML.GT; 144 case '/': 145 return XML.SLASH; 146 case '=': 147 return XML.EQ; 148 case '!': 149 return XML.BANG; 150 case '?': 151 return XML.QUEST; 152 case '"': 153 case '\'': 154 q = c; 155 while (true) { 156 c = next(); 157 if (c == 0) { 158 throw syntaxError("Unterminated string."); 159 } 160 if (c == q) { 161 return Boolean.TRUE; 162 } 163 } 164 default: 165 while (true) { 166 c = next(); 167 if (Character.isWhitespace(c)) { 168 return Boolean.TRUE; 169 } 170 switch (c) { 171 case 0: 172 case '<': 173 case '>': 174 case '/': 175 case '=': 176 case '!': 177 case '?': 178 case '"': 179 case '\'': 180 back(); 181 return Boolean.TRUE; 182 } 183 } 184 } 185 } 186 187 188 /** 189 * Get the next XML Token. These tokens are found inside of angle 190 * brackets. It may be one of these characters: / > = ! ? or it may be a 191 * string wrapped in single quotes or double quotes, or it may be a name. 192 * @return a String or a Character. 193 * @throws ParseException 194 */ 195 public Object nextToken() throws ParseException { 196 char c; 197 char q; 198 StringBuffer sb; 199 do { 200 c = next(); 201 } while (Character.isWhitespace(c)); 202 switch (c) { 203 case 0: 204 throw syntaxError("Misshaped element."); 205 case '<': 206 throw syntaxError("Misplaced '<'."); 207 case '>': 208 return XML.GT; 209 case '/': 210 return XML.SLASH; 211 case '=': 212 return XML.EQ; 213 case '!': 214 return XML.BANG; 215 case '?': 216 return XML.QUEST; 217 218 // Quoted string 219 220 case '"': 221 case '\'': 222 q = c; 223 sb = new StringBuffer(); 224 while (true) { 225 c = next(); 226 if (c == 0) { 227 throw syntaxError("Unterminated string."); 228 } 229 if (c == q) { 230 return sb.toString(); 231 } 232 if (c == '&') { 233 sb.append(nextEntity(c)); 234 } else { 235 sb.append(c); 236 } 237 } 238 default: 239 240 // Name 241 242 sb = new StringBuffer(); 243 while (true) { 244 sb.append(c); 245 c = next(); 246 if (Character.isWhitespace(c)) { 247 return sb.toString(); 248 } 249 switch (c) { 250 case 0: 251 case '>': 252 case '/': 253 case '=': 254 case '!': 255 case '?': 256 back(); 257 return sb.toString(); 258 case '<': 259 case '"': 260 case '\'': 261 throw syntaxError("Bad character in a name."); 262 } 263 } 264 } 265 } 266 }