001    package org.apache.tapestry.json;
002    
003    /*
004    Copyright (c) 2002 JSON.org
005    
006    Permission is hereby granted, free of charge, to any person obtaining a copy
007    of this software and associated documentation files (the "Software"), to deal
008    in the Software without restriction, including without limitation the rights
009    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
010    copies of the Software, and to permit persons to whom the Software is
011    furnished to do so, subject to the following conditions:
012    
013    The above copyright notice and this permission notice shall be included in all
014    copies or substantial portions of the Software.
015    
016    The Software shall be used for Good, not Evil.
017    
018    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
019    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
020    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
021    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
022    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
023    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
024    SOFTWARE.
025    */
026    
027    import java.text.ParseException;
028    
029    /**
030     * The XMLTokener extends the JSONTokener to provide additional methods
031     * for the parsing of XML texts.
032     * @author JSON.org
033     * @version 0.1
034     */
035    public class XMLTokener extends JSONTokener {
036    
037    
038       /** The table of entity values. It initially contains Character values for
039        * amp, apos, gt, lt, quot.
040        */
041       public static final java.util.HashMap entity;
042    
043       static {
044           entity = new java.util.HashMap(8);
045           entity.put("amp",  XML.AMP);
046           entity.put("apos", XML.APOS);
047           entity.put("gt",   XML.GT);
048           entity.put("lt",   XML.LT);
049           entity.put("quot", XML.QUOT);
050       }
051    
052        /**
053         * Construct an XMLTokener from a string.
054         * @param s A source string.
055         */
056        public XMLTokener(String s) {
057            super(s);
058        }
059    
060    
061        /**
062         * Get the next XML outer token, trimming whitespace. There are two kinds
063         * of tokens: the '<' character which begins a markup tag, and the content
064         * text between markup tags.
065         *
066         * @return  A string, or a '<' Character, or null if there is no more
067         * source text.
068         * @throws ParseException
069         */
070        public Object nextContent() throws ParseException {
071            char         c;
072            StringBuffer sb;
073            do {
074                c = next();
075            } while (Character.isWhitespace(c));
076            if (c == 0) {
077                return null;
078            }
079            if (c == '<') {
080                return XML.LT;
081            }
082            sb = new StringBuffer();
083            while (true) {
084                if (c == '<' || c == 0) {
085                    back();
086                    return sb.toString().trim();
087                }
088                if (c == '&') {
089                    sb.append(nextEntity(c));
090                } else {
091                    sb.append(c);
092                }
093                c = next();
094            }
095        }
096    
097    
098        /**
099         * Return the next entity. These entities are translated to Characters:
100         *     &amp;  &apos;  &gt;  &lt;  &quot;
101         * @param a An ampersand character.
102         * @return  A Character or an entity String if the entity is not recognized.
103         * @throws ParseException Missing ';' in XML entity
104         */
105        public Object nextEntity(char a) throws ParseException {
106            StringBuffer sb = new StringBuffer();
107            while (true) {
108                char c = next();
109                if (Character.isLetter(c)) {
110                    sb.append(Character.toLowerCase(c));
111                } else if (c == ';') {
112                    break;
113                } else {
114                    throw syntaxError("Missing ';' in XML entity: &" + sb);
115                }
116            }
117            String s = sb.toString();
118            Object e = entity.get(s);
119            return e != null ? e : a + s + ";";
120        }
121    
122    
123        /**
124         * Returns the next XML meta token. This is used for skipping over <!...>
125         * and <?...?> structures.
126         * @return Syntax characters (< > / = ! ?) are returned as Character, and
127         * strings and names are returned as Boolean. We don't care what the
128         * values actually are.
129         * @throws ParseException
130         */
131        public Object nextMeta() throws ParseException {
132            char c;
133            char q;
134            do {
135                c = next();
136            } while (Character.isWhitespace(c));
137            switch (c) {
138            case 0:
139                throw syntaxError("Misshaped meta tag.");
140            case '<':
141                return XML.LT;
142            case '>':
143                return XML.GT;
144            case '/':
145                return XML.SLASH;
146            case '=':
147                return XML.EQ;
148            case '!':
149                return XML.BANG;
150            case '?':
151                return XML.QUEST;
152            case '"':
153            case '\'':
154                q = c;
155                while (true) {
156                    c = next();
157                    if (c == 0) {
158                        throw syntaxError("Unterminated string.");
159                    }
160                    if (c == q) {
161                        return Boolean.TRUE;
162                    }
163                }
164            default:
165                while (true) {
166                    c = next();
167                    if (Character.isWhitespace(c)) {
168                        return Boolean.TRUE;
169                    }
170                    switch (c) {
171                    case 0:
172                    case '<':
173                    case '>':
174                    case '/':
175                    case '=':
176                    case '!':
177                    case '?':
178                    case '"':
179                    case '\'':
180                        back();
181                        return Boolean.TRUE;
182                    }
183                }
184            }
185        }
186    
187    
188        /**
189         * Get the next XML Token. These tokens are found inside of angle
190         * brackets. It may be one of these characters: / > = ! ? or it may be a
191         * string wrapped in single quotes or double quotes, or it may be a name.
192         * @return a String or a Character.
193         * @throws ParseException
194         */
195        public Object nextToken() throws ParseException {
196            char c;
197            char q;
198            StringBuffer sb;
199            do {
200                c = next();
201            } while (Character.isWhitespace(c));
202            switch (c) {
203            case 0:
204                throw syntaxError("Misshaped element.");
205            case '<':
206                throw syntaxError("Misplaced '<'.");
207            case '>':
208                return XML.GT;
209            case '/':
210                return XML.SLASH;
211            case '=':
212                return XML.EQ;
213            case '!':
214                return XML.BANG;
215            case '?':
216                return XML.QUEST;
217    
218    // Quoted string
219    
220            case '"':
221            case '\'':
222                q = c;
223                sb = new StringBuffer();
224                while (true) {
225                    c = next();
226                    if (c == 0) {
227                        throw syntaxError("Unterminated string.");
228                    }
229                    if (c == q) {
230                        return sb.toString();
231                    }
232                    if (c == '&') {
233                        sb.append(nextEntity(c));
234                    } else {
235                        sb.append(c);
236                    }
237                }
238            default:
239    
240    // Name
241    
242                sb = new StringBuffer();
243                while (true) {
244                    sb.append(c);
245                    c = next();
246                    if (Character.isWhitespace(c)) {
247                        return sb.toString();
248                    }
249                    switch (c) {
250                    case 0:
251                    case '>':
252                    case '/':
253                    case '=':
254                    case '!':
255                    case '?':
256                        back();
257                        return sb.toString();
258                    case '<':
259                    case '"':
260                    case '\'':
261                        throw syntaxError("Bad character in a name.");
262                    }
263                }
264            }
265        }
266    }