// Copyright 2004, 2005 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.hivemind.parse;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hivemind.ApplicationRuntimeException;
import org.apache.hivemind.HiveMind;
import org.apache.hivemind.Location;
import org.apache.hivemind.Resource;
import org.apache.hivemind.impl.LocationImpl;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Abstract super-class for parsers based around the SAX event model. This class provides support
 * for managing a stack of elements, making it reasonable to establish relationships between
 * elements. It also assists in setting the {@link org.apache.hivemind.Location} of elements as they
 * are created.
 * <p>
 * This support is structured around XML, but is suited towards configuration files rather than
 * documents, in that the <em>content</em> (parsable character data) within an element is
 * concatenated together and tracked as a single blob.
 * <p>
 * SAX events are translated into calls to {@link #begin(String, Map)} and {@link #end(String)};
 * subclasses maintain the stack from those callbacks using
 * {@link #push(String, Object, int, boolean)} and {@link #pop()}.
 * 
 * @author Howard Lewis Ship
 */
public abstract class AbstractParser extends DefaultHandler
{

    /**
     * The parser is built around a stack of these Items. This is used to figure out the current
     * state, the element being processed, and the matching descriptor object.
     */
    private static class Item
    {
        /** Accumulated character data; created lazily on the first non-ignored content. */
        StringBuffer _buffer;

        String _elementName;

        boolean _ignoreCharacterData;

        Object _object;

        /**
         * Prior state of the parser before this item was pushed.
         */
        int _priorState;

        Item(String elementName, Object object, int priorState, boolean ignoreCharacterData)
        {
            _elementName = elementName;
            _object = object;
            _priorState = priorState;
            _ignoreCharacterData = ignoreCharacterData;
        }

        /**
         * Appends a run of character data, unless this item was created to ignore character data.
         */
        void addContent(char[] buffer, int start, int length)
        {
            if (_ignoreCharacterData)
                return;

            if (_buffer == null)
                _buffer = new StringBuffer(length);

            _buffer.append(buffer, start, length);
        }

        /**
         * Returns the accumulated character data with leading and trailing whitespace trimmed, or
         * null if no character data was accumulated.
         */
        String getContent()
        {
            if (_buffer != null)
                return _buffer.toString().trim();

            return null;
        }
    }

    /** Column at which {@link #_location} was created; used to decide whether it can be reused. */
    private int _currentColumn;

    /** Line at which {@link #_location} was created; used to decide whether it can be reused. */
    private int _currentLine;

    /** Most recently created location, reused while the line and column do not change. */
    private Location _location;

    private Locator _locator;

    private Resource _resource;

    /** Stack of {@link Item}s; the last entry is the same object as {@link #_top}. */
    private List _stack;

    private int _state;

    /** The item on top of the stack, or null when the stack is empty. */
    private Item _top;

    /**
     * Accepts parseable character data from within an element and applies it to the top stack
     * element. This may be invoked multiple times by the parser, and the overall data will
     * accumulate. This content can be retrieved via {@link #peekContent()}.
     * <p>
     * Character data reported while no item is on the stack is ignored.
     */
    public void characters(char[] ch, int start, int length) throws SAXException
    {
        // Nothing has been pushed (or everything has been popped): there is no item to
        // accumulate the content into.
        if (_top == null)
            return;

        _top.addContent(ch, start, length);
    }

    /**
     * Invokes {@link #fatalError(SAXParseException)}.
     */
    public void error(SAXParseException ex) throws SAXException
    {
        fatalError(ex);
    }

    /**
     * Rethrows the exception reported by the parser.
     * 
     * @param ex
     *            exception to be thrown
     * @throws SAXParseException
     *             always; it is the exception passed in
     */
    public void fatalError(SAXParseException ex) throws SAXException
    {
        throw ex;
    }

    /**
     * Returns a "path" to the current element, as a series of element names separated by slashes,
     * i.e., "top/middle/leaf". The path is built from the element names of the items currently on
     * the stack, bottom first.
     */
    protected String getElementPath()
    {
        StringBuffer buffer = new StringBuffer();

        int count = _stack.size();
        for (int i = 0; i < count; i++)
        {
            if (i > 0)
                buffer.append('/');

            Item item = (Item) _stack.get(i);

            buffer.append(item._elementName);
        }

        return buffer.toString();
    }

    /**
     * Returns the current location, as reported by the parser. The same {@link Location} instance
     * is returned for as long as the reported line and column do not change.
     * <p>
     * If the parser never supplied a {@link Locator}, the line and column are reported as -1.
     */
    protected Location getLocation()
    {
        // A SAX parser is not required to supply a Locator. -1 is the value a Locator itself
        // reports when a line or column is not available.
        int line = _locator == null ? -1 : _locator.getLineNumber();
        int column = _locator == null ? -1 : _locator.getColumnNumber();

        if (line != _currentLine || column != _currentColumn)
            _location = null;

        if (_location == null)
            _location = new LocationImpl(_resource, line, column);

        _currentLine = line;
        _currentColumn = column;

        return _location;
    }

    /**
     * Returns the {@link Resource} being parsed (as set by {@link #initializeParser(Resource, int)}).
     */
    protected Resource getResource()
    {
        return _resource;
    }

    /**
     * Returns the current state of the parser. State is initially set by
     * {@link #initializeParser(Resource, int)} and is later updated by
     * {@link #push(String, Object, int, boolean)} and {@link #pop()}.
     */
    protected int getState()
    {
        return _state;
    }

    /**
     * Initializes the parser; this should be called before any SAX parse events are received.
     * 
     * @param resource
     *            the resource being parsed (used for some error messages)
     * @param startState
     *            the initial state of the parser (the interpretation of state is determined by
     *            subclasses)
     */
    protected void initializeParser(Resource resource, int startState)
    {
        _resource = resource;
        _stack = new ArrayList();

        _location = null;
        _state = startState;
    }

    /**
     * Peeks at the top element on the stack, and returns its content (the accumulated parseable
     * character data directly enclosed by its start/end tags), trimmed of leading and trailing
     * whitespace. Returns null if no character data was accumulated for the element.
     */
    protected String peekContent()
    {
        return _top.getContent();
    }

    /**
     * Peeks at the top element on the stack and returns its element name.
     */
    protected String peekElementName()
    {
        return _top._elementName;
    }

    /**
     * Peeks at the top element on the stack and returns the object for that element.
     */
    protected Object peekObject()
    {
        return _top._object;
    }

    /**
     * Intended to be invoked when the closing tag for an element is encountered (i.e., while
     * handling {@link #endElement(String, String, String)}); this class never invokes it itself.
     * This removes the top item from the stack, and sets the parser state back to the state that
     * was in effect before that item was pushed.
     * <p>
     * Each call must be balanced by an earlier push; there must be an item on the stack.
     */
    protected void pop()
    {
        int count = _stack.size();

        _state = _top._priorState;

        _stack.remove(count - 1);

        if (count == 1)
            _top = null;
        else
            _top = (Item) _stack.get(count - 2);
    }

    /**
     * Enters a new state, pushing an object onto the stack. Invokes
     * {@link #push(String, Object, int, boolean)}, and ignores character data within the element.
     * 
     * @param elementName
     *            the element whose start tag was just parsed
     * @param object
     *            the object created to represent the new element
     * @param state
     *            the new state for the parse
     */
    protected void push(String elementName, Object object, int state)
    {
        push(elementName, object, state, true);
    }

    /**
     * Enters a new state, pushing an object onto the stack. If the object implements
     * {@link org.apache.hivemind.LocationHolder} then its location property is set to the
     * current location. The state in effect before the push is remembered and restored by the
     * matching {@link #pop()}.
     * 
     * @param elementName
     *            the element whose start tag was just parsed
     * @param object
     *            the object created to represent the new element
     * @param state
     *            the new state for the parse
     * @param ignoreCharacterData
     *            if true, then any character data (typically whitespace) directly enclosed by the
     *            element is ignored
     */
    protected void push(String elementName, Object object, int state, boolean ignoreCharacterData)
    {
        HiveMind.setLocation(object, getLocation());

        Item item = new Item(elementName, object, _state, ignoreCharacterData);

        _stack.add(item);

        _top = item;
        _state = state;
    }

    /**
     * Resets all state after a parse, releasing the references held by the parser.
     */
    protected void resetParser()
    {
        _resource = null;
        _locator = null;
        _stack = null;
        _location = null;

        // If the parse was aborted part way through, an item is still on top of the stack;
        // release it so the objects it refers to are not retained.
        _top = null;
    }

    /**
     * Invoked by the parser, the locator is stored and later used by {@link #getLocation()}.
     */
    public void setDocumentLocator(Locator locator)
    {
        _locator = locator;
    }

    /**
     * Forces a change to a specific state.
     */
    protected void setState(int state)
    {
        _state = state;
    }

    /**
     * Invoked when an unexpected element is parsed (useful for parses that don't perform
     * validation, or when there's no DTD).
     * 
     * @param elementName
     *            the name of the unexpected element
     * @throws ApplicationRuntimeException
     *             describing the situation
     */
    protected void unexpectedElement(String elementName)
    {
        throw new ApplicationRuntimeException(ParseMessages.unexpectedElement(
                elementName,
                getElementPath()), getLocation(), null);
    }

    /**
     * Occasionally it is necessary to "change our mind" about what's on the top of the stack.
     * 
     * @param object
     *            the new object for the top stack element
     */
    protected void updateObject(Object object)
    {
        _top._object = object;
    }

    /**
     * Invokes {@link #fatalError(SAXParseException)}.
     */
    public void warning(SAXParseException ex) throws SAXException
    {
        fatalError(ex);
    }

    /**
     * Copies the SAX attributes into a Map of attribute value keyed on attribute name. The local
     * name is used as the key, falling back to the qualified name when the local name is blank.
     */
    private Map constructAttributesMap(Attributes attributes)
    {
        Map result = new HashMap();
        int count = attributes.getLength();

        for (int i = 0; i < count; i++)
        {
            String key = attributes.getLocalName(i);

            if (HiveMind.isBlank(key))
                key = attributes.getQName(i);

            String value = attributes.getValue(i);

            result.put(key, value);
        }

        return result;
    }

    /**
     * Invoked when an element's start tag is recognized. The element and attributes are provided to
     * the subclass for further processing.
     * 
     * @param elementName
     *            the name of the element
     * @param attributes
     *            attribute values (String) keyed on attribute name (String)
     */
    protected abstract void begin(String elementName, Map attributes);

    /**
     * Invoked when an element's close tag is recognized. The element is provided. The content of
     * the element (the character data accumulated within the element's tags) is available via
     * {@link #peekContent()}.
     * 
     * @param elementName
     *            the name of the element
     */
    protected abstract void end(String elementName);

    /**
     * Receives the SAX end-of-element event and forwards the element name to {@link #end(String)}.
     */
    public void endElement(String uri, String localName, String qName) throws SAXException
    {
        end(getElementName(localName, qName));
    }

    /**
     * Receives the SAX start-of-element event and forwards the element name and a Map of its
     * attributes to {@link #begin(String, Map)}.
     */
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException
    {
        String elementName = getElementName(localName, qName);

        begin(elementName, constructAttributesMap(attributes));
    }

    /**
     * Chooses the name to report for an element: the qualified name when the parser supplied one,
     * otherwise the local name.
     */
    private String getElementName(String localName, String qName)
    {
        // SAX reports an unavailable qualified name as the empty string rather than null, so a
        // blank qName must fall back to the local name as well.
        if (qName == null || HiveMind.isBlank(qName))
            return localName;

        return qName;
    }
}