/* |
|
* Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. |
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
* |
|
* This code is free software; you can redistribute it and/or modify it |
|
* under the terms of the GNU General Public License version 2 only, as |
|
* published by the Free Software Foundation. Oracle designates this |
|
* particular file as subject to the "Classpath" exception as provided |
|
* by Oracle in the LICENSE file that accompanied this code. |
|
* |
|
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
* version 2 for more details (a copy is included in the LICENSE file that |
|
* accompanied this code). |
|
* |
|
* You should have received a copy of the GNU General Public License version |
|
* 2 along with this work; if not, write to the Free Software Foundation, |
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
* |
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
* or visit www.oracle.com if you need additional information or have any |
|
* questions. |
|
*/ |
|
package jdk.internal.util.xml.impl; |
|
import java.io.IOException; |
|
import java.io.InputStream; |
|
import java.io.InputStreamReader; |
|
import java.io.Reader; |
|
import java.io.UnsupportedEncodingException; |
|
import java.util.HashMap; |
|
import java.util.Map; |
|
import jdk.internal.org.xml.sax.InputSource; |
|
import jdk.internal.org.xml.sax.SAXException; |
|
/** |
|
* XML non-validating parser engine. |
|
*/ |
|
public abstract class Parser { |
|
public final static String FAULT = ""; |
|
protected final static int BUFFSIZE_READER = 512; |
|
protected final static int BUFFSIZE_PARSER = 128; |
|
/** |
|
* The end of stream character. |
|
*/ |
|
public final static char EOS = 0xffff; |
|
private Pair mNoNS; // there is no namespace |
|
private Pair mXml; // the xml namespace |
|
private Map<String, Input> mEnt; // the entities look up table |
|
private Map<String, Input> mPEnt; // the parmeter entities look up table |
|
protected boolean mIsSAlone; // xml decl standalone flag |
|
protected boolean mIsSAloneSet; // standalone is explicitely set |
|
protected boolean mIsNSAware; // if true - namespace aware mode |
|
protected int mPh; // current phase of document processing |
|
protected final static int PH_BEFORE_DOC = -1; // before parsing |
|
protected final static int PH_DOC_START = 0; // document start |
|
protected final static int PH_MISC_DTD = 1; // misc before DTD |
|
protected final static int PH_DTD = 2; // DTD |
|
protected final static int PH_DTD_MISC = 3; // misc after DTD |
|
protected final static int PH_DOCELM = 4; // document's element |
|
protected final static int PH_DOCELM_MISC = 5; // misc after element |
|
protected final static int PH_AFTER_DOC = 6; // after parsing |
|
protected int mEvt; // current event type |
|
protected final static int EV_NULL = 0; // unknown |
|
protected final static int EV_ELM = 1; // empty element |
|
protected final static int EV_ELMS = 2; // start element |
|
protected final static int EV_ELME = 3; // end element |
|
protected final static int EV_TEXT = 4; // textual content |
|
protected final static int EV_WSPC = 5; // white space content |
|
protected final static int EV_PI = 6; // processing instruction |
|
protected final static int EV_CDAT = 7; // character data |
|
protected final static int EV_COMM = 8; // comment |
|
protected final static int EV_DTD = 9; // document type definition |
|
protected final static int EV_ENT = 10; // skipped entity |
|
private char mESt; // built-in entity recognizer state |
|
// mESt values: |
|
// 0x100 : the initial state |
|
// > 0x100 : unrecognized name |
|
// < 0x100 : replacement character |
|
protected char[] mBuff; // parser buffer |
|
protected int mBuffIdx; // index of the last char |
|
protected Pair mPref; // stack of prefixes |
|
protected Pair mElm; // stack of elements |
|
// mAttL.chars - element qname |
|
// mAttL.next - next element |
|
// mAttL.list - list of attributes defined on this element |
|
// mAttL.list.chars - attribute qname |
|
// mAttL.list.id - a char representing attribute's type see below |
|
// mAttL.list.next - next attribute defined on the element |
|
// mAttL.list.list - devault value structure or null |
|
// mAttL.list.list.chars - "name='value' " chars array for Input |
|
// |
|
// Attribute type character values: |
|
// 'i' - "ID" |
|
// 'r' - "IDREF" |
|
// 'R' - "IDREFS" |
|
// 'n' - "ENTITY" |
|
// 'N' - "ENTITIES" |
|
// 't' - "NMTOKEN" |
|
// 'T' - "NMTOKENS" |
|
// 'u' - enumeration type |
|
// 'o' - "NOTATION" |
|
// 'c' - "CDATA" |
|
// see also: bkeyword() and atype() |
|
// |
|
protected Pair mAttL; // list of defined attrs by element name |
|
protected Input mDoc; // document entity |
|
protected Input mInp; // stack of entities |
|
private char[] mChars; // reading buffer |
|
private int mChLen; // current capacity |
|
private int mChIdx; // index to the next char |
|
protected Attrs mAttrs; // attributes of the curr. element |
|
private String[] mItems; // attributes array of the curr. element |
|
private char mAttrIdx; // attributes counter/index |
|
private String mUnent; // unresolved entity name |
|
private Pair mDltd; // deleted objects for reuse |
|
/** |
|
* Default prefixes |
|
*/ |
|
private final static char NONS[]; |
|
private final static char XML[]; |
|
private final static char XMLNS[]; |
|
static { |
|
NONS = new char[1]; |
|
NONS[0] = (char) 0; |
|
XML = new char[4]; |
|
XML[0] = (char) 4; |
|
XML[1] = 'x'; |
|
XML[2] = 'm'; |
|
XML[3] = 'l'; |
|
XMLNS = new char[6]; |
|
XMLNS[0] = (char) 6; |
|
XMLNS[1] = 'x'; |
|
XMLNS[2] = 'm'; |
|
XMLNS[3] = 'l'; |
|
XMLNS[4] = 'n'; |
|
XMLNS[5] = 's'; |
|
} |
|
/**
 * ASCII character type array.
 *
 * This array maps an ASCII (7 bit) character to the character type.<br />
 * Possible character type values are:<br />
 * - ' ' for any kind of white space character;<br />
 * - 'a' for any lower case alphabetical character value;<br />
 * - 'A' for any upper case alphabetical character value;<br />
 * - 'd' for any decimal digit character value;<br />
 * - 'z' for any character less than ' ' except '\t', '\n', '\r';<br />
 * An ASCII (7 bit) character which does not fall in any category listed
 * above is mapped to itself.
 */
private static final byte asctyp[];

/**
 * NMTOKEN character type array.
 *
 * This array maps an ASCII (7 bit) character to the character type.<br />
 * Possible character type values are:<br />
 * - 0 for underscore ('_') or any lower and upper case alphabetical
 * character value;<br />
 * - 1 for colon (':') character;<br />
 * - 2 for dash ('-') and dot ('.') or any decimal digit character value;<br />
 * - 3 for any kind of white space character<br />
 * An ASCII (7 bit) character which does not fall in any category listed
 * above is mapped to 0xff.
 */
private static final byte nmttyp[];

/**
 * Static initializer.
 *
 * Fills the ASCII character type array {@link #asctyp asctyp} and the
 * NMTOKEN character type array {@link #nmttyp nmttyp}.
 */
static {
    short i = 0;

    asctyp = new byte[0x80];
    // Control characters first; the three white space controls are
    // overwritten right after the loop.
    while (i < ' ') {
        asctyp[i++] = (byte) 'z';
    }
    asctyp['\t'] = (byte) ' ';
    asctyp['\r'] = (byte) ' ';
    asctyp['\n'] = (byte) ' ';
    // From ' ' up to '/' every character maps to itself.
    while (i < '0') {
        asctyp[i] = (byte) i++;
    }
    while (i <= '9') {
        asctyp[i++] = (byte) 'd';
    }
    while (i < 'A') {
        asctyp[i] = (byte) i++;
    }
    while (i <= 'Z') {
        asctyp[i++] = (byte) 'A';
    }
    while (i < 'a') {
        asctyp[i] = (byte) i++;
    }
    while (i <= 'z') {
        asctyp[i++] = (byte) 'a';
    }
    while (i < 0x80) {
        asctyp[i] = (byte) i++;
    }

    nmttyp = new byte[0x80];
    for (i = 0; i < '0'; i++) {
        nmttyp[i] = (byte) 0xff;
    }
    while (i <= '9') {
        nmttyp[i++] = (byte) 2; // digits
    }
    while (i < 'A') {
        nmttyp[i++] = (byte) 0xff;
    }
    // skipped upper case alphabetical characters are already 0
    for (i = '['; i < 'a'; i++) {
        nmttyp[i] = (byte) 0xff;
    }
    // skipped lower case alphabetical characters are already 0
    for (i = '{'; i < 0x80; i++) {
        nmttyp[i] = (byte) 0xff;
    }
    // Individual overrides of the ranges filled above.
    nmttyp['_'] = 0;
    nmttyp[':'] = 1;
    nmttyp['.'] = 2;
    nmttyp['-'] = 2;
    nmttyp[' '] = 3;
    nmttyp['\t'] = 3;
    nmttyp['\r'] = 3;
    nmttyp['\n'] = 3;
}
|
/**
 * Constructor.
 *
 * Sets the phase to {@link #PH_BEFORE_DOC}, allocates the parser buffer and
 * the attributes object, and pushes two entries on the prefix stack: the
 * "no namespace" entry (remembered in {@code mNoNS}) and, on top of it, the
 * "xml" prefix entry (remembered in {@code mXml}).
 */
protected Parser() {
    mPh = PH_BEFORE_DOC; // before parsing

    // Initialize the parser
    mBuff = new char[BUFFSIZE_PARSER];
    mAttrs = new Attrs();

    // Default namespace
    mPref = pair(mPref);
    mPref.name = "";
    mPref.value = "";
    mPref.chars = NONS;
    mNoNS = mPref; // no namespace

    // XML namespace
    mPref = pair(mPref);
    mPref.name = "xml";
    mPref.value = "http://www.w3.org/XML/1998/namespace";
    mPref.chars = XML;
    mXml = mPref; // XML namespace
}
|
/** |
|
* Initializes parser's internals. Note, current input has to be set before |
|
* this method is called. |
|
*/ |
|
protected void init() { |
|
mUnent = null; |
|
mElm = null; |
|
mPref = mXml; |
|
mAttL = null; |
|
mPEnt = new HashMap<>(); |
|
mEnt = new HashMap<>(); |
|
mDoc = mInp; // current input is document entity |
|
mChars = mInp.chars; // use document entity buffer |
|
mPh = PH_DOC_START; // the begining of the document |
|
} |
|
/** |
|
* Cleans up parser internal resources. |
|
*/ |
|
protected void cleanup() { |
|
// Default attributes |
|
while (mAttL != null) { |
|
while (mAttL.list != null) { |
|
if (mAttL.list.list != null) { |
|
del(mAttL.list.list); |
|
} |
|
mAttL.list = del(mAttL.list); |
|
} |
|
mAttL = del(mAttL); |
|
} |
|
// Element stack |
|
while (mElm != null) { |
|
mElm = del(mElm); |
|
} |
|
// Namespace prefixes |
|
while (mPref != mXml) { |
|
mPref = del(mPref); |
|
} |
|
// Inputs |
|
while (mInp != null) { |
|
pop(); |
|
} |
|
// Document reader |
|
if ((mDoc != null) && (mDoc.src != null)) { |
|
try { |
|
mDoc.src.close(); |
|
} catch (IOException ioe) { |
|
} |
|
} |
|
mPEnt = null; |
|
mEnt = null; |
|
mDoc = null; |
|
mPh = PH_AFTER_DOC; // before documnet processing |
|
} |
|
/** |
|
* Processes a portion of document. This method returns one of EV_* |
|
* constants as an identifier of the portion of document have been read. |
|
* |
|
* @return Identifier of processed document portion. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
protected int step() throws Exception { |
|
mEvt = EV_NULL; |
|
int st = 0; |
|
while (mEvt == EV_NULL) { |
|
char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch(); |
|
switch (st) { |
|
case 0: // all sorts of markup (dispetcher) |
|
if (ch != '<') { |
|
bkch(); |
|
mBuffIdx = -1; // clean parser buffer |
|
st = 1; |
|
break; |
|
} |
|
switch (getch()) { |
|
case '/': // the end of the element content |
|
mEvt = EV_ELME; |
|
if (mElm == null) { |
|
panic(FAULT); |
|
} |
|
// Check element's open/close tags balance |
|
mBuffIdx = -1; // clean parser buffer |
|
bname(mIsNSAware); |
|
char[] chars = mElm.chars; |
|
if (chars.length == (mBuffIdx + 1)) { |
|
for (char i = 1; i <= mBuffIdx; i += 1) { |
|
if (chars[i] != mBuff[i]) { |
|
panic(FAULT); |
|
} |
|
} |
|
} else { |
|
panic(FAULT); |
|
} |
|
// Skip white spaces before '>' |
|
if (wsskip() != '>') { |
|
panic(FAULT); |
|
} |
|
getch(); // read '>' |
|
break; |
|
case '!': // a comment or a CDATA |
|
ch = getch(); |
|
bkch(); |
|
switch (ch) { |
|
case '-': // must be a comment |
|
mEvt = EV_COMM; |
|
comm(); |
|
break; |
|
case '[': // must be a CDATA section |
|
mEvt = EV_CDAT; |
|
cdat(); |
|
break; |
|
default: // must be 'DOCTYPE' |
|
mEvt = EV_DTD; |
|
dtd(); |
|
break; |
|
} |
|
break; |
|
case '?': // processing instruction |
|
mEvt = EV_PI; |
|
pi(); |
|
break; |
|
default: // must be the first char of an xml name |
|
bkch(); |
|
// Read an element name and put it on top of the |
|
// element stack |
|
mElm = pair(mElm); // add new element to the stack |
|
mElm.chars = qname(mIsNSAware); |
|
mElm.name = mElm.local(); |
|
mElm.id = (mElm.next != null) ? mElm.next.id : 0; // flags |
|
mElm.num = 0; // namespace counter |
|
// Find the list of defined attributs of the current |
|
// element |
|
Pair elm = find(mAttL, mElm.chars); |
|
mElm.list = (elm != null) ? elm.list : null; |
|
// Read attributes till the end of the element tag |
|
mAttrIdx = 0; |
|
Pair att = pair(null); |
|
att.num = 0; // clear attribute's flags |
|
attr(att); // get all attributes inc. defaults |
|
del(att); |
|
mElm.value = (mIsNSAware) ? rslv(mElm.chars) : null; |
|
// Skip white spaces before '>' |
|
switch (wsskip()) { |
|
case '>': |
|
getch(); // read '>' |
|
mEvt = EV_ELMS; |
|
break; |
|
case '/': |
|
getch(); // read '/' |
|
if (getch() != '>') // read '>' |
|
{ |
|
panic(FAULT); |
|
} |
|
mEvt = EV_ELM; |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
} |
|
break; |
|
case 1: // read white space |
|
switch (ch) { |
|
case ' ': |
|
case '\t': |
|
case '\n': |
|
bappend(ch); |
|
break; |
|
case '\r': // EOL processing [#2.11] |
|
if (getch() != '\n') { |
|
bkch(); |
|
} |
|
bappend('\n'); |
|
break; |
|
case '<': |
|
mEvt = EV_WSPC; |
|
bkch(); |
|
bflash_ws(); |
|
break; |
|
default: |
|
bkch(); |
|
st = 2; |
|
break; |
|
} |
|
break; |
|
case 2: // read the text content of the element |
|
switch (ch) { |
|
case '&': |
|
if (mUnent == null) { |
|
// There was no unresolved entity on previous step. |
|
if ((mUnent = ent('x')) != null) { |
|
mEvt = EV_TEXT; |
|
bkch(); // move back to ';' after entity name |
|
setch('&'); // parser must be back on next step |
|
bflash(); |
|
} |
|
} else { |
|
// There was unresolved entity on previous step. |
|
mEvt = EV_ENT; |
|
skippedEnt(mUnent); |
|
mUnent = null; |
|
} |
|
break; |
|
case '<': |
|
mEvt = EV_TEXT; |
|
bkch(); |
|
bflash(); |
|
break; |
|
case '\r': // EOL processing [#2.11] |
|
if (getch() != '\n') { |
|
bkch(); |
|
} |
|
bappend('\n'); |
|
break; |
|
case EOS: |
|
panic(FAULT); |
|
default: |
|
bappend(ch); |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
return mEvt; |
|
} |
|
/** |
|
* Parses the document type declaration. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private void dtd() throws Exception { |
|
char ch; |
|
String str = null; |
|
String name = null; |
|
Pair psid = null; |
|
// read 'DOCTYPE' |
|
if ("DOCTYPE".equals(name(false)) != true) { |
|
panic(FAULT); |
|
} |
|
mPh = PH_DTD; // DTD |
|
for (short st = 0; st >= 0;) { |
|
ch = getch(); |
|
switch (st) { |
|
case 0: // read the document type name |
|
if (chtyp(ch) != ' ') { |
|
bkch(); |
|
name = name(mIsNSAware); |
|
wsskip(); |
|
st = 1; // read 'PUPLIC' or 'SYSTEM' |
|
} |
|
break; |
|
case 1: // read 'PUPLIC' or 'SYSTEM' |
|
switch (chtyp(ch)) { |
|
case 'A': |
|
bkch(); |
|
psid = pubsys(' '); |
|
st = 2; // skip spaces before internal subset |
|
docType(name, psid.name, psid.value); |
|
break; |
|
case '[': |
|
bkch(); |
|
st = 2; // skip spaces before internal subset |
|
docType(name, null, null); |
|
break; |
|
case '>': |
|
bkch(); |
|
st = 3; // skip spaces after internal subset |
|
docType(name, null, null); |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
case 2: // skip spaces before internal subset |
|
switch (chtyp(ch)) { |
|
case '[': |
|
// Process internal subset |
|
dtdsub(); |
|
st = 3; // skip spaces after internal subset |
|
break; |
|
case '>': |
|
// There is no internal subset |
|
bkch(); |
|
st = 3; // skip spaces after internal subset |
|
break; |
|
case ' ': |
|
// skip white spaces |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
case 3: // skip spaces after internal subset |
|
switch (chtyp(ch)) { |
|
case '>': |
|
if (psid != null) { |
|
// Report the DTD external subset |
|
InputSource is = resolveEnt(name, psid.name, psid.value); |
|
if (is != null) { |
|
if (mIsSAlone == false) { |
|
// Set the end of DTD external subset char |
|
bkch(); |
|
setch(']'); |
|
// Set the DTD external subset InputSource |
|
push(new Input(BUFFSIZE_READER)); |
|
setinp(is); |
|
mInp.pubid = psid.name; |
|
mInp.sysid = psid.value; |
|
// Parse the DTD external subset |
|
dtdsub(); |
|
} else { |
|
// Unresolved DTD external subset |
|
skippedEnt("[dtd]"); |
|
// Release reader and stream |
|
if (is.getCharacterStream() != null) { |
|
try { |
|
is.getCharacterStream().close(); |
|
} catch (IOException ioe) { |
|
} |
|
} |
|
if (is.getByteStream() != null) { |
|
try { |
|
is.getByteStream().close(); |
|
} catch (IOException ioe) { |
|
} |
|
} |
|
} |
|
} else { |
|
// Unresolved DTD external subset |
|
skippedEnt("[dtd]"); |
|
} |
|
del(psid); |
|
} |
|
st = -1; // end of DTD |
|
break; |
|
case ' ': |
|
// skip white spaces |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
} |
|
/** |
|
* Parses the document type declaration subset. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private void dtdsub() throws Exception { |
|
char ch; |
|
for (short st = 0; st >= 0;) { |
|
ch = getch(); |
|
switch (st) { |
|
case 0: // skip white spaces before a declaration |
|
switch (chtyp(ch)) { |
|
case '<': |
|
ch = getch(); |
|
switch (ch) { |
|
case '?': |
|
pi(); |
|
break; |
|
case '!': |
|
ch = getch(); |
|
bkch(); |
|
if (ch == '-') { |
|
comm(); |
|
break; |
|
} |
|
// A markup or an entity declaration |
|
bntok(); |
|
switch (bkeyword()) { |
|
case 'n': |
|
dtdent(); |
|
break; |
|
case 'a': |
|
dtdattl(); // parse attributes declaration |
|
break; |
|
case 'e': |
|
dtdelm(); // parse element declaration |
|
break; |
|
case 'o': |
|
dtdnot(); // parse notation declaration |
|
break; |
|
default: |
|
panic(FAULT); // unsupported markup declaration |
|
break; |
|
} |
|
st = 1; // read the end of declaration |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
case '%': |
|
// A parameter entity reference |
|
pent(' '); |
|
break; |
|
case ']': |
|
// End of DTD subset |
|
st = -1; |
|
break; |
|
case ' ': |
|
// Skip white spaces |
|
break; |
|
case 'Z': |
|
// End of stream |
|
if (getch() != ']') { |
|
panic(FAULT); |
|
} |
|
st = -1; |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
case 1: // read the end of declaration |
|
switch (ch) { |
|
case '>': // there is no notation |
|
st = 0; // skip white spaces before a declaration |
|
break; |
|
case ' ': |
|
case '\n': |
|
case '\r': |
|
case '\t': |
|
// Skip white spaces |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
} |
|
/** |
|
* Parses an entity declaration. This method fills the general ( |
|
* <code>mEnt</code>) and parameter |
|
* ( |
|
* <code>mPEnt</code>) entity look up table. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void dtdent() throws Exception { |
|
String str = null; |
|
char[] val = null; |
|
Input inp = null; |
|
Pair ids = null; |
|
char ch; |
|
for (short st = 0; st >= 0;) { |
|
ch = getch(); |
|
switch (st) { |
|
case 0: // skip white spaces before entity name |
|
switch (chtyp(ch)) { |
|
case ' ': |
|
// Skip white spaces |
|
break; |
|
case '%': |
|
// Parameter entity or parameter entity declaration. |
|
ch = getch(); |
|
bkch(); |
|
if (chtyp(ch) == ' ') { |
|
// Parameter entity declaration. |
|
wsskip(); |
|
str = name(false); |
|
switch (chtyp(wsskip())) { |
|
case 'A': |
|
// Read the external identifier |
|
ids = pubsys(' '); |
|
if (wsskip() == '>') { |
|
// External parsed entity |
|
if (mPEnt.containsKey(str) == false) { // [#4.2] |
|
inp = new Input(); |
|
inp.pubid = ids.name; |
|
inp.sysid = ids.value; |
|
mPEnt.put(str, inp); |
|
} |
|
} else { |
|
panic(FAULT); |
|
} |
|
del(ids); |
|
st = -1; // the end of declaration |
|
break; |
|
case '\"': |
|
case '\'': |
|
// Read the parameter entity value |
|
bqstr('d'); |
|
// Create the parameter entity value |
|
val = new char[mBuffIdx + 1]; |
|
System.arraycopy(mBuff, 1, val, 1, val.length - 1); |
|
// Add surrounding spaces [#4.4.8] |
|
val[0] = ' '; |
|
// Add the entity to the entity look up table |
|
if (mPEnt.containsKey(str) == false) { // [#4.2] |
|
inp = new Input(val); |
|
inp.pubid = mInp.pubid; |
|
inp.sysid = mInp.sysid; |
|
inp.xmlenc = mInp.xmlenc; |
|
inp.xmlver = mInp.xmlver; |
|
mPEnt.put(str, inp); |
|
} |
|
st = -1; // the end of declaration |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
} else { |
|
// Parameter entity reference. |
|
pent(' '); |
|
} |
|
break; |
|
default: |
|
bkch(); |
|
str = name(false); |
|
st = 1; // read entity declaration value |
|
break; |
|
} |
|
break; |
|
case 1: // read entity declaration value |
|
switch (chtyp(ch)) { |
|
case '\"': // internal entity |
|
case '\'': |
|
bkch(); |
|
bqstr('d'); // read a string into the buffer |
|
if (mEnt.get(str) == null) { |
|
// Create general entity value |
|
val = new char[mBuffIdx]; |
|
System.arraycopy(mBuff, 1, val, 0, val.length); |
|
// Add the entity to the entity look up table |
|
if (mEnt.containsKey(str) == false) { // [#4.2] |
|
inp = new Input(val); |
|
inp.pubid = mInp.pubid; |
|
inp.sysid = mInp.sysid; |
|
inp.xmlenc = mInp.xmlenc; |
|
inp.xmlver = mInp.xmlver; |
|
mEnt.put(str, inp); |
|
} |
|
} |
|
st = -1; // the end of declaration |
|
break; |
|
case 'A': // external entity |
|
bkch(); |
|
ids = pubsys(' '); |
|
switch (wsskip()) { |
|
case '>': // external parsed entity |
|
if (mEnt.containsKey(str) == false) { // [#4.2] |
|
inp = new Input(); |
|
inp.pubid = ids.name; |
|
inp.sysid = ids.value; |
|
mEnt.put(str, inp); |
|
} |
|
break; |
|
case 'N': // external general unparsed entity |
|
if ("NDATA".equals(name(false)) == true) { |
|
wsskip(); |
|
unparsedEntDecl(str, ids.name, ids.value, name(false)); |
|
break; |
|
} |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
del(ids); |
|
st = -1; // the end of declaration |
|
break; |
|
case ' ': |
|
// Skip white spaces |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
} |
|
/** |
|
* Parses an element declaration. |
|
* |
|
* This method parses the declaration up to the closing angle bracket. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void dtdelm() throws Exception { |
|
// This is stub implementation which skips an element |
|
// declaration. |
|
wsskip(); |
|
name(mIsNSAware); |
|
char ch; |
|
while (true) { |
|
ch = getch(); |
|
switch (ch) { |
|
case '>': |
|
bkch(); |
|
return; |
|
case EOS: |
|
panic(FAULT); |
|
default: |
|
break; |
|
} |
|
} |
|
} |
|
/** |
|
* Parses an attribute list declaration. |
|
* |
|
* This method parses the declaration up to the closing angle bracket. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private void dtdattl() throws Exception { |
|
char elmqn[] = null; |
|
Pair elm = null; |
|
char ch; |
|
for (short st = 0; st >= 0;) { |
|
ch = getch(); |
|
switch (st) { |
|
case 0: // read the element name |
|
switch (chtyp(ch)) { |
|
case 'a': |
|
case 'A': |
|
case '_': |
|
case 'X': |
|
case ':': |
|
bkch(); |
|
// Get the element from the list or add a new one. |
|
elmqn = qname(mIsNSAware); |
|
elm = find(mAttL, elmqn); |
|
if (elm == null) { |
|
elm = pair(mAttL); |
|
elm.chars = elmqn; |
|
mAttL = elm; |
|
} |
|
st = 1; // read an attribute declaration |
|
break; |
|
case ' ': |
|
break; |
|
case '%': |
|
pent(' '); |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
case 1: // read an attribute declaration |
|
switch (chtyp(ch)) { |
|
case 'a': |
|
case 'A': |
|
case '_': |
|
case 'X': |
|
case ':': |
|
bkch(); |
|
dtdatt(elm); |
|
if (wsskip() == '>') { |
|
return; |
|
} |
|
break; |
|
case ' ': |
|
break; |
|
case '%': |
|
pent(' '); |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
} |
|
} |
|
/** |
|
* Parses an attribute declaration. |
|
* |
|
* The attribute uses the following fields of Pair object: chars - characters |
|
* of qualified name id - the type identifier of the attribute list - a pair |
|
* which holds the default value (chars field) |
|
* |
|
* @param elm An object which represents all defined attributes on an |
|
* element. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void dtdatt(Pair elm) throws Exception { |
|
char attqn[] = null; |
|
Pair att = null; |
|
char ch; |
|
for (short st = 0; st >= 0;) { |
|
ch = getch(); |
|
switch (st) { |
|
case 0: // the attribute name |
|
switch (chtyp(ch)) { |
|
case 'a': |
|
case 'A': |
|
case '_': |
|
case 'X': |
|
case ':': |
|
bkch(); |
|
// Get the attribute from the list or add a new one. |
|
attqn = qname(mIsNSAware); |
|
att = find(elm.list, attqn); |
|
if (att == null) { |
|
// New attribute declaration |
|
att = pair(elm.list); |
|
att.chars = attqn; |
|
elm.list = att; |
|
} else { |
|
// Do not override the attribute declaration [#3.3] |
|
att = pair(null); |
|
att.chars = attqn; |
|
att.id = 'c'; |
|
} |
|
wsskip(); |
|
st = 1; |
|
break; |
|
case '%': |
|
pent(' '); |
|
break; |
|
case ' ': |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
case 1: // the attribute type |
|
switch (chtyp(ch)) { |
|
case '(': |
|
att.id = 'u'; // enumeration type |
|
st = 2; // read the first element of the list |
|
break; |
|
case '%': |
|
pent(' '); |
|
break; |
|
case ' ': |
|
break; |
|
default: |
|
bkch(); |
|
bntok(); // read type id |
|
att.id = bkeyword(); |
|
switch (att.id) { |
|
case 'o': // NOTATION |
|
if (wsskip() != '(') { |
|
panic(FAULT); |
|
} |
|
ch = getch(); |
|
st = 2; // read the first element of the list |
|
break; |
|
case 'i': // ID |
|
case 'r': // IDREF |
|
case 'R': // IDREFS |
|
case 'n': // ENTITY |
|
case 'N': // ENTITIES |
|
case 't': // NMTOKEN |
|
case 'T': // NMTOKENS |
|
case 'c': // CDATA |
|
wsskip(); |
|
st = 4; // read default declaration |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
} |
|
break; |
|
case 2: // read the first element of the list |
|
switch (chtyp(ch)) { |
|
case 'a': |
|
case 'A': |
|
case 'd': |
|
case '.': |
|
case ':': |
|
case '-': |
|
case '_': |
|
case 'X': |
|
bkch(); |
|
switch (att.id) { |
|
case 'u': // enumeration type |
|
bntok(); |
|
break; |
|
case 'o': // NOTATION |
|
mBuffIdx = -1; |
|
bname(false); |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
wsskip(); |
|
st = 3; // read next element of the list |
|
break; |
|
case '%': |
|
pent(' '); |
|
break; |
|
case ' ': |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
case 3: // read next element of the list |
|
switch (ch) { |
|
case ')': |
|
wsskip(); |
|
st = 4; // read default declaration |
|
break; |
|
case '|': |
|
wsskip(); |
|
switch (att.id) { |
|
case 'u': // enumeration type |
|
bntok(); |
|
break; |
|
case 'o': // NOTATION |
|
mBuffIdx = -1; |
|
bname(false); |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
wsskip(); |
|
break; |
|
case '%': |
|
pent(' '); |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
case 4: // read default declaration |
|
switch (ch) { |
|
case '#': |
|
bntok(); |
|
switch (bkeyword()) { |
|
case 'F': // FIXED |
|
switch (wsskip()) { |
|
case '\"': |
|
case '\'': |
|
st = 5; // read the default value |
|
break; |
|
case EOS: |
|
panic(FAULT); |
|
default: |
|
st = -1; |
|
break; |
|
} |
|
break; |
|
case 'Q': // REQUIRED |
|
case 'I': // IMPLIED |
|
st = -1; |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
case '\"': |
|
case '\'': |
|
bkch(); |
|
st = 5; // read the default value |
|
break; |
|
case ' ': |
|
case '\n': |
|
case '\r': |
|
case '\t': |
|
break; |
|
case '%': |
|
pent(' '); |
|
break; |
|
default: |
|
bkch(); |
|
st = -1; |
|
break; |
|
} |
|
break; |
|
case 5: // read the default value |
|
switch (ch) { |
|
case '\"': |
|
case '\'': |
|
bkch(); |
|
bqstr('d'); // the value in the mBuff now |
|
att.list = pair(null); |
|
// Create a string like "attqname='value' " |
|
att.list.chars = new char[att.chars.length + mBuffIdx + 3]; |
|
System.arraycopy( |
|
att.chars, 1, att.list.chars, 0, att.chars.length - 1); |
|
att.list.chars[att.chars.length - 1] = '='; |
|
att.list.chars[att.chars.length] = ch; |
|
System.arraycopy( |
|
mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx); |
|
att.list.chars[att.chars.length + mBuffIdx + 1] = ch; |
|
att.list.chars[att.chars.length + mBuffIdx + 2] = ' '; |
|
st = -1; |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
} |
|
} |
|
/** |
|
* Parses a notation declaration. |
|
* |
|
* This method parses the declaration up to the closing angle bracket. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private void dtdnot() throws Exception { |
|
wsskip(); |
|
String name = name(false); |
|
wsskip(); |
|
Pair ids = pubsys('N'); |
|
notDecl(name, ids.name, ids.value); |
|
del(ids); |
|
} |
|
/** |
|
* Parses an attribute. |
|
* |
|
* This recursive method is responsible for prefix addition |
|
* ( |
|
* <code>mPref</code>) on the way down. The element's start tag end triggers |
|
* the return process. The method then on it's way back resolves prefixes |
|
* and accumulates attributes. |
|
* |
|
* <p><code>att.num</code> carries attribute flags where: 0x1 - attribute is |
|
* declared in DTD (attribute decalration had been read); 0x2 - attribute's |
|
* default value is used.</p> |
|
* |
|
* @param att An object which reprecents current attribute. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void attr(Pair att) throws Exception { |
|
switch (wsskip()) { |
|
case '/': |
|
case '>': |
|
if ((att.num & 0x2) == 0) { // all attributes have been read |
|
att.num |= 0x2; // set default attribute flag |
|
Input inp = mInp; |
|
// Go through all attributes defined on current element. |
|
for (Pair def = mElm.list; def != null; def = def.next) { |
|
if (def.list == null) // no default value |
|
{ |
|
continue; |
|
} |
|
// Go through all attributes defined on current |
|
// element and add defaults. |
|
Pair act = find(att.next, def.chars); |
|
if (act == null) { |
|
push(new Input(def.list.chars)); |
|
} |
|
} |
|
if (mInp != inp) { // defaults have been added |
|
attr(att); |
|
return; |
|
} |
|
} |
|
// Ensure the attribute string array capacity |
|
mAttrs.setLength(mAttrIdx); |
|
mItems = mAttrs.mItems; |
|
return; |
|
case EOS: |
|
panic(FAULT); |
|
default: |
|
// Read the attribute name and value |
|
att.chars = qname(mIsNSAware); |
|
att.name = att.local(); |
|
String type = atype(att); // sets attribute's type on att.id |
|
wsskip(); |
|
if (getch() != '=') { |
|
panic(FAULT); |
|
} |
|
bqstr((char) att.id); // read the value with normalization. |
|
String val = new String(mBuff, 1, mBuffIdx); |
|
Pair next = pair(att); |
|
next.num = (att.num & ~0x1); // inherit attribute flags |
|
// Put a namespace declaration on top of the prefix stack |
|
if ((mIsNSAware == false) || (isdecl(att, val) == false)) { |
|
// An ordinary attribute |
|
mAttrIdx++; |
|
attr(next); // recursive call to parse the next attribute |
|
mAttrIdx--; |
|
// Add the attribute to the attributes string array |
|
char idx = (char) (mAttrIdx << 3); |
|
mItems[idx + 1] = att.qname(); // attr qname |
|
mItems[idx + 2] = (mIsNSAware) ? att.name : ""; // attr local name |
|
mItems[idx + 3] = val; // attr value |
|
mItems[idx + 4] = type; // attr type |
|
switch (att.num & 0x3) { |
|
case 0x0: |
|
mItems[idx + 5] = null; |
|
break; |
|
case 0x1: // declared attribute |
|
mItems[idx + 5] = "d"; |
|
break; |
|
default: // 0x2, 0x3 - default attribute always declared |
|
mItems[idx + 5] = "D"; |
|
break; |
|
} |
|
// Resolve the prefix if any and report the attribute |
|
// NOTE: The attribute does not accept the default namespace. |
|
mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars) : ""; |
|
} else { |
|
// A namespace declaration. mPref.name contains prefix and |
|
// mPref.value contains namespace URI set by isdecl method. |
|
// Report a start of the new mapping |
|
newPrefix(); |
|
// Recursive call to parse the next attribute |
|
attr(next); |
|
// NOTE: The namespace declaration is not reported. |
|
} |
|
del(next); |
|
break; |
|
} |
|
} |
|
/** |
|
* Retrieves attribute type. |
|
* |
|
* This method sets the type of normalization in the attribute |
|
* <code>id</code> field and returns the name of attribute type. |
|
* |
|
* @param att An object which represents current attribute. |
|
* @return The name of the attribute type. |
|
* @exception Exception is parser specific exception form panic method. |
|
*/ |
|
private String atype(Pair att) |
|
throws Exception { |
|
Pair attr; |
|
// CDATA-type normalization by default [#3.3.3] |
|
att.id = 'c'; |
|
if (mElm.list == null || (attr = find(mElm.list, att.chars)) == null) { |
|
return "CDATA"; |
|
} |
|
att.num |= 0x1; // attribute is declared |
|
// Non-CDATA normalization except when the attribute type is CDATA. |
|
att.id = 'i'; |
|
switch (attr.id) { |
|
case 'i': |
|
return "ID"; |
|
case 'r': |
|
return "IDREF"; |
|
case 'R': |
|
return "IDREFS"; |
|
case 'n': |
|
return "ENTITY"; |
|
case 'N': |
|
return "ENTITIES"; |
|
case 't': |
|
return "NMTOKEN"; |
|
case 'T': |
|
return "NMTOKENS"; |
|
case 'u': |
|
return "NMTOKEN"; |
|
case 'o': |
|
return "NOTATION"; |
|
case 'c': |
|
att.id = 'c'; |
|
return "CDATA"; |
|
default: |
|
panic(FAULT); |
|
} |
|
return null; |
|
} |
|
/** |
|
* Parses a comment. |
|
* |
|
* The '<!' part is read in dispatcher so the method starts |
|
* with first '-' after '<!'. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void comm() throws Exception { |
|
if (mPh == PH_DOC_START) { |
|
mPh = PH_MISC_DTD; // misc before DTD |
|
} // '<!' has been already read by dispetcher. |
|
char ch; |
|
mBuffIdx = -1; |
|
for (short st = 0; st >= 0;) { |
|
ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch(); |
|
if (ch == EOS) { |
|
panic(FAULT); |
|
} |
|
switch (st) { |
|
case 0: // first '-' of the comment open |
|
if (ch == '-') { |
|
st = 1; |
|
} else { |
|
panic(FAULT); |
|
} |
|
break; |
|
case 1: // secind '-' of the comment open |
|
if (ch == '-') { |
|
st = 2; |
|
} else { |
|
panic(FAULT); |
|
} |
|
break; |
|
case 2: // skip the comment body |
|
switch (ch) { |
|
case '-': |
|
st = 3; |
|
break; |
|
default: |
|
bappend(ch); |
|
break; |
|
} |
|
break; |
|
case 3: // second '-' of the comment close |
|
switch (ch) { |
|
case '-': |
|
st = 4; |
|
break; |
|
default: |
|
bappend('-'); |
|
bappend(ch); |
|
st = 2; |
|
break; |
|
} |
|
break; |
|
case 4: // '>' of the comment close |
|
if (ch == '>') { |
|
comm(mBuff, mBuffIdx + 1); |
|
st = -1; |
|
break; |
|
} |
|
// else - panic [#2.5 compatibility note] |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
} |
|
/** |
|
* Parses a processing instruction. |
|
* |
|
* The '<?' is read in dispatcher so the method starts with |
|
* first character of PI target name after '<?'. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private void pi() throws Exception { |
|
// '<?' has been already read by dispetcher. |
|
char ch; |
|
String str = null; |
|
mBuffIdx = -1; |
|
for (short st = 0; st >= 0;) { |
|
ch = getch(); |
|
if (ch == EOS) { |
|
panic(FAULT); |
|
} |
|
switch (st) { |
|
case 0: // read the PI target name |
|
switch (chtyp(ch)) { |
|
case 'a': |
|
case 'A': |
|
case '_': |
|
case ':': |
|
case 'X': |
|
bkch(); |
|
str = name(false); |
|
// PI target name may not be empty string [#2.6] |
|
// PI target name 'XML' is reserved [#2.6] |
|
if ((str.length() == 0) |
|
|| (mXml.name.equals(str.toLowerCase()) == true)) { |
|
panic(FAULT); |
|
} |
|
// This is processing instruction |
|
if (mPh == PH_DOC_START) // the begining of the document |
|
{ |
|
mPh = PH_MISC_DTD; // misc before DTD |
|
} |
|
wsskip(); // skip spaces after the PI target name |
|
st = 1; // accumulate the PI body |
|
mBuffIdx = -1; |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
case 1: // accumulate the PI body |
|
switch (ch) { |
|
case '?': |
|
st = 2; // end of the PI body |
|
break; |
|
default: |
|
bappend(ch); |
|
break; |
|
} |
|
break; |
|
case 2: // end of the PI body |
|
switch (ch) { |
|
case '>': |
|
// PI has been read. |
|
pi(str, new String(mBuff, 0, mBuffIdx + 1)); |
|
st = -1; |
|
break; |
|
case '?': |
|
bappend('?'); |
|
break; |
|
default: |
|
bappend('?'); |
|
bappend(ch); |
|
st = 1; // accumulate the PI body |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
} |
|
/** |
|
* Parses a character data. |
|
* |
|
* The '<!' part is read in dispatcher so the method starts |
|
* with first '[' after '<!'. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private void cdat() |
|
throws Exception { |
|
// '<!' has been already read by dispetcher. |
|
char ch; |
|
mBuffIdx = -1; |
|
for (short st = 0; st >= 0;) { |
|
ch = getch(); |
|
switch (st) { |
|
case 0: // the first '[' of the CDATA open |
|
if (ch == '[') { |
|
st = 1; |
|
} else { |
|
panic(FAULT); |
|
} |
|
break; |
|
case 1: // read "CDATA" |
|
if (chtyp(ch) == 'A') { |
|
bappend(ch); |
|
} else { |
|
if ("CDATA".equals( |
|
new String(mBuff, 0, mBuffIdx + 1)) != true) { |
|
panic(FAULT); |
|
} |
|
bkch(); |
|
st = 2; |
|
} |
|
break; |
|
case 2: // the second '[' of the CDATA open |
|
if (ch != '[') { |
|
panic(FAULT); |
|
} |
|
mBuffIdx = -1; |
|
st = 3; |
|
break; |
|
case 3: // read data before the first ']' |
|
if (ch != ']') { |
|
bappend(ch); |
|
} else { |
|
st = 4; |
|
} |
|
break; |
|
case 4: // read the second ']' or continue to read the data |
|
if (ch != ']') { |
|
bappend(']'); |
|
bappend(ch); |
|
st = 3; |
|
} else { |
|
st = 5; |
|
} |
|
break; |
|
case 5: // read '>' or continue to read the data |
|
switch (ch) { |
|
case ']': |
|
bappend(']'); |
|
break; |
|
case '>': |
|
bflash(); |
|
st = -1; |
|
break; |
|
default: |
|
bappend(']'); |
|
bappend(']'); |
|
bappend(ch); |
|
st = 3; |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
} |
|
/** |
|
* Reads a xml name. |
|
* |
|
* The xml name must conform "Namespaces in XML" specification. Therefore |
|
* the ':' character is not allowed in the name. This method should be used |
|
* for PI and entity names which may not have a namespace according to the |
|
* specification mentioned above. |
|
* |
|
* @param ns The true value turns namespace conformance on. |
|
* @return The name has been read. |
|
* @exception Exception When incorrect character appear in the name. |
|
* @exception IOException |
|
*/ |
|
protected String name(boolean ns) |
|
throws Exception { |
|
mBuffIdx = -1; |
|
bname(ns); |
|
return new String(mBuff, 1, mBuffIdx); |
|
} |
|
/** |
|
* Reads a qualified xml name. |
|
* |
|
* The characters of a qualified name is an array of characters. The first |
|
* (chars[0]) character is the index of the colon character which separates |
|
* the prefix from the local name. If the index is zero, the name does not |
|
* contain separator or the parser works in the namespace unaware mode. The |
|
* length of qualified name is the length of the array minus one. |
|
* |
|
* @param ns The true value turns namespace conformance on. |
|
* @return The characters of a qualified name. |
|
* @exception Exception When incorrect character appear in the name. |
|
* @exception IOException |
|
*/ |
|
protected char[] qname(boolean ns) |
|
throws Exception { |
|
mBuffIdx = -1; |
|
bname(ns); |
|
char chars[] = new char[mBuffIdx + 1]; |
|
System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1); |
|
return chars; |
|
} |
|
/** |
|
* Reads the public or/and system identifiers. |
|
* |
|
* @param inp The input object. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private void pubsys(Input inp) |
|
throws Exception { |
|
Pair pair = pubsys(' '); |
|
inp.pubid = pair.name; |
|
inp.sysid = pair.value; |
|
del(pair); |
|
} |
|
/** |
|
* Reads the public or/and system identifiers. |
|
* |
|
* @param flag The 'N' allows public id be without system id. |
|
* @return The public or/and system identifiers pair. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private Pair pubsys(char flag) throws Exception { |
|
Pair ids = pair(null); |
|
String str = name(false); |
|
if ("PUBLIC".equals(str) == true) { |
|
bqstr('i'); // non-CDATA normalization [#4.2.2] |
|
ids.name = new String(mBuff, 1, mBuffIdx); |
|
switch (wsskip()) { |
|
case '\"': |
|
case '\'': |
|
bqstr(' '); |
|
ids.value = new String(mBuff, 1, mBuffIdx); |
|
break; |
|
case EOS: |
|
panic(FAULT); |
|
default: |
|
if (flag != 'N') // [#4.7] |
|
{ |
|
panic(FAULT); |
|
} |
|
ids.value = null; |
|
break; |
|
} |
|
return ids; |
|
} else if ("SYSTEM".equals(str) == true) { |
|
ids.name = null; |
|
bqstr(' '); |
|
ids.value = new String(mBuff, 1, mBuffIdx); |
|
return ids; |
|
} |
|
panic(FAULT); |
|
return null; |
|
} |
|
/** |
|
* Reads an attribute value. |
|
* |
|
* The grammar which this method can read is:<br /> |
|
* <code>eqstr := S "=" qstr</code><br /> |
|
* <code>qstr := S ("'" string "'") | |
|
* ('"' string '"')</code><br /> This method resolves entities |
|
* inside a string unless the parser parses DTD. |
|
* |
|
* @param flag The '=' character forces the method to accept the '=' |
|
* character before quoted string and read the following string as not an |
|
* attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; |
|
* '-' - not an attribute value; 'd' - in DTD context. |
|
* @return The content of the quoted strign as a string. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
protected String eqstr(char flag) throws Exception { |
|
if (flag == '=') { |
|
wsskip(); |
|
if (getch() != '=') { |
|
panic(FAULT); |
|
} |
|
} |
|
bqstr((flag == '=') ? '-' : flag); |
|
return new String(mBuff, 1, mBuffIdx); |
|
} |
|
/** |
|
* Resoves an entity. |
|
* |
|
* This method resolves built-in and character entity references. It is also |
|
* reports external entities to the application. |
|
* |
|
* @param flag The 'x' character forces the method to report a skipped |
|
* entity; 'i' character - indicates non-CDATA normalization. |
|
* @return Name of unresolved entity or <code>null</code> if entity had been |
|
* resolved successfully. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private String ent(char flag) throws Exception { |
|
char ch; |
|
int idx = mBuffIdx + 1; |
|
Input inp = null; |
|
String str = null; |
|
mESt = 0x100; // reset the built-in entity recognizer |
|
bappend('&'); |
|
for (short st = 0; st >= 0;) { |
|
ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch(); |
|
switch (st) { |
|
case 0: // the first character of the entity name |
|
case 1: // read built-in entity name |
|
switch (chtyp(ch)) { |
|
case 'd': |
|
case '.': |
|
case '-': |
|
if (st != 1) { |
|
panic(FAULT); |
|
} |
|
case 'a': |
|
case 'A': |
|
case '_': |
|
case 'X': |
|
bappend(ch); |
|
eappend(ch); |
|
st = 1; |
|
break; |
|
case ':': |
|
if (mIsNSAware != false) { |
|
panic(FAULT); |
|
} |
|
bappend(ch); |
|
eappend(ch); |
|
st = 1; |
|
break; |
|
case ';': |
|
if (mESt < 0x100) { |
|
// The entity is a built-in entity |
|
mBuffIdx = idx - 1; |
|
bappend(mESt); |
|
st = -1; |
|
break; |
|
} else if (mPh == PH_DTD) { |
|
// In DTD entity declaration has to resolve character |
|
// entities and include "as is" others. [#4.4.7] |
|
bappend(';'); |
|
st = -1; |
|
break; |
|
} |
|
// Convert an entity name to a string |
|
str = new String(mBuff, idx + 1, mBuffIdx - idx); |
|
inp = mEnt.get(str); |
|
// Restore the buffer offset |
|
mBuffIdx = idx - 1; |
|
if (inp != null) { |
|
if (inp.chars == null) { |
|
// External entity |
|
InputSource is = resolveEnt(str, inp.pubid, inp.sysid); |
|
if (is != null) { |
|
push(new Input(BUFFSIZE_READER)); |
|
setinp(is); |
|
mInp.pubid = inp.pubid; |
|
mInp.sysid = inp.sysid; |
|
str = null; // the entity is resolved |
|
} else { |
|
// Unresolved external entity |
|
if (flag != 'x') { |
|
panic(FAULT); // unknown entity within marckup |
|
} // str is name of unresolved entity |
|
} |
|
} else { |
|
// Internal entity |
|
push(inp); |
|
str = null; // the entity is resolved |
|
} |
|
} else { |
|
// Unknown or general unparsed entity |
|
if (flag != 'x') { |
|
panic(FAULT); // unknown entity within marckup |
|
} // str is name of unresolved entity |
|
} |
|
st = -1; |
|
break; |
|
case '#': |
|
if (st != 0) { |
|
panic(FAULT); |
|
} |
|
st = 2; |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
case 2: // read character entity |
|
switch (chtyp(ch)) { |
|
case 'd': |
|
bappend(ch); |
|
break; |
|
case ';': |
|
// Convert the character entity to a character |
|
try { |
|
int i = Integer.parseInt( |
|
new String(mBuff, idx + 1, mBuffIdx - idx), 10); |
|
if (i >= 0xffff) { |
|
panic(FAULT); |
|
} |
|
ch = (char) i; |
|
} catch (NumberFormatException nfe) { |
|
panic(FAULT); |
|
} |
|
// Restore the buffer offset |
|
mBuffIdx = idx - 1; |
|
if (ch == ' ' || mInp.next != null) { |
|
bappend(ch, flag); |
|
} else { |
|
bappend(ch); |
|
} |
|
st = -1; |
|
break; |
|
case 'a': |
|
// If the entity buffer is empty and ch == 'x' |
|
if ((mBuffIdx == idx) && (ch == 'x')) { |
|
st = 3; |
|
break; |
|
} |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
case 3: // read hex character entity |
|
switch (chtyp(ch)) { |
|
case 'A': |
|
case 'a': |
|
case 'd': |
|
bappend(ch); |
|
break; |
|
case ';': |
|
// Convert the character entity to a character |
|
try { |
|
int i = Integer.parseInt( |
|
new String(mBuff, idx + 1, mBuffIdx - idx), 16); |
|
if (i >= 0xffff) { |
|
panic(FAULT); |
|
} |
|
ch = (char) i; |
|
} catch (NumberFormatException nfe) { |
|
panic(FAULT); |
|
} |
|
// Restore the buffer offset |
|
mBuffIdx = idx - 1; |
|
if (ch == ' ' || mInp.next != null) { |
|
bappend(ch, flag); |
|
} else { |
|
bappend(ch); |
|
} |
|
st = -1; |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
return str; |
|
} |
|
/** |
|
* Resoves a parameter entity. |
|
* |
|
* This method resolves a parameter entity references. It is also reports |
|
* external entities to the application. |
|
* |
|
* @param flag The '-' instruct the method to do not set up surrounding |
|
* spaces [#4.4.8]. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void pent(char flag) throws Exception { |
|
char ch; |
|
int idx = mBuffIdx + 1; |
|
Input inp = null; |
|
String str = null; |
|
bappend('%'); |
|
if (mPh != PH_DTD) // the DTD internal subset |
|
{ |
|
return; // Not Recognized [#4.4.1] |
|
} // Read entity name |
|
bname(false); |
|
str = new String(mBuff, idx + 2, mBuffIdx - idx - 1); |
|
if (getch() != ';') { |
|
panic(FAULT); |
|
} |
|
inp = mPEnt.get(str); |
|
// Restore the buffer offset |
|
mBuffIdx = idx - 1; |
|
if (inp != null) { |
|
if (inp.chars == null) { |
|
// External parameter entity |
|
InputSource is = resolveEnt(str, inp.pubid, inp.sysid); |
|
if (is != null) { |
|
if (flag != '-') { |
|
bappend(' '); // tail space |
|
} |
|
push(new Input(BUFFSIZE_READER)); |
|
// BUG: there is no leading space! [#4.4.8] |
|
setinp(is); |
|
mInp.pubid = inp.pubid; |
|
mInp.sysid = inp.sysid; |
|
} else { |
|
// Unresolved external parameter entity |
|
skippedEnt("%" + str); |
|
} |
|
} else { |
|
// Internal parameter entity |
|
if (flag == '-') { |
|
// No surrounding spaces |
|
inp.chIdx = 1; |
|
} else { |
|
// Insert surrounding spaces |
|
bappend(' '); // tail space |
|
inp.chIdx = 0; |
|
} |
|
push(inp); |
|
} |
|
} else { |
|
// Unknown parameter entity |
|
skippedEnt("%" + str); |
|
} |
|
} |
|
/** |
|
* Recognizes and handles a namespace declaration. |
|
* |
|
* This method identifies a type of namespace declaration if any and puts |
|
* new mapping on top of prefix stack. |
|
* |
|
* @param name The attribute qualified name (<code>name.value</code> is a |
|
* <code>String</code> object which represents the attribute prefix). |
|
* @param value The attribute value. |
|
* @return <code>true</code> if a namespace declaration is recognized. |
|
*/ |
|
private boolean isdecl(Pair name, String value) { |
|
if (name.chars[0] == 0) { |
|
if ("xmlns".equals(name.name) == true) { |
|
// New default namespace declaration |
|
mPref = pair(mPref); |
|
mPref.list = mElm; // prefix owner element |
|
mPref.value = value; |
|
mPref.name = ""; |
|
mPref.chars = NONS; |
|
mElm.num++; // namespace counter |
|
return true; |
|
} |
|
} else { |
|
if (name.eqpref(XMLNS) == true) { |
|
// New prefix declaration |
|
int len = name.name.length(); |
|
mPref = pair(mPref); |
|
mPref.list = mElm; // prefix owner element |
|
mPref.value = value; |
|
mPref.name = name.name; |
|
mPref.chars = new char[len + 1]; |
|
mPref.chars[0] = (char) (len + 1); |
|
name.name.getChars(0, len, mPref.chars, 1); |
|
mElm.num++; // namespace counter |
|
return true; |
|
} |
|
} |
|
return false; |
|
} |
|
/** |
|
* Resolves a prefix. |
|
* |
|
* @return The namespace assigned to the prefix. |
|
* @exception Exception When mapping for specified prefix is not found. |
|
*/ |
|
private String rslv(char[] qname) |
|
throws Exception { |
|
for (Pair pref = mPref; pref != null; pref = pref.next) { |
|
if (pref.eqpref(qname) == true) { |
|
return pref.value; |
|
} |
|
} |
|
if (qname[0] == 1) { // QNames like ':local' |
|
for (Pair pref = mPref; pref != null; pref = pref.next) { |
|
if (pref.chars[0] == 0) { |
|
return pref.value; |
|
} |
|
} |
|
} |
|
panic(FAULT); |
|
return null; |
|
} |
|
/** |
|
* Skips xml white space characters. |
|
* |
|
* This method skips white space characters (' ', '\t', '\n', '\r') and |
|
* looks ahead not white space character. |
|
* |
|
* @return The first not white space look ahead character. |
|
* @exception IOException |
|
*/ |
|
protected char wsskip() |
|
throws IOException { |
|
char ch; |
|
while (true) { |
|
// Read next character |
|
ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch(); |
|
if (ch < 0x80) { |
|
if (nmttyp[ch] != 3) // [ \t\n\r] |
|
{ |
|
break; |
|
} |
|
} else { |
|
break; |
|
} |
|
} |
|
mChIdx--; // bkch(); |
|
return ch; |
|
} |
|
/** |
|
* Reports document type. |
|
* |
|
* @param name The name of the entity. |
|
* @param pubid The public identifier of the entity or <code>null</code>. |
|
* @param sysid The system identifier of the entity or <code>null</code>. |
|
*/ |
|
protected abstract void docType(String name, String pubid, String sysid) |
|
throws SAXException; |
|
/** |
|
* Reports a comment. |
|
* |
|
* @param text The comment text starting from first charcater. |
|
* @param length The number of characters in comment. |
|
*/ |
|
protected abstract void comm(char[] text, int length); |
|
/** |
|
* Reports a processing instruction. |
|
* |
|
* @param target The processing instruction target name. |
|
* @param body The processing instruction body text. |
|
*/ |
|
protected abstract void pi(String target, String body) |
|
throws Exception; |
|
/** |
|
* Reports new namespace prefix. The Namespace prefix ( |
|
* <code>mPref.name</code>) being declared and the Namespace URI ( |
|
* <code>mPref.value</code>) the prefix is mapped to. An empty string is |
|
* used for the default element namespace, which has no prefix. |
|
*/ |
|
protected abstract void newPrefix() |
|
throws Exception; |
|
/** |
|
* Reports skipped entity name. |
|
* |
|
* @param name The entity name. |
|
*/ |
|
protected abstract void skippedEnt(String name) |
|
throws Exception; |
|
/** |
|
* Returns an |
|
* <code>InputSource</code> for specified entity or |
|
* <code>null</code>. |
|
* |
|
* @param name The name of the entity. |
|
* @param pubid The public identifier of the entity. |
|
* @param sysid The system identifier of the entity. |
|
*/ |
|
protected abstract InputSource resolveEnt( |
|
String name, String pubid, String sysid) |
|
throws Exception; |
|
/** |
|
* Reports notation declaration. |
|
* |
|
* @param name The notation's name. |
|
* @param pubid The notation's public identifier, or null if none was given. |
|
* @param sysid The notation's system identifier, or null if none was given. |
|
*/ |
|
protected abstract void notDecl(String name, String pubid, String sysid) |
|
throws Exception; |
|
/** |
|
* Reports unparsed entity name. |
|
* |
|
* @param name The unparsed entity's name. |
|
* @param pubid The entity's public identifier, or null if none was given. |
|
* @param sysid The entity's system identifier. |
|
* @param notation The name of the associated notation. |
|
*/ |
|
protected abstract void unparsedEntDecl( |
|
String name, String pubid, String sysid, String notation) |
|
throws Exception; |
|
/** |
|
* Notifies the handler about fatal parsing error. |
|
* |
|
* @param msg The problem description message. |
|
*/ |
|
protected abstract void panic(String msg) |
|
throws Exception; |
|
/** |
|
* Reads a qualified xml name. |
|
* |
|
* This is low level routine which leaves a qName in the buffer. The |
|
* characters of a qualified name is an array of characters. The first |
|
* (chars[0]) character is the index of the colon character which separates |
|
* the prefix from the local name. If the index is zero, the name does not |
|
* contain separator or the parser works in the namespace unaware mode. The |
|
* length of qualified name is the length of the array minus one. |
|
* |
|
* @param ns The true value turns namespace conformance on. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private void bname(boolean ns) |
|
throws Exception { |
|
char ch; |
|
char type; |
|
mBuffIdx++; // allocate a char for colon offset |
|
int bqname = mBuffIdx; |
|
int bcolon = bqname; |
|
int bchidx = bqname + 1; |
|
int bstart = bchidx; |
|
int cstart = mChIdx; |
|
short st = (short) ((ns == true) ? 0 : 2); |
|
while (true) { |
|
// Read next character |
|
if (mChIdx >= mChLen) { |
|
bcopy(cstart, bstart); |
|
getch(); |
|
mChIdx--; // bkch(); |
|
cstart = mChIdx; |
|
bstart = bchidx; |
|
} |
|
ch = mChars[mChIdx++]; |
|
type = (char) 0; // [X] |
|
if (ch < 0x80) { |
|
type = (char) nmttyp[ch]; |
|
} else if (ch == EOS) { |
|
panic(FAULT); |
|
} |
|
// Parse QName |
|
switch (st) { |
|
case 0: // read the first char of the prefix |
|
case 2: // read the first char of the suffix |
|
switch (type) { |
|
case 0: // [aA_X] |
|
bchidx++; // append char to the buffer |
|
st++; // (st == 0)? 1: 3; |
|
break; |
|
case 1: // [:] |
|
mChIdx--; // bkch(); |
|
st++; // (st == 0)? 1: 3; |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
case 1: // read the prefix |
|
case 3: // read the suffix |
|
switch (type) { |
|
case 0: // [aA_X] |
|
case 2: // [.-d] |
|
bchidx++; // append char to the buffer |
|
break; |
|
case 1: // [:] |
|
bchidx++; // append char to the buffer |
|
if (ns == true) { |
|
if (bcolon != bqname) { |
|
panic(FAULT); // it must be only one colon |
|
} |
|
bcolon = bchidx - 1; |
|
if (st == 1) { |
|
st = 2; |
|
} |
|
} |
|
break; |
|
default: |
|
mChIdx--; // bkch(); |
|
bcopy(cstart, bstart); |
|
mBuff[bqname] = (char) (bcolon - bqname); |
|
return; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
} |
|
/** |
|
* Reads a nmtoken. |
|
* |
|
* This is low level routine which leaves a nmtoken in the buffer. |
|
* |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void bntok() throws Exception { |
|
char ch; |
|
mBuffIdx = -1; |
|
bappend((char) 0); // default offset to the colon char |
|
while (true) { |
|
ch = getch(); |
|
switch (chtyp(ch)) { |
|
case 'a': |
|
case 'A': |
|
case 'd': |
|
case '.': |
|
case ':': |
|
case '-': |
|
case '_': |
|
case 'X': |
|
bappend(ch); |
|
break; |
|
case 'Z': |
|
panic(FAULT); |
|
default: |
|
bkch(); |
|
return; |
|
} |
|
} |
|
} |
|
/** |
|
* Recognizes a keyword. |
|
* |
|
* This is low level routine which recognizes one of keywords in the buffer. |
|
* Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN - |
|
* t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED - |
|
* Q IMPLIED - I FIXED - F |
|
* |
|
* @return an id of a keyword or '?'. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private char bkeyword() |
|
throws Exception { |
|
String str = new String(mBuff, 1, mBuffIdx); |
|
switch (str.length()) { |
|
case 2: // ID |
|
return ("ID".equals(str) == true) ? 'i' : '?'; |
|
case 5: // IDREF, CDATA, FIXED |
|
switch (mBuff[1]) { |
|
case 'I': |
|
return ("IDREF".equals(str) == true) ? 'r' : '?'; |
|
case 'C': |
|
return ("CDATA".equals(str) == true) ? 'c' : '?'; |
|
case 'F': |
|
return ("FIXED".equals(str) == true) ? 'F' : '?'; |
|
default: |
|
break; |
|
} |
|
break; |
|
case 6: // IDREFS, ENTITY |
|
switch (mBuff[1]) { |
|
case 'I': |
|
return ("IDREFS".equals(str) == true) ? 'R' : '?'; |
|
case 'E': |
|
return ("ENTITY".equals(str) == true) ? 'n' : '?'; |
|
default: |
|
break; |
|
} |
|
break; |
|
case 7: // NMTOKEN, IMPLIED, ATTLIST, ELEMENT |
|
switch (mBuff[1]) { |
|
case 'I': |
|
return ("IMPLIED".equals(str) == true) ? 'I' : '?'; |
|
case 'N': |
|
return ("NMTOKEN".equals(str) == true) ? 't' : '?'; |
|
case 'A': |
|
return ("ATTLIST".equals(str) == true) ? 'a' : '?'; |
|
case 'E': |
|
return ("ELEMENT".equals(str) == true) ? 'e' : '?'; |
|
default: |
|
break; |
|
} |
|
break; |
|
case 8: // ENTITIES, NMTOKENS, NOTATION, REQUIRED |
|
switch (mBuff[2]) { |
|
case 'N': |
|
return ("ENTITIES".equals(str) == true) ? 'N' : '?'; |
|
case 'M': |
|
return ("NMTOKENS".equals(str) == true) ? 'T' : '?'; |
|
case 'O': |
|
return ("NOTATION".equals(str) == true) ? 'o' : '?'; |
|
case 'E': |
|
return ("REQUIRED".equals(str) == true) ? 'Q' : '?'; |
|
default: |
|
break; |
|
} |
|
break; |
|
default: |
|
break; |
|
} |
|
return '?'; |
|
} |
|
/** |
|
* Reads a single or double quotted string in to the buffer. |
|
* |
|
* This method resolves entities inside a string unless the parser parses |
|
* DTD. |
|
* |
|
* @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' - |
|
* not an attribute value; 'd' - in DTD context. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void bqstr(char flag) throws Exception { |
|
Input inp = mInp; // remember the original input |
|
mBuffIdx = -1; |
|
bappend((char) 0); // default offset to the colon char |
|
char ch; |
|
for (short st = 0; st >= 0;) { |
|
ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch(); |
|
switch (st) { |
|
case 0: // read a single or double quote |
|
switch (ch) { |
|
case ' ': |
|
case '\n': |
|
case '\r': |
|
case '\t': |
|
break; |
|
case '\'': |
|
st = 2; // read a single quoted string |
|
break; |
|
case '\"': |
|
st = 3; // read a double quoted string |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
break; |
|
case 2: // read a single quoted string |
|
case 3: // read a double quoted string |
|
switch (ch) { |
|
case '\'': |
|
if ((st == 2) && (mInp == inp)) { |
|
st = -1; |
|
} else { |
|
bappend(ch); |
|
} |
|
break; |
|
case '\"': |
|
if ((st == 3) && (mInp == inp)) { |
|
st = -1; |
|
} else { |
|
bappend(ch); |
|
} |
|
break; |
|
case '&': |
|
if (flag != 'd') { |
|
ent(flag); |
|
} else { |
|
bappend(ch); |
|
} |
|
break; |
|
case '%': |
|
if (flag == 'd') { |
|
pent('-'); |
|
} else { |
|
bappend(ch); |
|
} |
|
break; |
|
case '<': |
|
if ((flag == '-') || (flag == 'd')) { |
|
bappend(ch); |
|
} else { |
|
panic(FAULT); |
|
} |
|
break; |
|
case EOS: // EOS before single/double quote |
|
panic(FAULT); |
|
case '\r': // EOL processing [#2.11 & #3.3.3] |
|
if (flag != ' ' && mInp.next == null) { |
|
if (getch() != '\n') { |
|
bkch(); |
|
} |
|
ch = '\n'; |
|
} |
|
default: |
|
bappend(ch, flag); |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
// There is maximum one space at the end of the string in |
|
// i-mode (non CDATA normalization) and it has to be removed. |
|
if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) { |
|
mBuffIdx -= 1; |
|
} |
|
} |
|
/** |
|
* Reports characters and empties the parser's buffer. This method is called |
|
* only if parser is going to return control to the main loop. This means |
|
* that this method may use parser buffer to report white space without |
|
* copeing characters to temporary buffer. |
|
*/ |
|
protected abstract void bflash() |
|
throws Exception; |
|
/** |
|
* Reports white space characters and empties the parser's buffer. This |
|
* method is called only if parser is going to return control to the main |
|
* loop. This means that this method may use parser buffer to report white |
|
* space without copeing characters to temporary buffer. |
|
*/ |
|
protected abstract void bflash_ws() |
|
throws Exception; |
|
/** |
|
* Appends a character to parser's buffer with normalization. |
|
* |
|
* @param ch The character to append to the buffer. |
|
* @param mode The normalization mode. |
|
*/ |
|
private void bappend(char ch, char mode) { |
|
// This implements attribute value normalization as |
|
// described in the XML specification [#3.3.3]. |
|
switch (mode) { |
|
case 'i': // non CDATA normalization |
|
switch (ch) { |
|
case ' ': |
|
case '\n': |
|
case '\r': |
|
case '\t': |
|
if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) { |
|
bappend(' '); |
|
} |
|
return; |
|
default: |
|
break; |
|
} |
|
break; |
|
case 'c': // CDATA normalization |
|
switch (ch) { |
|
case '\n': |
|
case '\r': |
|
case '\t': |
|
ch = ' '; |
|
break; |
|
default: |
|
break; |
|
} |
|
break; |
|
default: // no normalization |
|
break; |
|
} |
|
mBuffIdx++; |
|
if (mBuffIdx < mBuff.length) { |
|
mBuff[mBuffIdx] = ch; |
|
} else { |
|
mBuffIdx--; |
|
bappend(ch); |
|
} |
|
} |
|
/** |
|
* Appends a character to parser's buffer. |
|
* |
|
* @param ch The character to append to the buffer. |
|
*/ |
|
private void bappend(char ch) { |
|
try { |
|
mBuff[++mBuffIdx] = ch; |
|
} catch (Exception exp) { |
|
// Double the buffer size |
|
char buff[] = new char[mBuff.length << 1]; |
|
System.arraycopy(mBuff, 0, buff, 0, mBuff.length); |
|
mBuff = buff; |
|
mBuff[mBuffIdx] = ch; |
|
} |
|
} |
|
/** |
|
* Appends (mChIdx - cidx) characters from character buffer (mChars) to |
|
* parser's buffer (mBuff). |
|
* |
|
* @param cidx The character buffer (mChars) start index. |
|
* @param bidx The parser buffer (mBuff) start index. |
|
*/ |
|
private void bcopy(int cidx, int bidx) { |
|
int length = mChIdx - cidx; |
|
if ((bidx + length + 1) >= mBuff.length) { |
|
// Expand the buffer |
|
char buff[] = new char[mBuff.length + length]; |
|
System.arraycopy(mBuff, 0, buff, 0, mBuff.length); |
|
mBuff = buff; |
|
} |
|
System.arraycopy(mChars, cidx, mBuff, bidx, length); |
|
mBuffIdx += length; |
|
} |
|
/** |
|
* Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>, |
|
* <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state belowe |
|
* 0x100 is a built-in entity replacement character. |
|
* |
|
* @param ch the next character of an entity name. |
|
*/ |
|
@SuppressWarnings("fallthrough") |
|
private void eappend(char ch) { |
|
switch (mESt) { |
|
case 0x100: // "l" or "g" or "a" or "q" |
|
switch (ch) { |
|
case 'l': |
|
mESt = 0x101; |
|
break; |
|
case 'g': |
|
mESt = 0x102; |
|
break; |
|
case 'a': |
|
mESt = 0x103; |
|
break; |
|
case 'q': |
|
mESt = 0x107; |
|
break; |
|
default: |
|
mESt = 0x200; |
|
break; |
|
} |
|
break; |
|
case 0x101: // "lt" |
|
mESt = (ch == 't') ? '<' : (char) 0x200; |
|
break; |
|
case 0x102: // "gt" |
|
mESt = (ch == 't') ? '>' : (char) 0x200; |
|
break; |
|
case 0x103: // "am" or "ap" |
|
switch (ch) { |
|
case 'm': |
|
mESt = 0x104; |
|
break; |
|
case 'p': |
|
mESt = 0x105; |
|
break; |
|
default: |
|
mESt = 0x200; |
|
break; |
|
} |
|
break; |
|
case 0x104: // "amp" |
|
mESt = (ch == 'p') ? '&' : (char) 0x200; |
|
break; |
|
case 0x105: // "apo" |
|
mESt = (ch == 'o') ? (char) 0x106 : (char) 0x200; |
|
break; |
|
case 0x106: // "apos" |
|
mESt = (ch == 's') ? '\'' : (char) 0x200; |
|
break; |
|
case 0x107: // "qu" |
|
mESt = (ch == 'u') ? (char) 0x108 : (char) 0x200; |
|
break; |
|
case 0x108: // "quo" |
|
mESt = (ch == 'o') ? (char) 0x109 : (char) 0x200; |
|
break; |
|
case 0x109: // "quot" |
|
mESt = (ch == 't') ? '\"' : (char) 0x200; |
|
break; |
|
case '<': // "lt" |
|
case '>': // "gt" |
|
case '&': // "amp" |
|
case '\'': // "apos" |
|
case '\"': // "quot" |
|
mESt = 0x200; |
|
default: |
|
break; |
|
} |
|
} |
|
/** |
|
* Sets up a new input source on the top of the input stack. Note, the first |
|
* byte returned by the entity's byte stream has to be the first byte in the |
|
* entity. However, the parser does not expect the byte order mask in both |
|
* cases when encoding is provided by the input source. |
|
* |
|
* @param is A new input source to set up. |
|
* @exception IOException If any IO errors occur. |
|
* @exception Exception is parser specific exception form panic method. |
|
*/ |
|
protected void setinp(InputSource is) |
|
throws Exception { |
|
Reader reader = null; |
|
mChIdx = 0; |
|
mChLen = 0; |
|
mChars = mInp.chars; |
|
mInp.src = null; |
|
if (mPh < PH_DOC_START) { |
|
mIsSAlone = false; // default [#2.9] |
|
} |
|
mIsSAloneSet = false; |
|
if (is.getCharacterStream() != null) { |
|
// Ignore encoding in the xml text decl. |
|
reader = is.getCharacterStream(); |
|
xml(reader); |
|
} else if (is.getByteStream() != null) { |
|
String expenc; |
|
if (is.getEncoding() != null) { |
|
// Ignore encoding in the xml text decl. |
|
expenc = is.getEncoding().toUpperCase(); |
|
if (expenc.equals("UTF-16")) { |
|
reader = bom(is.getByteStream(), 'U'); // UTF-16 [#4.3.3] |
|
} else { |
|
reader = enc(expenc, is.getByteStream()); |
|
} |
|
xml(reader); |
|
} else { |
|
// Get encoding from BOM or the xml text decl. |
|
reader = bom(is.getByteStream(), ' '); |
|
if (reader == null) { |
|
// Encoding is defined by the xml text decl. |
|
reader = enc("UTF-8", is.getByteStream()); |
|
expenc = xml(reader); |
|
if (expenc.startsWith("UTF-16")) { |
|
panic(FAULT); // UTF-16 must have BOM [#4.3.3] |
|
} |
|
reader = enc(expenc, is.getByteStream()); |
|
} else { |
|
// Encoding is defined by the BOM. |
|
xml(reader); |
|
} |
|
} |
|
} else { |
|
// There is no support for public/system identifiers. |
|
panic(FAULT); |
|
} |
|
mInp.src = reader; |
|
mInp.pubid = is.getPublicId(); |
|
mInp.sysid = is.getSystemId(); |
|
} |
|
/** |
|
* Determines the entity encoding. |
|
* |
|
* This method gets encoding from Byte Order Mask [#4.3.3] if any. Note, the |
|
* first byte returned by the entity's byte stream has to be the first byte |
|
* in the entity. Also, there is no support for UCS-4. |
|
* |
|
* @param is A byte stream of the entity. |
|
* @param hint An encoding hint, character U means UTF-16. |
|
* @return a reader constructed from the BOM or UTF-8 by default. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private Reader bom(InputStream is, char hint) |
|
throws Exception { |
|
int val = is.read(); |
|
switch (val) { |
|
case 0xef: // UTF-8 |
|
if (hint == 'U') // must be UTF-16 |
|
{ |
|
panic(FAULT); |
|
} |
|
if (is.read() != 0xbb) { |
|
panic(FAULT); |
|
} |
|
if (is.read() != 0xbf) { |
|
panic(FAULT); |
|
} |
|
return new ReaderUTF8(is); |
|
case 0xfe: // UTF-16, big-endian |
|
if (is.read() != 0xff) { |
|
panic(FAULT); |
|
} |
|
return new ReaderUTF16(is, 'b'); |
|
case 0xff: // UTF-16, little-endian |
|
if (is.read() != 0xfe) { |
|
panic(FAULT); |
|
} |
|
return new ReaderUTF16(is, 'l'); |
|
case -1: |
|
mChars[mChIdx++] = EOS; |
|
return new ReaderUTF8(is); |
|
default: |
|
if (hint == 'U') // must be UTF-16 |
|
{ |
|
panic(FAULT); |
|
} |
|
// Read the rest of UTF-8 character |
|
switch (val & 0xf0) { |
|
case 0xc0: |
|
case 0xd0: |
|
mChars[mChIdx++] = (char) (((val & 0x1f) << 6) | (is.read() & 0x3f)); |
|
break; |
|
case 0xe0: |
|
mChars[mChIdx++] = (char) (((val & 0x0f) << 12) |
|
| ((is.read() & 0x3f) << 6) | (is.read() & 0x3f)); |
|
break; |
|
case 0xf0: // UCS-4 character |
|
throw new UnsupportedEncodingException(); |
|
default: |
|
mChars[mChIdx++] = (char) val; |
|
break; |
|
} |
|
return null; |
|
} |
|
} |
|
/** |
|
* Parses the xml text declaration. |
|
* |
|
* This method gets encoding from the xml text declaration [#4.3.1] if any. |
|
* The method assumes the buffer (mChars) is big enough to accommodate whole |
|
* xml text declaration. |
|
* |
|
* @param reader is entity reader. |
|
* @return The xml text declaration encoding or default UTF-8 encoding. |
|
* @exception Exception is parser specific exception form panic method. |
|
* @exception IOException |
|
*/ |
|
private String xml(Reader reader) |
|
throws Exception { |
|
String str = null; |
|
String enc = "UTF-8"; |
|
char ch; |
|
int val; |
|
short st; |
|
// Read the xml text declaration into the buffer |
|
if (mChIdx != 0) { |
|
// The bom method have read ONE char into the buffer. |
|
st = (short) ((mChars[0] == '<') ? 1 : -1); |
|
} else { |
|
st = 0; |
|
} |
|
while (st >= 0 && mChIdx < mChars.length) { |
|
ch = ((val = reader.read()) >= 0) ? (char) val : EOS; |
|
mChars[mChIdx++] = ch; |
|
switch (st) { |
|
case 0: // read '<' of xml declaration |
|
switch (ch) { |
|
case '<': |
|
st = 1; |
|
break; |
|
case 0xfeff: // the byte order mask |
|
ch = ((val = reader.read()) >= 0) ? (char) val : EOS; |
|
mChars[mChIdx - 1] = ch; |
|
st = (short) ((ch == '<') ? 1 : -1); |
|
break; |
|
default: |
|
st = -1; |
|
break; |
|
} |
|
break; |
|
case 1: // read '?' of xml declaration [#4.3.1] |
|
st = (short) ((ch == '?') ? 2 : -1); |
|
break; |
|
case 2: // read 'x' of xml declaration [#4.3.1] |
|
st = (short) ((ch == 'x') ? 3 : -1); |
|
break; |
|
case 3: // read 'm' of xml declaration [#4.3.1] |
|
st = (short) ((ch == 'm') ? 4 : -1); |
|
break; |
|
case 4: // read 'l' of xml declaration [#4.3.1] |
|
st = (short) ((ch == 'l') ? 5 : -1); |
|
break; |
|
case 5: // read white space after 'xml' |
|
switch (ch) { |
|
case ' ': |
|
case '\t': |
|
case '\r': |
|
case '\n': |
|
st = 6; |
|
break; |
|
default: |
|
st = -1; |
|
break; |
|
} |
|
break; |
|
case 6: // read content of xml declaration |
|
switch (ch) { |
|
case '?': |
|
st = 7; |
|
break; |
|
case EOS: |
|
st = -2; |
|
break; |
|
default: |
|
break; |
|
} |
|
break; |
|
case 7: // read '>' after '?' of xml declaration |
|
switch (ch) { |
|
case '>': |
|
case EOS: |
|
st = -2; |
|
break; |
|
default: |
|
st = 6; |
|
break; |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
break; |
|
} |
|
} |
|
mChLen = mChIdx; |
|
mChIdx = 0; |
|
// If there is no xml text declaration, the encoding is default. |
|
if (st == -1) { |
|
return enc; |
|
} |
|
mChIdx = 5; // the first white space after "<?xml" |
|
// Parse the xml text declaration |
|
for (st = 0; st >= 0;) { |
|
ch = getch(); |
|
switch (st) { |
|
case 0: // skip spaces after the xml declaration name |
|
if (chtyp(ch) != ' ') { |
|
bkch(); |
|
st = 1; |
|
} |
|
break; |
|
case 1: // read xml declaration version |
|
case 2: // read xml declaration encoding or standalone |
|
case 3: // read xml declaration standalone |
|
switch (chtyp(ch)) { |
|
case 'a': |
|
case 'A': |
|
case '_': |
|
bkch(); |
|
str = name(false).toLowerCase(); |
|
if ("version".equals(str) == true) { |
|
if (st != 1) { |
|
panic(FAULT); |
|
} |
|
if ("1.0".equals(eqstr('=')) != true) { |
|
panic(FAULT); |
|
} |
|
mInp.xmlver = 0x0100; |
|
st = 2; |
|
} else if ("encoding".equals(str) == true) { |
|
if (st != 2) { |
|
panic(FAULT); |
|
} |
|
mInp.xmlenc = eqstr('=').toUpperCase(); |
|
enc = mInp.xmlenc; |
|
st = 3; |
|
} else if ("standalone".equals(str) == true) { |
|
if ((st == 1) || (mPh >= PH_DOC_START)) // [#4.3.1] |
|
{ |
|
panic(FAULT); |
|
} |
|
str = eqstr('=').toLowerCase(); |
|
// Check the 'standalone' value and use it [#5.1] |
|
if (str.equals("yes") == true) { |
|
mIsSAlone = true; |
|
} else if (str.equals("no") == true) { |
|
mIsSAlone = false; |
|
} else { |
|
panic(FAULT); |
|
} |
|
mIsSAloneSet = true; |
|
st = 4; |
|
} else { |
|
panic(FAULT); |
|
} |
|
break; |
|
case ' ': |
|
break; |
|
case '?': |
|
if (st == 1) { |
|
panic(FAULT); |
|
} |
|
bkch(); |
|
st = 4; |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
case 4: // end of xml declaration |
|
switch (chtyp(ch)) { |
|
case '?': |
|
if (getch() != '>') { |
|
panic(FAULT); |
|
} |
|
if (mPh <= PH_DOC_START) { |
|
mPh = PH_MISC_DTD; // misc before DTD |
|
} |
|
st = -1; |
|
break; |
|
case ' ': |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
break; |
|
default: |
|
panic(FAULT); |
|
} |
|
} |
|
return enc; |
|
} |
|
/** |
|
* Sets up the document reader. |
|
* |
|
* @param name an encoding name. |
|
* @param is the document byte input stream. |
|
* @return a reader constructed from encoding name and input stream. |
|
* @exception UnsupportedEncodingException |
|
*/ |
|
private Reader enc(String name, InputStream is) |
|
throws UnsupportedEncodingException { |
|
// DO NOT CLOSE current reader if any! |
|
if (name.equals("UTF-8")) { |
|
return new ReaderUTF8(is); |
|
} else if (name.equals("UTF-16LE")) { |
|
return new ReaderUTF16(is, 'l'); |
|
} else if (name.equals("UTF-16BE")) { |
|
return new ReaderUTF16(is, 'b'); |
|
} else { |
|
return new InputStreamReader(is, name); |
|
} |
|
} |
|
/** |
|
* Sets up current input on the top of the input stack. |
|
* |
|
* @param inp A new input to set up. |
|
*/ |
|
protected void push(Input inp) { |
|
mInp.chLen = mChLen; |
|
mInp.chIdx = mChIdx; |
|
inp.next = mInp; |
|
mInp = inp; |
|
mChars = inp.chars; |
|
mChLen = inp.chLen; |
|
mChIdx = inp.chIdx; |
|
} |
|
/** |
|
* Restores previous input on the top of the input stack. |
|
*/ |
|
protected void pop() { |
|
if (mInp.src != null) { |
|
try { |
|
mInp.src.close(); |
|
} catch (IOException ioe) { |
|
} |
|
mInp.src = null; |
|
} |
|
mInp = mInp.next; |
|
if (mInp != null) { |
|
mChars = mInp.chars; |
|
mChLen = mInp.chLen; |
|
mChIdx = mInp.chIdx; |
|
} else { |
|
mChars = null; |
|
mChLen = 0; |
|
mChIdx = 0; |
|
} |
|
} |
|
/** |
|
* Maps a character to it's type. |
|
* |
|
* Possible character type values are:<br /> - ' ' for any kind of white |
|
* space character;<br /> - 'a' for any lower case alphabetical character |
|
* value;<br /> - 'A' for any upper case alphabetical character value;<br /> |
|
* - 'd' for any decimal digit character value;<br /> - 'z' for any |
|
* character less then ' ' except '\t', '\n', '\r';<br /> - 'X' for any not |
|
* ASCII character;<br /> - 'Z' for EOS character.<br /> An ASCII (7 bit) |
|
* character which does not fall in any category listed above is mapped to |
|
* it self. |
|
* |
|
* @param ch The character to map. |
|
* @return The type of character. |
|
*/ |
|
protected char chtyp(char ch) { |
|
if (ch < 0x80) { |
|
return (char) asctyp[ch]; |
|
} |
|
return (ch != EOS) ? 'X' : 'Z'; |
|
} |
|
/** |
|
* Retrives the next character in the document. |
|
* |
|
* @return The next character in the document. |
|
*/ |
|
protected char getch() |
|
throws IOException { |
|
if (mChIdx >= mChLen) { |
|
if (mInp.src == null) { |
|
pop(); // remove internal entity |
|
return getch(); |
|
} |
|
// Read new portion of the document characters |
|
int Num = mInp.src.read(mChars, 0, mChars.length); |
|
if (Num < 0) { |
|
if (mInp != mDoc) { |
|
pop(); // restore the previous input |
|
return getch(); |
|
} else { |
|
mChars[0] = EOS; |
|
mChLen = 1; |
|
} |
|
} else { |
|
mChLen = Num; |
|
} |
|
mChIdx = 0; |
|
} |
|
return mChars[mChIdx++]; |
|
} |
|
/** |
|
* Puts back the last read character. |
|
* |
|
* This method <strong>MUST NOT</strong> be called more then once after each |
|
* call of {@link #getch getch} method. |
|
*/ |
|
protected void bkch() |
|
throws Exception { |
|
if (mChIdx <= 0) { |
|
panic(FAULT); |
|
} |
|
mChIdx--; |
|
} |
|
/** |
|
* Sets the current character. |
|
* |
|
* @param ch The character to set. |
|
*/ |
|
protected void setch(char ch) { |
|
mChars[mChIdx] = ch; |
|
} |
|
/** |
|
* Finds a pair in the pair chain by a qualified name. |
|
* |
|
* @param chain The first element of the chain of pairs. |
|
* @param qname The qualified name. |
|
* @return A pair with the specified qualified name or null. |
|
*/ |
|
protected Pair find(Pair chain, char[] qname) { |
|
for (Pair pair = chain; pair != null; pair = pair.next) { |
|
if (pair.eqname(qname) == true) { |
|
return pair; |
|
} |
|
} |
|
return null; |
|
} |
|
/** |
|
* Provedes an instance of a pair. |
|
* |
|
* @param next The reference to a next pair. |
|
* @return An instance of a pair. |
|
*/ |
|
protected Pair pair(Pair next) { |
|
Pair pair; |
|
if (mDltd != null) { |
|
pair = mDltd; |
|
mDltd = pair.next; |
|
} else { |
|
pair = new Pair(); |
|
} |
|
pair.next = next; |
|
return pair; |
|
} |
|
/** |
|
* Deletes an instance of a pair. |
|
* |
|
* @param pair The pair to delete. |
|
* @return A reference to the next pair in a chain. |
|
*/ |
|
protected Pair del(Pair pair) { |
|
Pair next = pair.next; |
|
pair.name = null; |
|
pair.value = null; |
|
pair.chars = null; |
|
pair.list = null; |
|
pair.next = mDltd; |
|
mDltd = pair; |
|
return next; |
|
} |
|
} |