| 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 */  | 
 | 
 | 
 | 
package javax.swing.text.html.parser;  | 
 | 
 | 
 | 
import javax.swing.text.SimpleAttributeSet;  | 
 | 
import javax.swing.text.html.HTML;  | 
 | 
import javax.swing.text.ChangedCharSetException;  | 
 | 
import java.io.*;  | 
 | 
import java.util.Hashtable;  | 
 | 
import java.util.Properties;  | 
 | 
import java.util.Vector;  | 
 | 
import java.util.Enumeration;  | 
 | 
import java.net.URL;  | 
 | 
 | 
 | 
import sun.misc.MessageUtils;  | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 */  | 
 | 
public  | 
 | 
class Parser implements DTDConstants { | 
 | 
 | 
 | 
    private char text[] = new char[1024];  | 
 | 
    private int textpos = 0;  | 
 | 
    private TagElement last;  | 
 | 
    private boolean space;  | 
 | 
 | 
 | 
    private char str[] = new char[128];  | 
 | 
    private int strpos = 0;  | 
 | 
 | 
 | 
    protected DTD dtd = null;  | 
 | 
 | 
 | 
    private int ch;  | 
 | 
    private int ln;  | 
 | 
    private Reader in;  | 
 | 
 | 
 | 
    private Element recent;  | 
 | 
    private TagStack stack;  | 
 | 
    private boolean skipTag = false;  | 
 | 
    private TagElement lastFormSent = null;  | 
 | 
    private SimpleAttributeSet attributes = new SimpleAttributeSet();  | 
 | 
 | 
 | 
    // State for <html>, <head> and <body>.  Since people like to slap  | 
 | 
    // together HTML documents without thinking, occasionally they  | 
 | 
    // have multiple instances of these tags.  These booleans track  | 
 | 
    // the first sightings of these tags so they can be safely ignored  | 
 | 
      | 
 | 
    private boolean seenHtml = false;  | 
 | 
    private boolean seenHead = false;  | 
 | 
    private boolean seenBody = false;  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    private boolean ignoreSpace;  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    protected boolean strict = false;  | 
 | 
 | 
 | 
 | 
 | 
      | 
 | 
    private int crlfCount;  | 
 | 
      | 
 | 
    private int crCount;  | 
 | 
      | 
 | 
    private int lfCount;  | 
 | 
 | 
 | 
    //  | 
 | 
    // To correctly identify the start of a tag/comment/text we need two  | 
 | 
    // ivars. Two are needed as handleText isn't invoked until the tag  | 
 | 
    // after the text has been parsed, that is the parser parses the text,  | 
 | 
    // then a tag, then invokes handleText followed by handleStart.  | 
 | 
    //  | 
 | 
      | 
 | 
 | 
 | 
     * tag, text. Use getBlockStartPosition to access this. */  | 
 | 
    private int currentBlockStartPos;  | 
 | 
      | 
 | 
    private int lastBlockStartPos;  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    private static final char[] cp1252Map = { | 
 | 
        8218,    | 
 | 
        402,     | 
 | 
        8222,    | 
 | 
        8230,    | 
 | 
        8224,    | 
 | 
        8225,    | 
 | 
        710,     | 
 | 
        8240,    | 
 | 
        352,     | 
 | 
        8249,    | 
 | 
        338,     | 
 | 
        141,     | 
 | 
        142,     | 
 | 
        143,     | 
 | 
        144,     | 
 | 
        8216,    | 
 | 
        8217,    | 
 | 
        8220,    | 
 | 
        8221,    | 
 | 
        8226,    | 
 | 
        8211,    | 
 | 
        8212,    | 
 | 
        732,     | 
 | 
        8482,    | 
 | 
        353,     | 
 | 
        8250,    | 
 | 
        339,     | 
 | 
        157,     | 
 | 
        158,     | 
 | 
        376      | 
 | 
    };  | 
 | 
 | 
 | 
    /**
     * Creates a parser that works against the given DTD.
     *
     * @param dtd the document type definition stored in the {@code dtd} field
     */
    public Parser(DTD dtd) {
        this.dtd = dtd;
    }
 | 
 | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected int getCurrentLine() { | 
 | 
        return ln;  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    int getBlockStartPosition() { | 
 | 
        return Math.max(0, lastBlockStartPos - 1);  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected TagElement makeTag(Element elem, boolean fictional) { | 
 | 
        return new TagElement(elem, fictional);  | 
 | 
    }  | 
 | 
 | 
 | 
    protected TagElement makeTag(Element elem) { | 
 | 
        return makeTag(elem, false);  | 
 | 
    }  | 
 | 
 | 
 | 
    protected SimpleAttributeSet getAttributes() { | 
 | 
        return attributes;  | 
 | 
    }  | 
 | 
 | 
 | 
    protected void flushAttributes() { | 
 | 
        attributes.removeAttributes(attributes);  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected void handleText(char text[]) { | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected void handleTitle(char text[]) { | 
 | 
        // default behavior is to call handleText. Subclasses  | 
 | 
          | 
 | 
        handleText(text);  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected void handleComment(char text[]) { | 
 | 
    }  | 
 | 
 | 
 | 
    protected void handleEOFInComment() { | 
 | 
        // We've reached EOF.  Our recovery strategy is to  | 
 | 
        // see if we have more than one line in the comment;  | 
 | 
        // if so, we pretend that the comment was an unterminated  | 
 | 
        // single line comment, and reparse the lines after the  | 
 | 
        // first line as normal HTML content.  | 
 | 
 | 
 | 
        int commentEndPos = strIndexOf('\n'); | 
 | 
        if (commentEndPos >= 0) { | 
 | 
            handleComment(getChars(0, commentEndPos));  | 
 | 
            try { | 
 | 
                in.close();  | 
 | 
                in = new CharArrayReader(getChars(commentEndPos + 1));  | 
 | 
                ch = '>';  | 
 | 
            } catch (IOException e) { | 
 | 
                error("ioexception"); | 
 | 
            }  | 
 | 
 | 
 | 
            resetStrBuffer();  | 
 | 
        } else { | 
 | 
              | 
 | 
            error("eof.comment"); | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected void handleEmptyTag(TagElement tag) throws ChangedCharSetException { | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected void handleStartTag(TagElement tag) { | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected void handleEndTag(TagElement tag) { | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected void handleError(int ln, String msg) { | 
 | 
        /*  | 
 | 
        Thread.dumpStack();  | 
 | 
        System.out.println("**** " + stack); | 
 | 
        System.out.println("line " + ln + ": error: " + msg); | 
 | 
        System.out.println();  | 
 | 
        */  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    void handleText(TagElement tag) { | 
 | 
        if (tag.breaksFlow()) { | 
 | 
            space = false;  | 
 | 
            if (!strict) { | 
 | 
                ignoreSpace = true;  | 
 | 
            }  | 
 | 
        }  | 
 | 
        if (textpos == 0) { | 
 | 
            if ((!space) || (stack == null) || last.breaksFlow() ||  | 
 | 
                !stack.advance(dtd.pcdata)) { | 
 | 
                last = tag;  | 
 | 
                space = false;  | 
 | 
                lastBlockStartPos = currentBlockStartPos;  | 
 | 
                return;  | 
 | 
            }  | 
 | 
        }  | 
 | 
        if (space) { | 
 | 
            if (!ignoreSpace) { | 
 | 
                  | 
 | 
                if (textpos + 1 > text.length) { | 
 | 
                    char newtext[] = new char[text.length + 200];  | 
 | 
                    System.arraycopy(text, 0, newtext, 0, text.length);  | 
 | 
                    text = newtext;  | 
 | 
                }  | 
 | 
 | 
 | 
                  | 
 | 
                text[textpos++] = ' ';  | 
 | 
                if (!strict && !tag.getElement().isEmpty()) { | 
 | 
                    ignoreSpace = true;  | 
 | 
                }  | 
 | 
            }  | 
 | 
            space = false;  | 
 | 
        }  | 
 | 
        char newtext[] = new char[textpos];  | 
 | 
        System.arraycopy(text, 0, newtext, 0, textpos);  | 
 | 
        // Handles cases of bad html where the title tag  | 
 | 
          | 
 | 
        if (tag.getElement().getName().equals("title")) { | 
 | 
            handleTitle(newtext);  | 
 | 
        } else { | 
 | 
            handleText(newtext);  | 
 | 
        }  | 
 | 
        lastBlockStartPos = currentBlockStartPos;  | 
 | 
        textpos = 0;  | 
 | 
        last = tag;  | 
 | 
        space = false;  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    protected void error(String err, String arg1, String arg2,  | 
 | 
        String arg3) { | 
 | 
        handleError(ln, err + " " + arg1 + " " + arg2 + " " + arg3);  | 
 | 
    }  | 
 | 
 | 
 | 
    protected void error(String err, String arg1, String arg2) { | 
 | 
        error(err, arg1, arg2, "?");  | 
 | 
    }  | 
 | 
    protected void error(String err, String arg1) { | 
 | 
        error(err, arg1, "?", "?");  | 
 | 
    }  | 
 | 
    protected void error(String err) { | 
 | 
        error(err, "?", "?", "?");  | 
 | 
    }  | 
 | 
 | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    protected void startTag(TagElement tag) throws ChangedCharSetException { | 
 | 
        Element elem = tag.getElement();  | 
 | 
 | 
 | 
        // If the tag is an empty tag and texpos != 0  | 
 | 
        // this implies that there is text before the  | 
 | 
        // start tag that needs to be processed before  | 
 | 
        // handling the tag.  | 
 | 
          | 
 | 
        if (!elem.isEmpty() ||  | 
 | 
                    ((last != null) && !last.breaksFlow()) ||  | 
 | 
                    (textpos != 0)) { | 
 | 
            handleText(tag);  | 
 | 
        } else { | 
 | 
            // this variable gets updated in handleText().  | 
 | 
            // Since in this case we do not call handleText()  | 
 | 
            // we need to update it here.  | 
 | 
              | 
 | 
            last = tag;  | 
 | 
            // Note that we should really check last.breakFlows before  | 
 | 
              | 
 | 
            space = false;  | 
 | 
        }  | 
 | 
        lastBlockStartPos = currentBlockStartPos;  | 
 | 
 | 
 | 
          | 
 | 
        for (AttributeList a = elem.atts ; a != null ; a = a.next) { | 
 | 
            if ((a.modifier == REQUIRED) &&  | 
 | 
                ((attributes.isEmpty()) ||  | 
 | 
                 ((!attributes.isDefined(a.name)) &&  | 
 | 
                  (!attributes.isDefined(HTML.getAttributeKey(a.name)))))) { | 
 | 
                error("req.att ", a.getName(), elem.getName()); | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
        if (elem.isEmpty()) { | 
 | 
            handleEmptyTag(tag);  | 
 | 
            /*  | 
 | 
        } else if (elem.getName().equals("form")) { | 
 | 
            handleStartTag(tag);  | 
 | 
            */  | 
 | 
        } else { | 
 | 
            recent = elem;  | 
 | 
            stack = new TagStack(tag, stack);  | 
 | 
            handleStartTag(tag);  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    protected void endTag(boolean omitted) { | 
 | 
        handleText(stack.tag);  | 
 | 
 | 
 | 
        if (omitted && !stack.elem.omitEnd()) { | 
 | 
            error("end.missing", stack.elem.getName()); | 
 | 
        } else if (!stack.terminate()) { | 
 | 
            error("end.unexpected", stack.elem.getName()); | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        handleEndTag(stack.tag);  | 
 | 
        stack = stack.next;  | 
 | 
        recent = (stack != null) ? stack.elem : null;  | 
 | 
    }  | 
 | 
 | 
 | 
 | 
 | 
    boolean ignoreElement(Element elem) { | 
 | 
 | 
 | 
        String stackElement = stack.elem.getName();  | 
 | 
        String elemName = elem.getName();  | 
 | 
          | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
           been seen. **/  | 
 | 
        if ((elemName.equals("html") && seenHtml) || | 
 | 
            (elemName.equals("head") && seenHead) || | 
 | 
            (elemName.equals("body") && seenBody)) { | 
 | 
            return true;  | 
 | 
        }  | 
 | 
        if (elemName.equals("dt") || elemName.equals("dd")) { | 
 | 
            TagStack s = stack;  | 
 | 
            while (s != null && !s.elem.getName().equals("dl")) { | 
 | 
                s = s.next;  | 
 | 
            }  | 
 | 
            if (s == null) { | 
 | 
                return true;  | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
        if (((stackElement.equals("table")) && | 
 | 
             (!elemName.equals("#pcdata")) && (!elemName.equals("input"))) || | 
 | 
            ((elemName.equals("font")) && | 
 | 
             (stackElement.equals("ul") || stackElement.equals("ol"))) || | 
 | 
            (elemName.equals("meta") && stack != null) || | 
 | 
            (elemName.equals("style") && seenBody) || | 
 | 
            (stackElement.equals("table") && elemName.equals("a"))) { | 
 | 
            return true;  | 
 | 
        }  | 
 | 
        return false;  | 
 | 
    }  | 
 | 
 | 
 | 
 | 
 | 
    /**  | 
 | 
     * Marks the first time a tag has been seen in a document  | 
 | 
     */  | 
 | 
 | 
 | 
    protected void markFirstTime(Element elem) { | 
 | 
        String elemName = elem.getName();  | 
 | 
        if (elemName.equals("html")) { | 
 | 
            seenHtml = true;  | 
 | 
        } else if (elemName.equals("head")) { | 
 | 
            seenHead = true;  | 
 | 
        } else if (elemName.equals("body")) { | 
 | 
            if (buf.length == 1) { | 
 | 
                  | 
 | 
                char[] newBuf = new char[256];  | 
 | 
 | 
 | 
                newBuf[0] = buf[0];  | 
 | 
                buf = newBuf;  | 
 | 
            }  | 
 | 
            seenBody = true;  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    boolean legalElementContext(Element elem) throws ChangedCharSetException { | 
 | 
 | 
 | 
        // System.out.println("-- legalContext -- " + elem); | 
 | 
 | 
 | 
          | 
 | 
        if (stack == null) { | 
 | 
              | 
 | 
            if (elem != dtd.html) { | 
 | 
                  | 
 | 
                startTag(makeTag(dtd.html, true));  | 
 | 
                return legalElementContext(elem);  | 
 | 
            }  | 
 | 
            return true;  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        if (stack.advance(elem)) { | 
 | 
              | 
 | 
            markFirstTime(elem);  | 
 | 
            return true;  | 
 | 
        }  | 
 | 
        boolean insertTag = false;  | 
 | 
 | 
 | 
        // The use of all error recovery strategies are contingent  | 
 | 
        // on the value of the strict property.  | 
 | 
        //  | 
 | 
        // These are commonly occurring errors.  if insertTag is true,  | 
 | 
        // then we want to adopt an error recovery strategy that  | 
 | 
        // involves attempting to insert an additional tag to  | 
 | 
        // legalize the context.  The two errors addressed here  | 
 | 
        // are:  | 
 | 
        // 1) when a <td> or <th> is seen soon after a <table> tag.  | 
 | 
        //    In this case we insert a <tr>.  | 
 | 
        // 2) when any other tag apart from a <tr> is seen  | 
 | 
        //    in the context of a <tr>.  In this case we would  | 
 | 
        //    like to add a <td>.  If a <tr> is seen within a  | 
 | 
        //    <tr> context, then we will close out the current  | 
 | 
        //    <tr>.  | 
 | 
        //  | 
 | 
        // This insertion strategy is handled later in the method.  | 
 | 
        // The reason for checking this now, is that in other cases  | 
 | 
        // we would like to apply other error recovery strategies for example  | 
 | 
        // ignoring tags.  | 
 | 
        //  | 
 | 
        // In certain cases it is better to ignore a tag than try to  | 
 | 
        // fix the situation.  So the first test is to see if this  | 
 | 
        // is what we need to do.  | 
 | 
          | 
 | 
        String stackElemName = stack.elem.getName();  | 
 | 
        String elemName = elem.getName();  | 
 | 
 | 
 | 
 | 
 | 
        if (!strict &&  | 
 | 
            ((stackElemName.equals("table") && elemName.equals("td")) || | 
 | 
             (stackElemName.equals("table") && elemName.equals("th")) || | 
 | 
             (stackElemName.equals("tr") && !elemName.equals("tr")))){ | 
 | 
             insertTag = true;  | 
 | 
        }  | 
 | 
 | 
 | 
 | 
 | 
        if (!strict && !insertTag && (stack.elem.getName() != elem.getName() ||  | 
 | 
                                      elem.getName().equals("body"))) { | 
 | 
            if (skipTag = ignoreElement(elem)) { | 
 | 
                error("tag.ignore", elem.getName()); | 
 | 
                return skipTag;  | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
        // Check for anything after the start of the table besides tr, td, th  | 
 | 
        // or caption, and if those aren't there, insert the <tr> and call  | 
 | 
          | 
 | 
        if (!strict && stackElemName.equals("table") && | 
 | 
            !elemName.equals("tr") && !elemName.equals("td") && | 
 | 
            !elemName.equals("th") && !elemName.equals("caption")) { | 
 | 
            Element e = dtd.getElement("tr"); | 
 | 
            TagElement t = makeTag(e, true);  | 
 | 
            legalTagContext(t);  | 
 | 
            startTag(t);  | 
 | 
            error("start.missing", elem.getName()); | 
 | 
            return legalElementContext(elem);  | 
 | 
        }  | 
 | 
 | 
 | 
        // They try to find a legal context by checking if the current  | 
 | 
        // tag is valid in an enclosing context.  If so  | 
 | 
        // close out the tags by outputing end tags and then  | 
 | 
        // insert the current tag.  If the tags that are  | 
 | 
        // being closed out do not have an optional end tag  | 
 | 
        // specification in the DTD then an html error is  | 
 | 
        // reported.  | 
 | 
          | 
 | 
        if (!insertTag && stack.terminate() && (!strict || stack.elem.omitEnd())) { | 
 | 
            for (TagStack s = stack.next ; s != null ; s = s.next) { | 
 | 
                if (s.advance(elem)) { | 
 | 
                    while (stack != s) { | 
 | 
                        endTag(true);  | 
 | 
                    }  | 
 | 
                    return true;  | 
 | 
                }  | 
 | 
                if (!s.terminate() || (strict && !s.elem.omitEnd())) { | 
 | 
                    break;  | 
 | 
                }  | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
        // Check if we know what tag is expected next.  | 
 | 
        // If so insert the tag.  Report an error if the  | 
 | 
        // tag does not have its start tag spec in the DTD as optional.  | 
 | 
          | 
 | 
        Element next = stack.first();  | 
 | 
        if (next != null && (!strict || next.omitStart()) &&  | 
 | 
           !(next==dtd.head && elem==dtd.pcdata) ) { | 
 | 
              | 
 | 
            TagElement t = makeTag(next, true);  | 
 | 
            legalTagContext(t);  | 
 | 
            startTag(t);  | 
 | 
            if (!next.omitStart()) { | 
 | 
                error("start.missing", elem.getName()); | 
 | 
            }  | 
 | 
            return legalElementContext(elem);  | 
 | 
        }  | 
 | 
 | 
 | 
 | 
 | 
        // Traverse the list of expected elements and determine if adding  | 
 | 
        // any of these elements would make for a legal context.  | 
 | 
        //  | 
 | 
 | 
 | 
        if (!strict) { | 
 | 
            ContentModel content = stack.contentModel();  | 
 | 
            Vector<Element> elemVec = new Vector<Element>();  | 
 | 
            if (content != null) { | 
 | 
                content.getElements(elemVec);  | 
 | 
                for (Element e : elemVec) { | 
 | 
                    // Ensure that this element has not been included as  | 
 | 
                    // part of the exclusions in the DTD.  | 
 | 
                      | 
 | 
                    if (stack.excluded(e.getIndex())) { | 
 | 
                        continue;  | 
 | 
                    }  | 
 | 
 | 
 | 
                    boolean reqAtts = false;  | 
 | 
 | 
 | 
                    for (AttributeList a = e.getAttributes(); a != null ; a = a.next) { | 
 | 
                        if (a.modifier == REQUIRED) { | 
 | 
                            reqAtts = true;  | 
 | 
                            break;  | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                    // Ensure that no tag that has required attributes  | 
 | 
                    // gets inserted.  | 
 | 
                      | 
 | 
                    if (reqAtts) { | 
 | 
                        continue;  | 
 | 
                    }  | 
 | 
 | 
 | 
                    ContentModel m = e.getContent();  | 
 | 
                    if (m != null && m.first(elem)) { | 
 | 
                          | 
 | 
                        TagElement t = makeTag(e, true);  | 
 | 
                        legalTagContext(t);  | 
 | 
                        startTag(t);  | 
 | 
                        error("start.missing", e.getName()); | 
 | 
                        return legalElementContext(elem);  | 
 | 
                    }  | 
 | 
                }  | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
        // Check if the stack can be terminated.  If so add the appropriate  | 
 | 
        // end tag.  Report an error if the tag being ended does not have its  | 
 | 
        // end tag spec in the DTD as optional.  | 
 | 
          | 
 | 
        if (stack.terminate() && (stack.elem != dtd.body) && (!strict || stack.elem.omitEnd())) { | 
 | 
              | 
 | 
            if (!stack.elem.omitEnd()) { | 
 | 
                error("end.missing", elem.getName()); | 
 | 
            }  | 
 | 
 | 
 | 
            endTag(true);  | 
 | 
            return legalElementContext(elem);  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        return false;  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    void legalTagContext(TagElement tag) throws ChangedCharSetException { | 
 | 
        if (legalElementContext(tag.getElement())) { | 
 | 
            markFirstTime(tag.getElement());  | 
 | 
            return;  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        if (tag.breaksFlow() && (stack != null) && !stack.tag.breaksFlow()) { | 
 | 
            endTag(true);  | 
 | 
            legalTagContext(tag);  | 
 | 
            return;  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        for (TagStack s = stack ; s != null ; s = s.next) { | 
 | 
            if (s.tag.getElement() == dtd.head) { | 
 | 
                while (stack != s) { | 
 | 
                    endTag(true);  | 
 | 
                }  | 
 | 
                endTag(true);  | 
 | 
                legalTagContext(tag);  | 
 | 
                return;  | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        error("tag.unexpected", tag.getElement().getName()); | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    void errorContext() throws ChangedCharSetException { | 
 | 
        for (; (stack != null) && (stack.tag.getElement() != dtd.body) ; stack = stack.next) { | 
 | 
            handleEndTag(stack.tag);  | 
 | 
        }  | 
 | 
        if (stack == null) { | 
 | 
            legalElementContext(dtd.body);  | 
 | 
            startTag(makeTag(dtd.body, true));  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    void addString(int c) { | 
 | 
        if (strpos  == str.length) { | 
 | 
            char newstr[] = new char[str.length + 128];  | 
 | 
            System.arraycopy(str, 0, newstr, 0, str.length);  | 
 | 
            str = newstr;  | 
 | 
        }  | 
 | 
        str[strpos++] = (char)c;  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    String getString(int pos) { | 
 | 
        char newStr[] = new char[strpos - pos];  | 
 | 
        System.arraycopy(str, pos, newStr, 0, strpos - pos);  | 
 | 
        strpos = pos;  | 
 | 
        return new String(newStr);  | 
 | 
    }  | 
 | 
 | 
 | 
    char[] getChars(int pos) { | 
 | 
        char newStr[] = new char[strpos - pos];  | 
 | 
        System.arraycopy(str, pos, newStr, 0, strpos - pos);  | 
 | 
        strpos = pos;  | 
 | 
        return newStr;  | 
 | 
    }  | 
 | 
 | 
 | 
    char[] getChars(int pos, int endPos) { | 
 | 
        char newStr[] = new char[endPos - pos];  | 
 | 
        System.arraycopy(str, pos, newStr, 0, endPos - pos);  | 
 | 
        // REMIND: it's not clear whether this version should set strpos or not  | 
 | 
          | 
 | 
        return newStr;  | 
 | 
    }  | 
 | 
 | 
 | 
    void resetStrBuffer() { | 
 | 
        strpos = 0;  | 
 | 
    }  | 
 | 
 | 
 | 
    int strIndexOf(char target) { | 
 | 
        for (int i = 0; i < strpos; i++) { | 
 | 
            if (str[i] == target) { | 
 | 
                return i;  | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
        return -1;  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    void skipSpace() throws IOException { | 
 | 
        while (true) { | 
 | 
            switch (ch) { | 
 | 
              case '\n':  | 
 | 
                ln++;  | 
 | 
                ch = readCh();  | 
 | 
                lfCount++;  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '\r':  | 
 | 
                ln++;  | 
 | 
                if ((ch = readCh()) == '\n') { | 
 | 
                    ch = readCh();  | 
 | 
                    crlfCount++;  | 
 | 
                }  | 
 | 
                else { | 
 | 
                    crCount++;  | 
 | 
                }  | 
 | 
                break;  | 
 | 
              case ' ':  | 
 | 
              case '\t':  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
 | 
 | 
              default:  | 
 | 
                return;  | 
 | 
            }  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    boolean parseIdentifier(boolean lower) throws IOException { | 
 | 
        switch (ch) { | 
 | 
          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':  | 
 | 
          case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':  | 
 | 
          case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':  | 
 | 
          case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':  | 
 | 
          case 'Y': case 'Z':  | 
 | 
            if (lower) { | 
 | 
                ch = 'a' + (ch - 'A');  | 
 | 
            }  | 
 | 
 | 
 | 
          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':  | 
 | 
          case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':  | 
 | 
          case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':  | 
 | 
          case 's': case 't': case 'u': case 'v': case 'w': case 'x':  | 
 | 
          case 'y': case 'z':  | 
 | 
            break;  | 
 | 
 | 
 | 
          default:  | 
 | 
            return false;  | 
 | 
        }  | 
 | 
 | 
 | 
        while (true) { | 
 | 
            addString(ch);  | 
 | 
 | 
 | 
            switch (ch = readCh()) { | 
 | 
              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':  | 
 | 
              case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':  | 
 | 
              case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':  | 
 | 
              case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':  | 
 | 
              case 'Y': case 'Z':  | 
 | 
                if (lower) { | 
 | 
                    ch = 'a' + (ch - 'A');  | 
 | 
                }  | 
 | 
 | 
 | 
              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':  | 
 | 
              case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':  | 
 | 
              case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':  | 
 | 
              case 's': case 't': case 'u': case 'v': case 'w': case 'x':  | 
 | 
              case 'y': case 'z':  | 
 | 
 | 
 | 
              case '0': case '1': case '2': case '3': case '4':  | 
 | 
              case '5': case '6': case '7': case '8': case '9':  | 
 | 
 | 
 | 
              case '.': case '-':  | 
 | 
 | 
 | 
              case '_':   | 
 | 
                break;  | 
 | 
 | 
 | 
              default:  | 
 | 
                return true;  | 
 | 
            }  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    private char[] parseEntityReference() throws IOException { | 
 | 
        int pos = strpos;  | 
 | 
 | 
 | 
        if ((ch = readCh()) == '#') { | 
 | 
            int n = 0;  | 
 | 
            ch = readCh();  | 
 | 
            if ((ch >= '0') && (ch <= '9') ||  | 
 | 
                    ch == 'x' || ch == 'X') { | 
 | 
 | 
 | 
                if ((ch >= '0') && (ch <= '9')) { | 
 | 
                      | 
 | 
                    while ((ch >= '0') && (ch <= '9')) { | 
 | 
                        n = (n * 10) + ch - '0';  | 
 | 
                        ch = readCh();  | 
 | 
                    }  | 
 | 
                } else { | 
 | 
                      | 
 | 
                    ch = readCh();  | 
 | 
                    char lch = (char) Character.toLowerCase(ch);  | 
 | 
                    while ((lch >= '0') && (lch <= '9') ||  | 
 | 
                            (lch >= 'a') && (lch <= 'f')) { | 
 | 
                        if (lch >= '0' && lch <= '9') { | 
 | 
                            n = (n * 16) + lch - '0';  | 
 | 
                        } else { | 
 | 
                            n = (n * 16) + lch - 'a' + 10;  | 
 | 
                        }  | 
 | 
                        ch = readCh();  | 
 | 
                        lch = (char) Character.toLowerCase(ch);  | 
 | 
                    }  | 
 | 
                }  | 
 | 
                switch (ch) { | 
 | 
                    case '\n':  | 
 | 
                        ln++;  | 
 | 
                        ch = readCh();  | 
 | 
                        lfCount++;  | 
 | 
                        break;  | 
 | 
 | 
 | 
                    case '\r':  | 
 | 
                        ln++;  | 
 | 
                        if ((ch = readCh()) == '\n') { | 
 | 
                            ch = readCh();  | 
 | 
                            crlfCount++;  | 
 | 
                        }  | 
 | 
                        else { | 
 | 
                            crCount++;  | 
 | 
                        }  | 
 | 
                        break;  | 
 | 
 | 
 | 
                    case ';':  | 
 | 
                        ch = readCh();  | 
 | 
                        break;  | 
 | 
                }  | 
 | 
                char data[] = mapNumericReference(n);  | 
 | 
                return data;  | 
 | 
            }  | 
 | 
            addString('#'); | 
 | 
            if (!parseIdentifier(false)) { | 
 | 
                error("ident.expected"); | 
 | 
                strpos = pos;  | 
 | 
                char data[] = {'&', '#'}; | 
 | 
                return data;  | 
 | 
            }  | 
 | 
        } else if (!parseIdentifier(false)) { | 
 | 
            char data[] = {'&'}; | 
 | 
            return data;  | 
 | 
        }  | 
 | 
 | 
 | 
        boolean semicolon = false;  | 
 | 
 | 
 | 
        switch (ch) { | 
 | 
          case '\n':  | 
 | 
            ln++;  | 
 | 
            ch = readCh();  | 
 | 
            lfCount++;  | 
 | 
            break;  | 
 | 
 | 
 | 
          case '\r':  | 
 | 
            ln++;  | 
 | 
            if ((ch = readCh()) == '\n') { | 
 | 
                ch = readCh();  | 
 | 
                crlfCount++;  | 
 | 
            }  | 
 | 
            else { | 
 | 
                crCount++;  | 
 | 
            }  | 
 | 
            break;  | 
 | 
 | 
 | 
          case ';':  | 
 | 
            semicolon = true;  | 
 | 
 | 
 | 
            ch = readCh();  | 
 | 
            break;  | 
 | 
        }  | 
 | 
 | 
 | 
        String nm = getString(pos);  | 
 | 
        Entity ent = dtd.getEntity(nm);  | 
 | 
 | 
 | 
        // entities are case sensitive - however if strict  | 
 | 
        // is false then we will try to make a match by  | 
 | 
        // converting the string to all lowercase.  | 
 | 
          | 
 | 
        if (!strict && (ent == null)) { | 
 | 
            ent = dtd.getEntity(nm.toLowerCase());  | 
 | 
        }  | 
 | 
        if ((ent == null) || !ent.isGeneral()) { | 
 | 
 | 
 | 
            if (nm.length() == 0) { | 
 | 
                error("invalid.entref", nm); | 
 | 
                return new char[0];  | 
 | 
            }  | 
 | 
              | 
 | 
            String str = "&" + nm + (semicolon ? ";" : "");  | 
 | 
 | 
 | 
            char b[] = new char[str.length()];  | 
 | 
            str.getChars(0, b.length, b, 0);  | 
 | 
            return b;  | 
 | 
        }  | 
 | 
        return ent.getData();  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    private char[] mapNumericReference(int c) { | 
 | 
        char[] data;  | 
 | 
        if (c >= 0xffff) {  | 
 | 
            try { | 
 | 
                data = Character.toChars(c);  | 
 | 
            } catch (IllegalArgumentException e) { | 
 | 
                data = new char[0];  | 
 | 
            }  | 
 | 
        } else { | 
 | 
            data = new char[1];  | 
 | 
            data[0] = (c < 130 || c > 159) ? (char) c : cp1252Map[c - 130];  | 
 | 
        }  | 
 | 
        return data;  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    void parseComment() throws IOException { | 
 | 
 | 
 | 
        while (true) { | 
 | 
            int c = ch;  | 
 | 
            switch (c) { | 
 | 
              case '-':  | 
 | 
                    | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
                   **/  | 
 | 
                if (!strict && (strpos != 0) && (str[strpos - 1] == '-')) { | 
 | 
                    if ((ch = readCh()) == '>') { | 
 | 
                        return;  | 
 | 
                    }  | 
 | 
                    if (ch == '!') { | 
 | 
                        if ((ch = readCh()) == '>') { | 
 | 
                            return;  | 
 | 
                        } else { | 
 | 
                              | 
 | 
                            addString('-'); | 
 | 
                            addString('!'); | 
 | 
                            continue;  | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                    break;  | 
 | 
                }  | 
 | 
 | 
 | 
                if ((ch = readCh()) == '-') { | 
 | 
                    ch = readCh();  | 
 | 
                    if (strict || ch == '>') { | 
 | 
                        return;  | 
 | 
                    }  | 
 | 
                    if (ch == '!') { | 
 | 
                        if ((ch = readCh()) == '>') { | 
 | 
                            return;  | 
 | 
                        } else { | 
 | 
                              | 
 | 
                            addString('-'); | 
 | 
                            addString('!'); | 
 | 
                            continue;  | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                      | 
 | 
                    addString('-'); | 
 | 
                }  | 
 | 
                break;  | 
 | 
 | 
 | 
              case -1:  | 
 | 
                  handleEOFInComment();  | 
 | 
                  return;  | 
 | 
 | 
 | 
              case '\n':  | 
 | 
                ln++;  | 
 | 
                ch = readCh();  | 
 | 
                lfCount++;  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '>':  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '\r':  | 
 | 
                ln++;  | 
 | 
                if ((ch = readCh()) == '\n') { | 
 | 
                    ch = readCh();  | 
 | 
                    crlfCount++;  | 
 | 
                }  | 
 | 
                else { | 
 | 
                    crCount++;  | 
 | 
                }  | 
 | 
                c = '\n';  | 
 | 
                break;  | 
 | 
              default:  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
            }  | 
 | 
 | 
 | 
            addString(c);  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    void parseLiteral(boolean replace) throws IOException { | 
 | 
        while (true) { | 
 | 
            int c = ch;  | 
 | 
            switch (c) { | 
 | 
              case -1:  | 
 | 
                error("eof.literal", stack.elem.getName()); | 
 | 
                endTag(true);  | 
 | 
                return;  | 
 | 
 | 
 | 
              case '>':  | 
 | 
                ch = readCh();  | 
 | 
                int i = textpos - (stack.elem.name.length() + 2), j = 0;  | 
 | 
 | 
 | 
                  | 
 | 
                if ((i >= 0) && (text[i++] == '<') && (text[i] == '/')) { | 
 | 
                    while ((++i < textpos) &&  | 
 | 
                           (Character.toLowerCase(text[i]) == stack.elem.name.charAt(j++)));  | 
 | 
                    if (i == textpos) { | 
 | 
                        textpos -= (stack.elem.name.length() + 2);  | 
 | 
                        if ((textpos > 0) && (text[textpos-1] == '\n')) { | 
 | 
                            textpos--;  | 
 | 
                        }  | 
 | 
                        endTag(false);  | 
 | 
                        return;  | 
 | 
                    }  | 
 | 
                }  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '&':  | 
 | 
                char data[] = parseEntityReference();  | 
 | 
                if (textpos + data.length > text.length) { | 
 | 
                    char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];  | 
 | 
                    System.arraycopy(text, 0, newtext, 0, text.length);  | 
 | 
                    text = newtext;  | 
 | 
                }  | 
 | 
                System.arraycopy(data, 0, text, textpos, data.length);  | 
 | 
                textpos += data.length;  | 
 | 
                continue;  | 
 | 
 | 
 | 
              case '\n':  | 
 | 
                ln++;  | 
 | 
                ch = readCh();  | 
 | 
                lfCount++;  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '\r':  | 
 | 
                ln++;  | 
 | 
                if ((ch = readCh()) == '\n') { | 
 | 
                    ch = readCh();  | 
 | 
                    crlfCount++;  | 
 | 
                }  | 
 | 
                else { | 
 | 
                    crCount++;  | 
 | 
                }  | 
 | 
                c = '\n';  | 
 | 
                break;  | 
 | 
              default:  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
            }  | 
 | 
 | 
 | 
              | 
 | 
            if (textpos == text.length) { | 
 | 
                char newtext[] = new char[text.length + 128];  | 
 | 
                System.arraycopy(text, 0, newtext, 0, text.length);  | 
 | 
                text = newtext;  | 
 | 
            }  | 
 | 
            text[textpos++] = (char)c;  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    String parseAttributeValue(boolean lower) throws IOException { | 
 | 
        int delim = -1;  | 
 | 
 | 
 | 
          | 
 | 
        switch(ch) { | 
 | 
          case '\'':  | 
 | 
          case '"':  | 
 | 
            delim = ch;  | 
 | 
            ch = readCh();  | 
 | 
            break;  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        while (true) { | 
 | 
            int c = ch;  | 
 | 
 | 
 | 
            switch (c) { | 
 | 
              case '\n':  | 
 | 
                ln++;  | 
 | 
                ch = readCh();  | 
 | 
                lfCount++;  | 
 | 
                if (delim < 0) { | 
 | 
                    return getString(0);  | 
 | 
                }  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '\r':  | 
 | 
                ln++;  | 
 | 
 | 
 | 
                if ((ch = readCh()) == '\n') { | 
 | 
                    ch = readCh();  | 
 | 
                    crlfCount++;  | 
 | 
                }  | 
 | 
                else { | 
 | 
                    crCount++;  | 
 | 
                }  | 
 | 
                if (delim < 0) { | 
 | 
                    return getString(0);  | 
 | 
                }  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '\t':  | 
 | 
                  if (delim < 0)  | 
 | 
                      c = ' ';  | 
 | 
              case ' ':  | 
 | 
                ch = readCh();  | 
 | 
                if (delim < 0) { | 
 | 
                    return getString(0);  | 
 | 
                }  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '>':  | 
 | 
              case '<':  | 
 | 
                if (delim < 0) { | 
 | 
                    return getString(0);  | 
 | 
                }  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '\'':  | 
 | 
              case '"':  | 
 | 
                ch = readCh();  | 
 | 
                if (c == delim) { | 
 | 
                    return getString(0);  | 
 | 
                } else if (delim == -1) { | 
 | 
                    error("attvalerr"); | 
 | 
                    if (strict || ch == ' ') { | 
 | 
                        return getString(0);  | 
 | 
                    } else { | 
 | 
                        continue;  | 
 | 
                    }  | 
 | 
                }  | 
 | 
                break;  | 
 | 
 | 
 | 
            case '=':  | 
 | 
                if (delim < 0) { | 
 | 
                      | 
 | 
 | 
 | 
 | 
 | 
                       */  | 
 | 
                    error("attvalerr"); | 
 | 
                      | 
 | 
 | 
 | 
                       process the rest of the img tag. */  | 
 | 
                    if (strict) { | 
 | 
                        return getString(0);  | 
 | 
                    }  | 
 | 
                }  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
 | 
 | 
              case '&':  | 
 | 
                if (strict && delim < 0) { | 
 | 
                    ch = readCh();  | 
 | 
                    break;  | 
 | 
                }  | 
 | 
 | 
 | 
                char data[] = parseEntityReference();  | 
 | 
                for (int i = 0 ; i < data.length ; i++) { | 
 | 
                    c = data[i];  | 
 | 
                    addString((lower && (c >= 'A') && (c <= 'Z')) ? 'a' + c - 'A' : c);  | 
 | 
                }  | 
 | 
                continue;  | 
 | 
 | 
 | 
              case -1:  | 
 | 
                return getString(0);  | 
 | 
 | 
 | 
              default:  | 
 | 
                if (lower && (c >= 'A') && (c <= 'Z')) { | 
 | 
                    c = 'a' + c - 'A';  | 
 | 
                }  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
            }  | 
 | 
            addString(c);  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    void parseAttributeSpecificationList(Element elem) throws IOException { | 
 | 
 | 
 | 
        while (true) { | 
 | 
            skipSpace();  | 
 | 
 | 
 | 
            switch (ch) { | 
 | 
              case '/':  | 
 | 
              case '>':  | 
 | 
              case '<':  | 
 | 
              case -1:  | 
 | 
                return;  | 
 | 
 | 
 | 
              case '-':  | 
 | 
                if ((ch = readCh()) == '-') { | 
 | 
                    ch = readCh();  | 
 | 
                    parseComment();  | 
 | 
                    strpos = 0;  | 
 | 
                } else { | 
 | 
                    error("invalid.tagchar", "-", elem.getName()); | 
 | 
                    ch = readCh();  | 
 | 
                }  | 
 | 
                continue;  | 
 | 
            }  | 
 | 
 | 
 | 
            AttributeList att;  | 
 | 
            String attname;  | 
 | 
            String attvalue;  | 
 | 
 | 
 | 
            if (parseIdentifier(true)) { | 
 | 
                attname = getString(0);  | 
 | 
                skipSpace();  | 
 | 
                if (ch == '=') { | 
 | 
                    ch = readCh();  | 
 | 
                    skipSpace();  | 
 | 
                    att = elem.getAttribute(attname);  | 
 | 
//  Bug ID 4102750  | 
 | 
//  Load the NAME of an Attribute Case Sensitive  | 
 | 
//  The case of the NAME  must be intact  | 
 | 
 | 
 | 
                    attvalue = parseAttributeValue((att != null) && (att.type != CDATA) && (att.type != NOTATION) && (att.type != NAME));  | 
 | 
//                  attvalue = parseAttributeValue((att != null) && (att.type != CDATA) && (att.type != NOTATION));  | 
 | 
                } else { | 
 | 
                    attvalue = attname;  | 
 | 
                    att = elem.getAttributeByValue(attvalue);  | 
 | 
                    if (att == null) { | 
 | 
                        att = elem.getAttribute(attname);  | 
 | 
                        if (att != null) { | 
 | 
                            attvalue = att.getValue();  | 
 | 
                        }  | 
 | 
                        else { | 
 | 
                            // Make it null so that NULL_ATTRIBUTE_VALUE is  | 
 | 
                              | 
 | 
                            attvalue = null;  | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                }  | 
 | 
            } else if (!strict && ch == ',') {  | 
 | 
                ch = readCh();  | 
 | 
                continue;  | 
 | 
            } else if (!strict && ch == '"') {  | 
 | 
                ch = readCh();  | 
 | 
                skipSpace();  | 
 | 
                if (parseIdentifier(true)) { | 
 | 
                    attname = getString(0);  | 
 | 
                    if (ch == '"') { | 
 | 
                        ch = readCh();  | 
 | 
                    }  | 
 | 
                    skipSpace();  | 
 | 
                    if (ch == '=') { | 
 | 
                        ch = readCh();  | 
 | 
                        skipSpace();  | 
 | 
                        att = elem.getAttribute(attname);  | 
 | 
                        attvalue = parseAttributeValue((att != null) &&  | 
 | 
                                                (att.type != CDATA) &&  | 
 | 
                                                (att.type != NOTATION));  | 
 | 
                    } else { | 
 | 
                        attvalue = attname;  | 
 | 
                        att = elem.getAttributeByValue(attvalue);  | 
 | 
                        if (att == null) { | 
 | 
                            att = elem.getAttribute(attname);  | 
 | 
                            if (att != null) { | 
 | 
                                attvalue = att.getValue();  | 
 | 
                            }  | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                } else { | 
 | 
                    char str[] = {(char)ch}; | 
 | 
                    error("invalid.tagchar", new String(str), elem.getName()); | 
 | 
                    ch = readCh();  | 
 | 
                    continue;  | 
 | 
                }  | 
 | 
            } else if (!strict && (attributes.isEmpty()) && (ch == '=')) { | 
 | 
                ch = readCh();  | 
 | 
                skipSpace();  | 
 | 
                attname = elem.getName();  | 
 | 
                att = elem.getAttribute(attname);  | 
 | 
                attvalue = parseAttributeValue((att != null) &&  | 
 | 
                                               (att.type != CDATA) &&  | 
 | 
                                               (att.type != NOTATION));  | 
 | 
            } else if (!strict && (ch == '=')) { | 
 | 
                ch = readCh();  | 
 | 
                skipSpace();  | 
 | 
                attvalue = parseAttributeValue(true);  | 
 | 
                error("attvalerr"); | 
 | 
                return;  | 
 | 
            } else { | 
 | 
                char str[] = {(char)ch}; | 
 | 
                error("invalid.tagchar", new String(str), elem.getName()); | 
 | 
                if (!strict) { | 
 | 
                    ch = readCh();  | 
 | 
                    continue;  | 
 | 
                } else { | 
 | 
                    return;  | 
 | 
                }  | 
 | 
            }  | 
 | 
 | 
 | 
            if (att != null) { | 
 | 
                attname = att.getName();  | 
 | 
            } else { | 
 | 
                error("invalid.tagatt", attname, elem.getName()); | 
 | 
            }  | 
 | 
 | 
 | 
              | 
 | 
            if (attributes.isDefined(attname)) { | 
 | 
                error("multi.tagatt", attname, elem.getName()); | 
 | 
            }  | 
 | 
            if (attvalue == null) { | 
 | 
                attvalue = ((att != null) && (att.value != null)) ? att.value :  | 
 | 
                    HTML.NULL_ATTRIBUTE_VALUE;  | 
 | 
            } else if ((att != null) && (att.values != null) && !att.values.contains(attvalue)) { | 
 | 
                error("invalid.tagattval", attname, elem.getName()); | 
 | 
            }  | 
 | 
            HTML.Attribute attkey = HTML.getAttributeKey(attname);  | 
 | 
            if (attkey == null) { | 
 | 
                attributes.addAttribute(attname, attvalue);  | 
 | 
            } else { | 
 | 
                attributes.addAttribute(attkey, attvalue);  | 
 | 
            }  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    public String parseDTDMarkup() throws IOException { | 
 | 
 | 
 | 
        StringBuilder strBuff = new StringBuilder();  | 
 | 
        ch = readCh();  | 
 | 
        while(true) { | 
 | 
            switch (ch) { | 
 | 
            case '>':  | 
 | 
                ch = readCh();  | 
 | 
                return strBuff.toString();  | 
 | 
            case -1:  | 
 | 
                error("invalid.markup"); | 
 | 
                return strBuff.toString();  | 
 | 
            case '\n':  | 
 | 
                ln++;  | 
 | 
                ch = readCh();  | 
 | 
                lfCount++;  | 
 | 
                break;  | 
 | 
            case '"':  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
            case '\r':  | 
 | 
                ln++;  | 
 | 
                if ((ch = readCh()) == '\n') { | 
 | 
                    ch = readCh();  | 
 | 
                    crlfCount++;  | 
 | 
                }  | 
 | 
                else { | 
 | 
                    crCount++;  | 
 | 
                }  | 
 | 
                break;  | 
 | 
            default:  | 
 | 
                strBuff.append((char)(ch & 0xFF));  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
            }  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
     */  | 
 | 
    protected boolean parseMarkupDeclarations(StringBuffer strBuff) throws IOException { | 
 | 
 | 
 | 
          | 
 | 
        if ((strBuff.length() == "DOCTYPE".length()) &&  | 
 | 
            (strBuff.toString().toUpperCase().equals("DOCTYPE"))) { | 
 | 
            parseDTDMarkup();  | 
 | 
            return true;  | 
 | 
        }  | 
 | 
        return false;  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    void parseInvalidTag() throws IOException { | 
 | 
          | 
 | 
        while (true) { | 
 | 
            skipSpace();  | 
 | 
            switch (ch) { | 
 | 
              case '>':  | 
 | 
              case -1:  | 
 | 
                  ch = readCh();  | 
 | 
                return;  | 
 | 
              case '<':  | 
 | 
                  return;  | 
 | 
              default:  | 
 | 
                  ch = readCh();  | 
 | 
 | 
 | 
            }  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
      | 
 | 
 | 
 | 
     */  | 
 | 
    void parseTag() throws IOException { | 
 | 
        Element elem;  | 
 | 
        boolean net = false;  | 
 | 
        boolean warned = false;  | 
 | 
        boolean unknown = false;  | 
 | 
 | 
 | 
        switch (ch = readCh()) { | 
 | 
          case '!':  | 
 | 
            switch (ch = readCh()) { | 
 | 
              case '-':  | 
 | 
                  | 
 | 
                while (true) { | 
 | 
                    if (ch == '-') { | 
 | 
                        if (!strict || ((ch = readCh()) == '-')) { | 
 | 
                            ch = readCh();  | 
 | 
                            if (!strict && ch == '-') { | 
 | 
                                ch = readCh();  | 
 | 
                            }  | 
 | 
                            // send over any text you might see  | 
 | 
                            // before parsing and sending the  | 
 | 
                              | 
 | 
                            if (textpos != 0) { | 
 | 
                                char newtext[] = new char[textpos];  | 
 | 
                                System.arraycopy(text, 0, newtext, 0, textpos);  | 
 | 
                                handleText(newtext);  | 
 | 
                                lastBlockStartPos = currentBlockStartPos;  | 
 | 
                                textpos = 0;  | 
 | 
                            }  | 
 | 
                            parseComment();  | 
 | 
                            last = makeTag(dtd.getElement("comment"), true); | 
 | 
                            handleComment(getChars(0));  | 
 | 
                            continue;  | 
 | 
                        } else if (!warned) { | 
 | 
                            warned = true;  | 
 | 
                            error("invalid.commentchar", "-"); | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                    skipSpace();  | 
 | 
                    switch (ch) { | 
 | 
                      case '-':  | 
 | 
                        continue;  | 
 | 
                      case '>':  | 
 | 
                        ch = readCh();  | 
 | 
                      case -1:  | 
 | 
                        return;  | 
 | 
                      default:  | 
 | 
                        ch = readCh();  | 
 | 
                        if (!warned) { | 
 | 
                            warned = true;  | 
 | 
                            error("invalid.commentchar", | 
 | 
                                  String.valueOf((char)ch));  | 
 | 
                        }  | 
 | 
                        break;  | 
 | 
                    }  | 
 | 
                }  | 
 | 
 | 
 | 
              default:  | 
 | 
                  | 
 | 
                StringBuffer strBuff = new StringBuffer();  | 
 | 
                while (true) { | 
 | 
                    strBuff.append((char)ch);  | 
 | 
                    if (parseMarkupDeclarations(strBuff)) { | 
 | 
                        return;  | 
 | 
                    }  | 
 | 
                    switch(ch) { | 
 | 
                      case '>':  | 
 | 
                        ch = readCh();  | 
 | 
                      case -1:  | 
 | 
                        error("invalid.markup"); | 
 | 
                        return;  | 
 | 
                      case '\n':  | 
 | 
                        ln++;  | 
 | 
                        ch = readCh();  | 
 | 
                        lfCount++;  | 
 | 
                        break;  | 
 | 
                      case '\r':  | 
 | 
                        ln++;  | 
 | 
                        if ((ch = readCh()) == '\n') { | 
 | 
                            ch = readCh();  | 
 | 
                            crlfCount++;  | 
 | 
                        }  | 
 | 
                        else { | 
 | 
                            crCount++;  | 
 | 
                        }  | 
 | 
                        break;  | 
 | 
 | 
 | 
                      default:  | 
 | 
                        ch = readCh();  | 
 | 
                        break;  | 
 | 
                    }  | 
 | 
                }  | 
 | 
            }  | 
 | 
 | 
 | 
          case '/':  | 
 | 
              | 
 | 
            switch (ch = readCh()) { | 
 | 
              case '>':  | 
 | 
                ch = readCh();  | 
 | 
              case '<':  | 
 | 
                  | 
 | 
                if (recent == null) { | 
 | 
                    error("invalid.shortend"); | 
 | 
                    return;  | 
 | 
                }  | 
 | 
                elem = recent;  | 
 | 
                break;  | 
 | 
 | 
 | 
              default:  | 
 | 
                if (!parseIdentifier(true)) { | 
 | 
                    error("expected.endtagname"); | 
 | 
                    return;  | 
 | 
                }  | 
 | 
                skipSpace();  | 
 | 
                switch (ch) { | 
 | 
                  case '>':  | 
 | 
                    ch = readCh();  | 
 | 
                  case '<':  | 
 | 
                    break;  | 
 | 
 | 
 | 
                  default:  | 
 | 
                    error("expected", "'>'"); | 
 | 
                    while ((ch != -1) && (ch != '\n') && (ch != '>')) { | 
 | 
                        ch = readCh();  | 
 | 
                    }  | 
 | 
                    if (ch == '>') { | 
 | 
                        ch = readCh();  | 
 | 
                    }  | 
 | 
                    break;  | 
 | 
                }  | 
 | 
                String elemStr = getString(0);  | 
 | 
                if (!dtd.elementExists(elemStr)) { | 
 | 
                    error("end.unrecognized", elemStr); | 
 | 
                      | 
 | 
                    if ((textpos > 0) && (text[textpos-1] == '\n')) { | 
 | 
                        textpos--;  | 
 | 
                    }  | 
 | 
                    elem = dtd.getElement("unknown"); | 
 | 
                    elem.name = elemStr;  | 
 | 
                    unknown = true;  | 
 | 
                } else { | 
 | 
                    elem = dtd.getElement(elemStr);  | 
 | 
                }  | 
 | 
                break;  | 
 | 
            }  | 
 | 
 | 
 | 
 | 
 | 
            // If the stack is null, we're seeing end tags without any begin  | 
 | 
            // tags.  Ignore them.  | 
 | 
 | 
 | 
            if (stack == null) { | 
 | 
                error("end.extra.tag", elem.getName()); | 
 | 
                return;  | 
 | 
            }  | 
 | 
 | 
 | 
              | 
 | 
            if ((textpos > 0) && (text[textpos-1] == '\n')) { | 
 | 
                // In a pre tag, if there are blank lines  | 
 | 
                // we do not want to remove the newline  | 
 | 
                // before the end tag.  Hence this code.  | 
 | 
                  | 
 | 
                if (stack.pre) { | 
 | 
                    if ((textpos > 1) && (text[textpos-2] != '\n')) { | 
 | 
                        textpos--;  | 
 | 
                    }  | 
 | 
                } else { | 
 | 
                    textpos--;  | 
 | 
                }  | 
 | 
            }  | 
 | 
 | 
 | 
            // If the end tag is a form, since we did not put it  | 
 | 
            // on the tag stack, there is no corresponding start  | 
 | 
            // start tag to find. Hence do not touch the tag stack.  | 
 | 
            //  | 
 | 
 | 
 | 
            /*  | 
 | 
            if (!strict && elem.getName().equals("form")) { | 
 | 
                if (lastFormSent != null) { | 
 | 
                    handleEndTag(lastFormSent);  | 
 | 
                    return;  | 
 | 
                } else { | 
 | 
                    // do nothing.  | 
 | 
                    return;  | 
 | 
                }  | 
 | 
            }  | 
 | 
            */  | 
 | 
 | 
 | 
            if (unknown) { | 
 | 
                // we will not see a corresponding start tag  | 
 | 
                // on the the stack.  If we are seeing an  | 
 | 
                // end tag, lets send this on as an empty  | 
 | 
                // tag with the end tag attribute set to  | 
 | 
                  | 
 | 
                TagElement t = makeTag(elem);  | 
 | 
                handleText(t);  | 
 | 
                attributes.addAttribute(HTML.Attribute.ENDTAG, "true");  | 
 | 
                handleEmptyTag(makeTag(elem));  | 
 | 
                unknown = false;  | 
 | 
                return;  | 
 | 
            }  | 
 | 
 | 
 | 
            // find the corresponding start tag  | 
 | 
 | 
 | 
            // A commonly occurring error appears to be the insertion  | 
 | 
            // of extra end tags in a table.  The intent here is ignore  | 
 | 
            // such extra end tags.  | 
 | 
              | 
 | 
            if (!strict) { | 
 | 
                String stackElem = stack.elem.getName();  | 
 | 
 | 
 | 
                if (stackElem.equals("table")) { | 
 | 
                    // If it is not a valid end tag ignore it and return  | 
 | 
                      | 
 | 
                    if (!elem.getName().equals(stackElem)) { | 
 | 
                        error("tag.ignore", elem.getName()); | 
 | 
                        return;  | 
 | 
                    }  | 
 | 
                }  | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
                if (stackElem.equals("tr") || | 
 | 
                    stackElem.equals("td")) { | 
 | 
                    if ((!elem.getName().equals("table")) && | 
 | 
                        (!elem.getName().equals(stackElem))) { | 
 | 
                        error("tag.ignore", elem.getName()); | 
 | 
                        return;  | 
 | 
                    }  | 
 | 
                }  | 
 | 
            }  | 
 | 
            TagStack sp = stack;  | 
 | 
 | 
 | 
            while ((sp != null) && (elem != sp.elem)) { | 
 | 
                sp = sp.next;  | 
 | 
            }  | 
 | 
            if (sp == null) { | 
 | 
                error("unmatched.endtag", elem.getName()); | 
 | 
                return;  | 
 | 
            }  | 
 | 
 | 
 | 
            // People put font ending tags in the darndest places.  | 
 | 
            // Don't close other contexts based on them being between  | 
 | 
            // a font tag and the corresponding end tag.  Instead,  | 
 | 
            // ignore the end tag like it doesn't exist and allow the end  | 
 | 
              | 
 | 
            String elemName = elem.getName();  | 
 | 
            if (stack != sp &&  | 
 | 
                (elemName.equals("font") || | 
 | 
                 elemName.equals("center"))) { | 
 | 
 | 
 | 
                // Since closing out a center tag can have real wierd  | 
 | 
                // effects on the formatting,  make sure that tags  | 
 | 
                // for which omitting an end tag is legimitate  | 
 | 
                // get closed out.  | 
 | 
                  | 
 | 
                if (elemName.equals("center")) { | 
 | 
                    while(stack.elem.omitEnd() && stack != sp) { | 
 | 
                        endTag(true);  | 
 | 
                    }  | 
 | 
                    if (stack.elem == elem) { | 
 | 
                        endTag(false);  | 
 | 
                    }  | 
 | 
                }  | 
 | 
                return;  | 
 | 
            }  | 
 | 
            // People do the same thing with center tags.  In this  | 
 | 
            // case we would like to close off the center tag but  | 
 | 
            // not necessarily all enclosing tags.  | 
 | 
 | 
 | 
 | 
 | 
 | 
 | 
              | 
 | 
            while (stack != sp) { | 
 | 
                endTag(true);  | 
 | 
            }  | 
 | 
 | 
 | 
            endTag(false);  | 
 | 
            return;  | 
 | 
 | 
 | 
          case -1:  | 
 | 
            error("eof"); | 
 | 
            return;  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        if (!parseIdentifier(true)) { | 
 | 
            elem = recent;  | 
 | 
            if ((ch != '>') || (elem == null)) { | 
 | 
                error("expected.tagname"); | 
 | 
                return;  | 
 | 
            }  | 
 | 
        } else { | 
 | 
            String elemStr = getString(0);  | 
 | 
 | 
 | 
            if (elemStr.equals("image")) { | 
 | 
                elemStr = "img";  | 
 | 
            }  | 
 | 
 | 
 | 
            /* determine if this element is part of the dtd. */  | 
 | 
 | 
 | 
            if (!dtd.elementExists(elemStr)) { | 
 | 
                  | 
 | 
                error("tag.unrecognized ", elemStr); | 
 | 
                elem = dtd.getElement("unknown"); | 
 | 
                elem.name = elemStr;  | 
 | 
                unknown = true;  | 
 | 
            } else { | 
 | 
                elem = dtd.getElement(elemStr);  | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        parseAttributeSpecificationList(elem);  | 
 | 
 | 
 | 
        switch (ch) { | 
 | 
          case '/':  | 
 | 
            net = true;  | 
 | 
          case '>':  | 
 | 
            ch = readCh();  | 
 | 
            if (ch == '>' && net) { | 
 | 
                ch = readCh();  | 
 | 
            }  | 
 | 
          case '<':  | 
 | 
            break;  | 
 | 
 | 
 | 
          default:  | 
 | 
            error("expected", "'>'"); | 
 | 
            break;  | 
 | 
        }  | 
 | 
 | 
 | 
        if (!strict) { | 
 | 
          if (elem.getName().equals("script")) { | 
 | 
            error("javascript.unsupported"); | 
 | 
          }  | 
 | 
        }  | 
 | 
 | 
 | 
        // ignore RE after start tag  | 
 | 
          | 
 | 
        if (!elem.isEmpty())  { | 
 | 
            if (ch == '\n') { | 
 | 
                ln++;  | 
 | 
                lfCount++;  | 
 | 
                ch = readCh();  | 
 | 
            } else if (ch == '\r') { | 
 | 
                ln++;  | 
 | 
                if ((ch = readCh()) == '\n') { | 
 | 
                    ch = readCh();  | 
 | 
                    crlfCount++;  | 
 | 
                }  | 
 | 
                else { | 
 | 
                    crCount++;  | 
 | 
                }  | 
 | 
            }  | 
 | 
        }  | 
 | 
 | 
 | 
          | 
 | 
        TagElement tag = makeTag(elem, false);  | 
 | 
 | 
 | 
 | 
 | 
        /** In dealing with forms, we have decided to treat  | 
 | 
            them as legal in any context.  Also, even though  | 
 | 
            they do have a start and an end tag, we will  | 
 | 
            not put this tag on the stack.  This is to deal with
 | 
            several pages in the web oasis that choose to  | 
 | 
            start and end forms in any possible location. **/  | 
 | 
 | 
 | 
        /*  | 
 | 
        if (!strict && elem.getName().equals("form")) { | 
 | 
            if (lastFormSent == null) { | 
 | 
                lastFormSent = tag;  | 
 | 
            } else { | 
 | 
                handleEndTag(lastFormSent);  | 
 | 
                lastFormSent = tag;  | 
 | 
            }  | 
 | 
        } else { | 
 | 
        */  | 
 | 
            // Similarly, if a tag is unknown, we will apply
 | 
            // no legalTagContext logic to it.  | 
 | 
              | 
 | 
            if (!unknown) { | 
 | 
                legalTagContext(tag);  | 
 | 
 | 
 | 
                // If skip tag is true,  this implies that
                // the tag was illegal and that the error
                // recovery strategy adopted is to ignore
                // the tag.
 | 
                if (!strict && skipTag) { | 
 | 
                    skipTag = false;  | 
 | 
                    return;  | 
 | 
                }  | 
 | 
            }  | 
 | 
            /*  | 
 | 
        }  | 
 | 
            */  | 
 | 
 | 
 | 
        startTag(tag);  | 
 | 
 | 
 | 
        if (!elem.isEmpty()) { | 
 | 
            switch (elem.getType()) { | 
 | 
              case CDATA:  | 
 | 
                parseLiteral(false);  | 
 | 
                break;  | 
 | 
              case RCDATA:  | 
 | 
                parseLiteral(true);  | 
 | 
                break;  | 
 | 
              default:  | 
 | 
                if (stack != null) { | 
 | 
                    stack.net = net;  | 
 | 
                }  | 
 | 
                break;  | 
 | 
            }  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
    private static final String START_COMMENT = "<!--";  | 
 | 
    private static final String END_COMMENT = "-->";  | 
 | 
    private static final char[] SCRIPT_END_TAG = "</script>".toCharArray();  | 
 | 
    private static final char[] SCRIPT_END_TAG_UPPER_CASE =  | 
 | 
                                        "</SCRIPT>".toCharArray();  | 
 | 
 | 
 | 
    void parseScript() throws IOException { | 
 | 
        char[] charsToAdd = new char[SCRIPT_END_TAG.length];  | 
 | 
        boolean insideComment = false;  | 
 | 
 | 
 | 
          | 
 | 
        while (true) { | 
 | 
            int i = 0;  | 
 | 
            while (!insideComment && i < SCRIPT_END_TAG.length  | 
 | 
                    && (SCRIPT_END_TAG[i] == ch  | 
 | 
                    || SCRIPT_END_TAG_UPPER_CASE[i] == ch)) { | 
 | 
                charsToAdd[i] = (char) ch;  | 
 | 
                ch = readCh();  | 
 | 
                i++;  | 
 | 
            }  | 
 | 
            if (i == SCRIPT_END_TAG.length) { | 
 | 
                return;  | 
 | 
            }  | 
 | 
 | 
 | 
            if (!insideComment && i == 1 && charsToAdd[0] == START_COMMENT.charAt(0)) { | 
 | 
                  | 
 | 
                while (i < START_COMMENT.length()  | 
 | 
                        && START_COMMENT.charAt(i) == ch) { | 
 | 
                    charsToAdd[i] = (char) ch;  | 
 | 
                    ch = readCh();  | 
 | 
                    i++;  | 
 | 
                }  | 
 | 
                if (i == START_COMMENT.length()) { | 
 | 
                    insideComment = true;  | 
 | 
                }  | 
 | 
            }  | 
 | 
            if (insideComment) { | 
 | 
                while (i < END_COMMENT.length()  | 
 | 
                        && END_COMMENT.charAt(i) == ch) { | 
 | 
                    charsToAdd[i] = (char) ch;  | 
 | 
                    ch = readCh();  | 
 | 
                    i++;  | 
 | 
                }  | 
 | 
                if (i == END_COMMENT.length()) { | 
 | 
                    insideComment = false;  | 
 | 
                }  | 
 | 
            }  | 
 | 
 | 
 | 
              | 
 | 
            if (i > 0) { | 
 | 
                for (int j = 0; j < i; j++) { | 
 | 
                    addString(charsToAdd[j]);  | 
 | 
                }  | 
 | 
                continue;  | 
 | 
            }  | 
 | 
            switch (ch) { | 
 | 
            case -1:  | 
 | 
                error("eof.script"); | 
 | 
                return;  | 
 | 
            case '\n':  | 
 | 
                ln++;  | 
 | 
                ch = readCh();  | 
 | 
                lfCount++;  | 
 | 
                addString('\n'); | 
 | 
                break;  | 
 | 
            case '\r':  | 
 | 
                ln++;  | 
 | 
                if ((ch = readCh()) == '\n') { | 
 | 
                    ch = readCh();  | 
 | 
                    crlfCount++;  | 
 | 
                } else { | 
 | 
                    crCount++;  | 
 | 
                }  | 
 | 
                addString('\n'); | 
 | 
                break;  | 
 | 
            default:  | 
 | 
                addString(ch);  | 
 | 
                ch = readCh();  | 
 | 
                break;  | 
 | 
            } // switch  | 
 | 
        } // while  | 
 | 
    }  | 
 | 
 | 
 | 
    // Content parsing: the main loop that parse() runs over the input.
 | 
    void parseContent() throws IOException { | 
 | 
        Thread curThread = Thread.currentThread();  | 
 | 
 | 
 | 
        for (;;) { | 
 | 
            if (curThread.isInterrupted()) { | 
 | 
                curThread.interrupt();   | 
 | 
                break;  | 
 | 
            }  | 
 | 
 | 
 | 
            int c = ch;  | 
 | 
            currentBlockStartPos = currentPosition;  | 
 | 
 | 
 | 
            if (recent == dtd.script) { // means: if after starting <script> tag | 
 | 
 | 
 | 
                  | 
 | 
                parseScript();  | 
 | 
                last = makeTag(dtd.getElement("comment"), true); | 
 | 
 | 
 | 
                  | 
 | 
                String str = new String(getChars(0)).trim();  | 
 | 
                int minLength = START_COMMENT.length() + END_COMMENT.length();  | 
 | 
                if (str.startsWith(START_COMMENT) && str.endsWith(END_COMMENT)  | 
 | 
                       && str.length() >= (minLength)) { | 
 | 
                    str = str.substring(START_COMMENT.length(),  | 
 | 
                                      str.length() - END_COMMENT.length());  | 
 | 
                }  | 
 | 
 | 
 | 
                  | 
 | 
                handleComment(str.toCharArray());  | 
 | 
                endTag(false);  | 
 | 
                lastBlockStartPos = currentPosition;  | 
 | 
 | 
 | 
                continue;  | 
 | 
            } else { | 
 | 
                switch (c) { | 
 | 
                  case '<':  | 
 | 
                    parseTag();  | 
 | 
                    lastBlockStartPos = currentPosition;  | 
 | 
                    continue;  | 
 | 
 | 
 | 
                  case '/':  | 
 | 
                    ch = readCh();  | 
 | 
                    if ((stack != null) && stack.net) { | 
 | 
                          | 
 | 
                        endTag(false);  | 
 | 
                        continue;  | 
 | 
                    } else if (textpos == 0) { | 
 | 
                        if (!legalElementContext(dtd.pcdata)) { | 
 | 
                            error("unexpected.pcdata"); | 
 | 
                        }  | 
 | 
                        if (last.breaksFlow()) { | 
 | 
                            space = false;  | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                    break;  | 
 | 
 | 
 | 
                  case -1:  | 
 | 
                    return;  | 
 | 
 | 
 | 
                  case '&':  | 
 | 
                    if (textpos == 0) { | 
 | 
                        if (!legalElementContext(dtd.pcdata)) { | 
 | 
                            error("unexpected.pcdata"); | 
 | 
                        }  | 
 | 
                        if (last.breaksFlow()) { | 
 | 
                            space = false;  | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                    char data[] = parseEntityReference();  | 
 | 
                    if (textpos + data.length + 1 > text.length) { | 
 | 
                        char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];  | 
 | 
                        System.arraycopy(text, 0, newtext, 0, text.length);  | 
 | 
                        text = newtext;  | 
 | 
                    }  | 
 | 
                    if (space) { | 
 | 
                        space = false;  | 
 | 
                        text[textpos++] = ' ';  | 
 | 
                    }  | 
 | 
                    System.arraycopy(data, 0, text, textpos, data.length);  | 
 | 
                    textpos += data.length;  | 
 | 
                    ignoreSpace = false;  | 
 | 
                    continue;  | 
 | 
 | 
 | 
                  case '\n':  | 
 | 
                    ln++;  | 
 | 
                    lfCount++;  | 
 | 
                    ch = readCh();  | 
 | 
                    if ((stack != null) && stack.pre) { | 
 | 
                        break;  | 
 | 
                    }  | 
 | 
                    if (textpos == 0) { | 
 | 
                        lastBlockStartPos = currentPosition;  | 
 | 
                    }  | 
 | 
                    if (!ignoreSpace) { | 
 | 
                        space = true;  | 
 | 
                    }  | 
 | 
                    continue;  | 
 | 
 | 
 | 
                  case '\r':  | 
 | 
                    ln++;  | 
 | 
                    c = '\n';  | 
 | 
                    if ((ch = readCh()) == '\n') { | 
 | 
                        ch = readCh();  | 
 | 
                        crlfCount++;  | 
 | 
                    }  | 
 | 
                    else { | 
 | 
                        crCount++;  | 
 | 
                    }  | 
 | 
                    if ((stack != null) && stack.pre) { | 
 | 
                        break;  | 
 | 
                    }  | 
 | 
                    if (textpos == 0) { | 
 | 
                        lastBlockStartPos = currentPosition;  | 
 | 
                    }  | 
 | 
                    if (!ignoreSpace) { | 
 | 
                        space = true;  | 
 | 
                    }  | 
 | 
                    continue;  | 
 | 
 | 
 | 
 | 
 | 
                  case '\t':  | 
 | 
                  case ' ':  | 
 | 
                    ch = readCh();  | 
 | 
                    if ((stack != null) && stack.pre) { | 
 | 
                        break;  | 
 | 
                    }  | 
 | 
                    if (textpos == 0) { | 
 | 
                        lastBlockStartPos = currentPosition;  | 
 | 
                    }  | 
 | 
                    if (!ignoreSpace) { | 
 | 
                        space = true;  | 
 | 
                    }  | 
 | 
                    continue;  | 
 | 
 | 
 | 
                  default:  | 
 | 
                    if (textpos == 0) { | 
 | 
                        if (!legalElementContext(dtd.pcdata)) { | 
 | 
                            error("unexpected.pcdata"); | 
 | 
                        }  | 
 | 
                        if (last.breaksFlow()) { | 
 | 
                            space = false;  | 
 | 
                        }  | 
 | 
                    }  | 
 | 
                    ch = readCh();  | 
 | 
                    break;  | 
 | 
                }  | 
 | 
            }  | 
 | 
 | 
 | 
              | 
 | 
            if (textpos + 2 > text.length) { | 
 | 
                char newtext[] = new char[text.length + 128];  | 
 | 
                System.arraycopy(text, 0, newtext, 0, text.length);  | 
 | 
                text = newtext;  | 
 | 
            }  | 
 | 
 | 
 | 
              | 
 | 
            if (space) { | 
 | 
                if (textpos == 0) { | 
 | 
                    lastBlockStartPos--;  | 
 | 
                }  | 
 | 
                text[textpos++] = ' ';  | 
 | 
                space = false;  | 
 | 
            }  | 
 | 
            text[textpos++] = (char)c;  | 
 | 
            ignoreSpace = false;  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
    // End-of-line detection, based on the terminator counts gathered while parsing.
 | 
    String getEndOfLineString() { | 
 | 
        if (crlfCount >= crCount) { | 
 | 
            if (lfCount >= crlfCount) { | 
 | 
                return "\n";  | 
 | 
            }  | 
 | 
            else { | 
 | 
                return "\r\n";  | 
 | 
            }  | 
 | 
        }  | 
 | 
        else { | 
 | 
            if (crCount > lfCount) { | 
 | 
                return "\r";  | 
 | 
            }  | 
 | 
            else { | 
 | 
                return "\n";  | 
 | 
            }  | 
 | 
        }  | 
 | 
    }  | 
 | 
 | 
 | 
    // Entry point: parses an HTML stream read from the given Reader.
 | 
    public synchronized void parse(Reader in) throws IOException { | 
 | 
        this.in = in;  | 
 | 
 | 
 | 
        this.ln = 1;  | 
 | 
 | 
 | 
        seenHtml = false;  | 
 | 
        seenHead = false;  | 
 | 
        seenBody = false;  | 
 | 
 | 
 | 
        crCount = lfCount = crlfCount = 0;  | 
 | 
 | 
 | 
        try { | 
 | 
            ch = readCh();  | 
 | 
            text = new char[1024];  | 
 | 
            str = new char[128];  | 
 | 
 | 
 | 
            parseContent();  | 
 | 
            // NOTE: interruption may have occurred.  Control flows out  | 
 | 
              | 
 | 
            while (stack != null) { | 
 | 
                endTag(true);  | 
 | 
            }  | 
 | 
            in.close();  | 
 | 
        } catch (IOException e) { | 
 | 
            errorContext();  | 
 | 
            error("ioexception"); | 
 | 
            throw e;  | 
 | 
        } catch (Exception e) { | 
 | 
            errorContext();  | 
 | 
            error("exception", e.getClass().getName(), e.getMessage()); | 
 | 
            e.printStackTrace();  | 
 | 
        } catch (ThreadDeath e) { | 
 | 
            errorContext();  | 
 | 
            error("terminated"); | 
 | 
            e.printStackTrace();  | 
 | 
            throw e;  | 
 | 
        } finally { | 
 | 
            for (; stack != null ; stack = stack.next) { | 
 | 
                handleEndTag(stack.tag);  | 
 | 
            }  | 
 | 
 | 
 | 
            text = null;  | 
 | 
            str = null;  | 
 | 
        }  | 
 | 
 | 
 | 
    }  | 
 | 
 | 
 | 
 | 
 | 
    // Input cache used by readCh(): characters are pulled from the reader
    // through this buffer, which has room for a single character.
 | 
    private char buf[] = new char[1];  | 
 | 
    private int pos;  | 
 | 
    private int len;  | 
 | 
      | 
 | 
 | 
 | 
 | 
 | 
    */  | 
 | 
    private int currentPosition;  | 
 | 
 | 
 | 
 | 
 | 
    private final int readCh() throws IOException { | 
 | 
 | 
 | 
        if (pos >= len) { | 
 | 
 | 
 | 
            // This loop allows us to ignore interrupts if the flag  | 
 | 
              | 
 | 
            for (;;) { | 
 | 
                try { | 
 | 
                    len = in.read(buf);  | 
 | 
                    break;  | 
 | 
                } catch (InterruptedIOException ex) { | 
 | 
                    throw ex;  | 
 | 
                }  | 
 | 
            }  | 
 | 
 | 
 | 
            if (len <= 0) { | 
 | 
                return -1;        | 
 | 
            }  | 
 | 
            pos = 0;  | 
 | 
        }  | 
 | 
        ++currentPosition;  | 
 | 
 | 
 | 
        return buf[pos++];  | 
 | 
    }  | 
 | 
 | 
 | 
 | 
 | 
    protected int getCurrentPos() { | 
 | 
        return currentPosition;  | 
 | 
    }  | 
 | 
}  |