|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  org.cyberneko.html.filters.DefaultFilter
    org.cyberneko.html.filters.Purifier
public class Purifier
This filter purifies the HTML input to ensure XML well-formedness. The purification process includes:
Illegal characters in XML names are converted to the character sequence "_u####_" where "####" is the value of the Unicode character represented in hexadecimal. Illegal characters appearing in document content, by contrast, are converted to the character sequence "\\u####".
In comments, the character '-' is replaced by the character sequence "- " to prevent "--" from ever appearing in the comment content. For CDATA sections, the character ']' is replaced by the character sequence "] " to prevent "]]" from appearing.
The URI used for synthesized namespace bindings is "http://cyberneko.org/html/ns/synthesized/number" where number is generated to ensure uniqueness.
Field Summary | |
---|---|
protected static java.lang.String |
AUGMENTATIONS
Include infoset augmentations. |
protected boolean |
fAugmentations
Augmentations. |
protected boolean |
fInCDATASection
True if inside a CDATA section. |
protected org.apache.xerces.xni.NamespaceContext |
fNamespaceContext
Namespace information. |
protected boolean |
fNamespaces
Namespaces. |
protected java.lang.String |
fPublicId
Public identifier of doctype declaration. |
protected boolean |
fSeenDoctype
True if the doctype declaration was seen. |
protected boolean |
fSeenRootElement
True if root element was seen. |
protected int |
fSynthesizedNamespaceCount
Synthesized namespace binding count. |
protected java.lang.String |
fSystemId
System identifier of doctype declaration. |
protected static java.lang.String |
NAMESPACES
Namespaces. |
protected static HTMLEventInfo |
SYNTHESIZED_ITEM
Synthesized event info item. |
static java.lang.String |
SYNTHESIZED_NAMESPACE_PREFX
Synthesized namespace binding prefix. |
Fields inherited from class org.cyberneko.html.filters.DefaultFilter |
---|
fDocumentHandler, fDocumentSource |
Constructor Summary | |
---|---|
Purifier()
|
Method Summary | |
---|---|
void |
characters(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
Characters. |
void |
comment(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
Comment. |
void |
doctypeDecl(java.lang.String root,
java.lang.String pubid,
java.lang.String sysid,
org.apache.xerces.xni.Augmentations augs)
Doctype declaration. |
void |
emptyElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs,
org.apache.xerces.xni.Augmentations augs)
Empty element. |
void |
endCDATA(org.apache.xerces.xni.Augmentations augs)
End CDATA section. |
void |
endElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.Augmentations augs)
End element. |
protected void |
handleStartDocument()
Handle start document. |
protected void |
handleStartElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs)
Handle start element. |
void |
processingInstruction(java.lang.String target,
org.apache.xerces.xni.XMLString data,
org.apache.xerces.xni.Augmentations augs)
Processing instruction. |
protected java.lang.String |
purifyName(java.lang.String name,
boolean localpart)
Purify name. |
protected org.apache.xerces.xni.QName |
purifyQName(org.apache.xerces.xni.QName qname)
Purify qualified name. |
protected org.apache.xerces.xni.XMLString |
purifyText(org.apache.xerces.xni.XMLString text)
Purify content. |
void |
reset(org.apache.xerces.xni.parser.XMLComponentManager manager)
Resets the component. |
void |
startCDATA(org.apache.xerces.xni.Augmentations augs)
Start CDATA section. |
void |
startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.Augmentations augs)
Start document. |
void |
startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.NamespaceContext nscontext,
org.apache.xerces.xni.Augmentations augs)
Start document. |
void |
startElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs,
org.apache.xerces.xni.Augmentations augs)
Start element. |
protected void |
synthesizeBinding(org.apache.xerces.xni.XMLAttributes attrs,
java.lang.String ns)
Synthesize namespace binding. |
protected org.apache.xerces.xni.Augmentations |
synthesizedAugs()
Returns an augmentations object with a synthesized item added. |
protected static java.lang.String |
toHexString(int c,
int padlen)
Returns a padded hexadecimal string for the given value. |
void |
xmlDecl(java.lang.String version,
java.lang.String encoding,
java.lang.String standalone,
org.apache.xerces.xni.Augmentations augs)
XML declaration. |
Methods inherited from class org.cyberneko.html.filters.DefaultFilter |
---|
endDocument, endGeneralEntity, endPrefixMapping, getDocumentHandler, getDocumentSource, getFeatureDefault, getPropertyDefault, getRecognizedFeatures, getRecognizedProperties, ignorableWhitespace, merge, setDocumentHandler, setDocumentSource, setFeature, setProperty, startGeneralEntity, startPrefixMapping, textDecl |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final java.lang.String SYNTHESIZED_NAMESPACE_PREFX
protected static final java.lang.String NAMESPACES
protected static final java.lang.String AUGMENTATIONS
protected static final HTMLEventInfo SYNTHESIZED_ITEM
protected boolean fNamespaces
protected boolean fAugmentations
protected boolean fSeenDoctype
protected boolean fSeenRootElement
protected boolean fInCDATASection
protected java.lang.String fPublicId
protected java.lang.String fSystemId
protected org.apache.xerces.xni.NamespaceContext fNamespaceContext
protected int fSynthesizedNamespaceCount
Constructor Detail |
---|
public Purifier()
Method Detail |
---|
public void reset(org.apache.xerces.xni.parser.XMLComponentManager manager) throws org.apache.xerces.xni.parser.XMLConfigurationException
Description copied from class: DefaultFilter
Specified by: reset in interface org.apache.xerces.xni.parser.XMLComponent
Overrides: reset in class DefaultFilter
Parameters: manager - The component manager.
Throws: org.apache.xerces.xni.parser.XMLConfigurationException
public void startDocument(org.apache.xerces.xni.XMLLocator locator, java.lang.String encoding, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Overrides: startDocument in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void startDocument(org.apache.xerces.xni.XMLLocator locator, java.lang.String encoding, org.apache.xerces.xni.NamespaceContext nscontext, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: startDocument in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: startDocument in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void xmlDecl(java.lang.String version, java.lang.String encoding, java.lang.String standalone, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: xmlDecl in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: xmlDecl in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void comment(org.apache.xerces.xni.XMLString text, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: comment in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: comment in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void processingInstruction(java.lang.String target, org.apache.xerces.xni.XMLString data, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: processingInstruction in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: processingInstruction in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void doctypeDecl(java.lang.String root, java.lang.String pubid, java.lang.String sysid, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: doctypeDecl in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: doctypeDecl in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void startElement(org.apache.xerces.xni.QName element, org.apache.xerces.xni.XMLAttributes attrs, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: startElement in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: startElement in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void emptyElement(org.apache.xerces.xni.QName element, org.apache.xerces.xni.XMLAttributes attrs, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: emptyElement in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: emptyElement in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void startCDATA(org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: startCDATA in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: startCDATA in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void endCDATA(org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: endCDATA in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: endCDATA in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void characters(org.apache.xerces.xni.XMLString text, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: characters in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: characters in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void endElement(org.apache.xerces.xni.QName element, org.apache.xerces.xni.Augmentations augs) throws org.apache.xerces.xni.XNIException
Specified by: endElement in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: endElement in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
protected void handleStartDocument()
protected void handleStartElement(org.apache.xerces.xni.QName element, org.apache.xerces.xni.XMLAttributes attrs)
protected void synthesizeBinding(org.apache.xerces.xni.XMLAttributes attrs, java.lang.String ns)
protected final org.apache.xerces.xni.Augmentations synthesizedAugs()
protected org.apache.xerces.xni.QName purifyQName(org.apache.xerces.xni.QName qname)
protected java.lang.String purifyName(java.lang.String name, boolean localpart)
protected org.apache.xerces.xni.XMLString purifyText(org.apache.xerces.xni.XMLString text)
protected static java.lang.String toHexString(int c, int padlen)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |