// =================================================================================================
// ADOBE SYSTEMS INCORPORATED
// Copyright 2006 Adobe Systems Incorporated
// All Rights Reserved
//
// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms
// of the Adobe license agreement accompanying it.
// =================================================================================================
package com.adobe.xmp.impl;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import org.w3c.dom.Attr;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import com.adobe.xmp.XMPConst;
import com.adobe.xmp.XMPError;
import com.adobe.xmp.XMPException;
import com.adobe.xmp.XMPMetaFactory;
import com.adobe.xmp.XMPSchemaRegistry;
import com.adobe.xmp.options.PropertyOptions;
/**
* Parser for "normal" XML serialisation of RDF.
*
* @since 14.07.2006
*/
public class ParseRDF implements XMPError, XMPConst
{
/**
 * Term kind for names that are not reserved RDF terms. In this file it is the kind of typed
 * node elements and of ordinary property attributes.
 */
public static final int RDFTERM_OTHER = 0;
/** Start of coreSyntaxTerms (presumably the rdf:RDF element - the mapping is done elsewhere). */
public static final int RDFTERM_RDF = 1;
/** The rdf:ID attribute; mutually exclusive with rdf:about and rdf:nodeID on a node element. */
public static final int RDFTERM_ID = 2;
/** The rdf:about attribute; on a top level node its value names the XMP tree. */
public static final int RDFTERM_ABOUT = 3;
/** Core syntax term (presumably rdf:parseType). */
public static final int RDFTERM_PARSE_TYPE = 4;
/** Core syntax term (presumably rdf:resource). */
public static final int RDFTERM_RESOURCE = 5;
/** The rdf:nodeID attribute; mutually exclusive with rdf:about and rdf:ID on a node element. */
public static final int RDFTERM_NODE_ID = 6;
/** End of coreSyntaxTerms (presumably rdf:datatype). */
public static final int RDFTERM_DATATYPE = 7;
/** Start of additions for syntax terms: the rdf:Description node element. */
public static final int RDFTERM_DESCRIPTION = 8;
/** End of additions for syntax terms: the rdf:li element. */
public static final int RDFTERM_LI = 9;
/** Start of oldTerms (presumably rdf:aboutEach). */
public static final int RDFTERM_ABOUT_EACH = 10;
/** Old term (presumably rdf:aboutEachPrefix). */
public static final int RDFTERM_ABOUT_EACH_PREFIX = 11;
/** End of oldTerms (presumably rdf:bagID). */
public static final int RDFTERM_BAG_ID = 12;
/** First value of the coreSyntaxTerms range. */
public static final int RDFTERM_FIRST_CORE = RDFTERM_RDF;
/** Last value of the coreSyntaxTerms range. */
public static final int RDFTERM_LAST_CORE = RDFTERM_DATATYPE;
/** First value of the syntaxTerms range. Yes, the syntax terms include the core terms. */
public static final int RDFTERM_FIRST_SYNTAX = RDFTERM_FIRST_CORE;
/** Last value of the syntaxTerms range. */
public static final int RDFTERM_LAST_SYNTAX = RDFTERM_LI;
/** First value of the oldTerms range. */
public static final int RDFTERM_FIRST_OLD = RDFTERM_ABOUT_EACH;
/** Last value of the oldTerms range. */
public static final int RDFTERM_LAST_OLD = RDFTERM_BAG_ID;
/** This prefix is used for default namespaces. */
public static final String DEFAULT_PREFIX = "_dflt";
/**
 * Entry point of the RDF parser. A fresh XMP metadata object is created and filled by
 * walking the XML tree from the given root node. This is a raw parse; the normalisation
 * of the XMP tree happens outside of this method.
 *
 * @param xmlRoot the XML root node
 * @return Returns an XMP metadata object (not normalized)
 * @throws XMPException Occurs if the parsing fails for any reason.
 */
static XMPMetaImpl parse(Node xmlRoot) throws XMPException
{
    final XMPMetaImpl parsedMeta = new XMPMetaImpl();
    rdf_RDF(parsedMeta, xmlRoot);
    return parsedMeta;
}
/**
 * Handles the top-level rdf:RDF element. Like all parsing methods of this class it
 * recognizes one RDF syntax production and adds the matching structure to the XMP tree;
 * it simply returns on success and throws an exception on failure.
 * <p>
 * The node must carry at least one attribute, otherwise the parse is rejected.
 *
 * @param xmp the xmp metadata object that is generated
 * @param rdfRdfNode the top-level xml node
 * @throws XMPException thrown on parsing errors
 */
static void rdf_RDF(XMPMetaImpl xmp, Node rdfRdfNode) throws XMPException
{
    if (!rdfRdfNode.hasAttributes())
    {
        throw new XMPException("Invalid attributes of rdf:RDF element", BADRDF);
    }
    rdf_NodeElementList(xmp, xmp.getRoot(), rdfRdfNode);
}
/**
 * 7.2.10 nodeElementList
 * ws* ( nodeElement ws* )*
 *
 * Passes every child of the rdf:RDF node that is not filtered out by
 * <code>isWhitespaceNode()</code> to the node element parser as a top-level node.
 *
 * Note: this method is only called from the rdf:RDF-node (top level)
 * @param xmp the xmp metadata object that is generated
 * @param xmpParent the parent xmp node
 * @param rdfRdfNode the top-level xml node
 * @throws XMPException thrown on parsing errors
 */
private static void rdf_NodeElementList(XMPMetaImpl xmp, XMPNode xmpParent, Node rdfRdfNode)
        throws XMPException
{
    for (int index = 0; index < rdfRdfNode.getChildNodes().getLength(); index++)
    {
        final Node candidate = rdfRdfNode.getChildNodes().item(index);
        if (isWhitespaceNode(candidate))
        {
            // whitespace (and all text nodes) between node elements is skipped
            continue;
        }
        rdf_NodeElement(xmp, xmpParent, candidate, true);
    }
}
/**
 * 7.2.5 nodeElementURIs
 * anyURI - ( coreSyntaxTerms | rdf:li | oldTerms )
 *
 * 7.2.11 nodeElement
 * start-element ( URI == nodeElementURIs,
 * attributes == set ( ( idAttr | nodeIdAttr | aboutAttr )?, propertyAttr* ) )
 * propertyEltList
 * end-element()
 *
 * A node element URI is rdf:Description or anything else that is not an RDF
 * term. Typed nodes (anything but rdf:Description) are rejected at the top level.
 * For an accepted node the attributes are processed first, then the child property elements.
 *
 * @param xmp the xmp metadata object that is generated
 * @param xmpParent the parent xmp node
 * @param xmlNode the currently processed XML node
 * @param isTopLevel Flag if the node is a top-level node
 * @throws XMPException thrown on parsing errors
 */
private static void rdf_NodeElement(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode,
        boolean isTopLevel) throws XMPException
{
    final int termKind = getRDFTermKind(xmlNode);
    final boolean isDescription = termKind == RDFTERM_DESCRIPTION;
    final boolean isTypedNode = termKind == RDFTERM_OTHER;

    if (!isDescription && !isTypedNode)
    {
        throw new XMPException("Node element must be rdf:Description or typed node",
                BADRDF);
    }
    if (isTopLevel && isTypedNode)
    {
        throw new XMPException("Top level typed node not allowed", BADXMP);
    }

    rdf_NodeElementAttrs(xmp, xmpParent, xmlNode, isTopLevel);
    rdf_PropertyElementList(xmp, xmpParent, xmlNode, isTopLevel);
}
/**
 * 7.2.7 propertyAttributeURIs
 * anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms )
 *
 * 7.2.11 nodeElement
 * start-element ( URI == nodeElementURIs,
 * attributes == set ( ( idAttr | nodeIdAttr | aboutAttr )?, propertyAttr* ) )
 * propertyEltList
 * end-element()
 *
 * Walks the attribute list of an RDF node element. Every attribute that is not an RDF term
 * is a property attribute and becomes a child node of the XMP parent. At most one of
 * rdf:ID, rdf:nodeID and rdf:about may appear; rdf:ID and rdf:nodeID are otherwise ignored,
 * as are rdf:about attributes on inner nodes. A top-level rdf:about names the XMP tree and
 * has to agree with a name that has been set before.
 *
 * @param xmp the xmp metadata object that is generated
 * @param xmpParent the parent xmp node
 * @param xmlNode the currently processed XML node
 * @param isTopLevel Flag if the node is a top-level node
 * @throws XMPException thrown on parsing errors
 */
private static void rdf_NodeElementAttrs(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode,
        boolean isTopLevel) throws XMPException
{
    // Set once one of the mutually exclusive attributes (about, ID, nodeID) has been seen.
    boolean exclusiveSeen = false;

    final NamedNodeMap attrs = xmlNode.getAttributes();
    for (int idx = 0; idx < attrs.getLength(); idx++)
    {
        final Node attr = attrs.item(idx);

        // quick hack, ns declarations do not appear in C++:
        // skip "xmlns:*" and the default "xmlns" declaration
        final String prefix = attr.getPrefix();
        if ("xmlns".equals(prefix) || (prefix == null && "xmlns".equals(attr.getNodeName())))
        {
            continue;
        }

        final int kind = getRDFTermKind(attr);

        if (kind == RDFTERM_OTHER)
        {
            // a property attribute
            addChildNode(xmp, xmpParent, attr, attr.getNodeValue(), isTopLevel);
            continue;
        }

        if (kind != RDFTERM_ID && kind != RDFTERM_NODE_ID && kind != RDFTERM_ABOUT)
        {
            throw new XMPException("Invalid nodeElement attribute", BADRDF);
        }

        if (exclusiveSeen)
        {
            throw new XMPException("Mutally exclusive about, ID, nodeID attributes",
                    BADRDF);
        }
        exclusiveSeen = true;

        if (!isTopLevel || kind != RDFTERM_ABOUT)
        {
            continue;
        }

        // This is the rdf:about attribute on a top level node. Set the XMP tree name
        // if it doesn't have a name yet, otherwise make sure both names match.
        final String treeName = xmpParent.getName();
        final String about = attr.getNodeValue();
        if (treeName == null || treeName.length() == 0)
        {
            xmpParent.setName(about);
        }
        else if (!treeName.equals(about))
        {
            throw new XMPException("Mismatched top level rdf:about values",
                    BADXMP);
        }
    }
}
/**
 * 7.2.13 propertyEltList
 * ws* ( propertyElt ws* )*
 *
 * Iterates over the XML children: whitespace nodes are skipped, element nodes are parsed
 * as property elements and anything else is an error.
 *
 * @param xmp the xmp metadata object that is generated
 * @param xmpParent the parent xmp node
 * @param xmlParent the currently processed XML node
 * @param isTopLevel Flag if the node is a top-level node
 * @throws XMPException thrown on parsing errors
 */
private static void rdf_PropertyElementList(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlParent,
        boolean isTopLevel) throws XMPException
{
    for (int pos = 0; pos < xmlParent.getChildNodes().getLength(); pos++)
    {
        final Node child = xmlParent.getChildNodes().item(pos);
        if (isWhitespaceNode(child))
        {
            continue;
        }
        if (child.getNodeType() != Node.ELEMENT_NODE)
        {
            throw new XMPException("Expected property element node not found", BADRDF);
        }
        rdf_PropertyElement(xmp, xmpParent, child, isTopLevel);
    }
}
/**
 * 7.2.14 propertyElt
 *
 * resourcePropertyElt | literalPropertyElt | parseTypeLiteralPropertyElt |
 * parseTypeResourcePropertyElt | parseTypeCollectionPropertyElt |
 * parseTypeOtherPropertyElt | emptyPropertyElt
 *
 * 7.2.15 resourcePropertyElt
 * start-element ( URI == propertyElementURIs, attributes == set ( idAttr? ) )
 * ws* nodeElement ws*
 * end-element()
 *
 * 7.2.16 literalPropertyElt
 * start-element (
 * URI == propertyElementURIs, attributes == set ( idAttr?, datatypeAttr?) )
 * text()
 * end-element()
 *
 * 7.2.17 parseTypeLiteralPropertyElt
 * start-element (
 * URI == propertyElementURIs, attributes == set ( idAttr?, parseLiteral ) )
 * literal
 * end-element()
 *
 * 7.2.18 parseTypeResourcePropertyElt
 * start-element (
 * URI == propertyElementURIs, attributes == set ( idAttr?, parseResource ) )
 * propertyEltList
 * end-element()
 *
 * 7.2.19 parseTypeCollectionPropertyElt
 * start-element (
 * URI == propertyElementURIs, attributes == set ( idAttr?, parseCollection ) )
 * nodeElementList
 * end-element()
 *
 * 7.2.20 parseTypeOtherPropertyElt
 * start-element ( URI == propertyElementURIs, attributes == set ( idAttr?, parseOther ) )
 * propertyEltList
 * end-element()
 *
 * 7.2.21 emptyPropertyElt
 * start-element ( URI == propertyElementURIs,
 * attributes == set ( idAttr?, ( resourceAttr | nodeIdAttr )?, propertyAttr* ) )
 * end-element()
 *
 * The various property element forms are not distinguished by the XML element name,
 * but by their attributes for the most part. The exceptions are resourcePropertyElt and
 * literalPropertyElt. They are distinguished by their XML element content.
 *
 * NOTE: The RDF syntax does not explicitly include the xml:lang attribute although it can
 * appear in many of these. We have to allow for it in the attribute counts below.
 *
 * NOTE: The namespace declarations are removed from the attribute map of
 * <code>xmlNode</code> as a side effect of this method.
 *
 * @param xmp the xmp metadata object that is generated
 * @param xmpParent the parent xmp node
 * @param xmlNode the currently processed XML node
 * @param isTopLevel Flag if the node is a top-level node
 * @throws XMPException thrown on parsing errors
 */
private static void rdf_PropertyElement(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode,
        boolean isTopLevel) throws XMPException
{
    int nodeTerm = getRDFTermKind(xmlNode);
    if (!isPropertyElementName(nodeTerm))
    {
        throw new XMPException("Invalid property element name", BADRDF);
    }

    // Remove the namespace definitions from the attribute map, so that they do not count
    // as attributes below. The map is walked backwards because removing an entry shifts
    // the indices of all entries behind it.
    NamedNodeMap attributes = xmlNode.getAttributes();
    for (int i = attributes.getLength() - 1; i >= 0; i--)
    {
        Node attribute = attributes.item(i);
        if ("xmlns".equals(attribute.getPrefix()) ||
            (attribute.getPrefix() == null && "xmlns".equals(attribute.getNodeName())))
        {
            attributes.removeNamedItem(attribute.getNodeName());
        }
    }

    if (attributes.getLength() > 3)
    {
        // Only an emptyPropertyElt can have more than 3 attributes.
        rdf_EmptyPropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
        return;
    }

    // Look through the attributes for one that isn't rdf:ID or xml:lang,
    // it will usually tell what we should be dealing with.
    // The called routines must verify their specific syntax!
    for (int i = 0; i < attributes.getLength(); i++)
    {
        Node attribute = attributes.item(i);
        String attrLocal = attribute.getLocalName();
        String attrNS = attribute.getNamespaceURI();

        boolean isXmlLang = XML_LANG.equals(attribute.getNodeName());
        boolean isRdfId = "ID".equals(attrLocal) && NS_RDF.equals(attrNS);
        if (isXmlLang || isRdfId)
        {
            // Neither xml:lang nor rdf:ID determines the kind of property element.
            // (rdf:ID used to fall through to the empty-element case here, so that
            // literal and resource property elements carrying an rdf:ID were rejected.)
            continue;
        }

        String attrValue = attribute.getNodeValue();
        if ("datatype".equals(attrLocal) && NS_RDF.equals(attrNS))
        {
            rdf_LiteralPropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
        }
        else if (!("parseType".equals(attrLocal) && NS_RDF.equals(attrNS)))
        {
            rdf_EmptyPropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
        }
        else if ("Literal".equals(attrValue))
        {
            rdf_ParseTypeLiteralPropertyElement();
        }
        else if ("Resource".equals(attrValue))
        {
            rdf_ParseTypeResourcePropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
        }
        else if ("Collection".equals(attrValue))
        {
            rdf_ParseTypeCollectionPropertyElement();
        }
        else
        {
            rdf_ParseTypeOtherPropertyElement();
        }
        return;
    }

    // Only rdf:ID and xml:lang, could be a resourcePropertyElt, a literalPropertyElt,
    // or an emptyPropertyElt. Look at the child XML nodes to decide which.
    if (!xmlNode.hasChildNodes())
    {
        rdf_EmptyPropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
        return;
    }

    for (int i = 0; i < xmlNode.getChildNodes().getLength(); i++)
    {
        Node currChild = xmlNode.getChildNodes().item(i);
        if (currChild.getNodeType() != Node.TEXT_NODE)
        {
            // Any non-text child makes this a resource property element.
            rdf_ResourcePropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
            return;
        }
    }
    rdf_LiteralPropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
}
/**
 * 7.2.15 resourcePropertyElt
 * start-element ( URI == propertyElementURIs, attributes == set ( idAttr? ) )
 * ws* nodeElement ws*
 * end-element()
 *
 * Creates a compound XMP node for a property whose content is exactly one XML element:
 * a struct for rdf:Description, an array for rdf:Bag/Seq/Alt, or a typed node (which
 * becomes a struct with an rdf:type qualifier). Qualified properties written
 * with rdf:Description and rdf:value are caught and cleaned up afterwards.
 *
 * @param xmp the xmp metadata object that is generated
 * @param xmpParent the parent xmp node
 * @param xmlNode the currently processed XML node
 * @param isTopLevel Flag if the node is a top-level node
 * @throws XMPException thrown on parsing errors
 */
private static void rdf_ResourcePropertyElement(XMPMetaImpl xmp, XMPNode xmpParent,
        Node xmlNode, boolean isTopLevel) throws XMPException
{
    if (isTopLevel && "iX:changes".equals(xmlNode.getNodeName()))
    {
        // Strip old "punchcard" chaff which has on the prefix "iX:".
        return;
    }

    final XMPNode compound = addChildNode(xmp, xmpParent, xmlNode, "", isTopLevel);

    // Attributes: only namespace declarations, xml:lang and rdf:ID are acceptable.
    final NamedNodeMap attrs = xmlNode.getAttributes();
    for (int a = 0; a < attrs.getLength(); a++)
    {
        final Node attr = attrs.item(a);
        final String prefix = attr.getPrefix();
        if ("xmlns".equals(prefix) || (prefix == null && "xmlns".equals(attr.getNodeName())))
        {
            continue;
        }

        if (XML_LANG.equals(attr.getNodeName()))
        {
            addQualifierNode(compound, XML_LANG, attr.getNodeValue());
            continue;
        }

        final boolean isRdfId =
                "ID".equals(attr.getLocalName()) && NS_RDF.equals(attr.getNamespaceURI());
        if (!isRdfId)
        {
            throw new XMPException(
                    "Invalid attribute for resource property element", BADRDF);
        }
        // rdf:ID attributes are ignored
    }

    // Children: exactly one element is expected, surrounded by optional whitespace.
    boolean sawElement = false;
    for (int c = 0; c < xmlNode.getChildNodes().getLength(); c++)
    {
        final Node child = xmlNode.getChildNodes().item(c);
        if (isWhitespaceNode(child))
        {
            continue;
        }
        if (sawElement)
        {
            // found second child element
            throw new XMPException(
                    "Invalid child of resource property element", BADRDF);
        }
        if (child.getNodeType() != Node.ELEMENT_NODE)
        {
            throw new XMPException(
                    "Children of resource property element must be XML elements", BADRDF);
        }

        final boolean inRdfNS = NS_RDF.equals(child.getNamespaceURI());
        final String localName = child.getLocalName();

        if (inRdfNS && "Bag".equals(localName))
        {
            compound.getOptions().setArray(true);
        }
        else if (inRdfNS && "Seq".equals(localName))
        {
            compound.getOptions().setArray(true).setArrayOrdered(true);
        }
        else if (inRdfNS && "Alt".equals(localName))
        {
            compound.getOptions().setArray(true).setArrayOrdered(true)
                    .setArrayAlternate(true);
        }
        else
        {
            compound.getOptions().setStruct(true);
            if (!inRdfNS && !"Description".equals(localName))
            {
                // a typed node, keep its type as an rdf:type qualifier
                final String childNS = child.getNamespaceURI();
                if (childNS == null)
                {
                    throw new XMPException(
                            "All XML elements must be in a namespace", BADXMP);
                }
                addQualifierNode(compound, "rdf:type", childNS + ':' + localName);
            }
        }

        rdf_NodeElement(xmp, compound, child, false);

        if (compound.getHasValueChild())
        {
            fixupQualifiedNode(compound);
        }
        else if (compound.getOptions().isArrayAlternate())
        {
            XMPNodeUtils.detectAltText(compound);
        }

        sawElement = true;
    }

    if (!sawElement)
    {
        // didn't find any child elements
        throw new XMPException("Missing child of resource property element", BADRDF);
    }
}
/**
 * 7.2.16 literalPropertyElt
 * start-element ( URI == propertyElementURIs,
 * attributes == set ( idAttr?, datatypeAttr?) )
 * text()
 * end-element()
 *
 * Adds a leaf node whose value is the concatenated text content of the element.
 * An xml:lang attribute becomes a qualifier; rdf:ID and rdf:datatype are ignored, any
 * other attribute and any non-text child is an error.
 *
 * @param xmp the xmp metadata object that is generated
 * @param xmpParent the parent xmp node
 * @param xmlNode the currently processed XML node
 * @param isTopLevel Flag if the node is a top-level node
 * @throws XMPException thrown on parsing errors
 */
private static void rdf_LiteralPropertyElement(XMPMetaImpl xmp, XMPNode xmpParent,
        Node xmlNode, boolean isTopLevel) throws XMPException
{
    final XMPNode leaf = addChildNode(xmp, xmpParent, xmlNode, null, isTopLevel);

    final NamedNodeMap attrs = xmlNode.getAttributes();
    for (int a = 0; a < attrs.getLength(); a++)
    {
        final Node attr = attrs.item(a);
        final String prefix = attr.getPrefix();
        if ("xmlns".equals(prefix) || (prefix == null && "xmlns".equals(attr.getNodeName())))
        {
            continue;
        }

        if (XML_LANG.equals(attr.getNodeName()))
        {
            addQualifierNode(leaf, XML_LANG, attr.getNodeValue());
            continue;
        }

        final String local = attr.getLocalName();
        final boolean ignorable = NS_RDF.equals(attr.getNamespaceURI()) &&
                ("ID".equals(local) || "datatype".equals(local));
        if (!ignorable)
        {
            throw new XMPException(
                    "Invalid attribute for literal property element", BADRDF);
        }
        // rdf:ID and rdf:datatype attributes are ignored
    }

    final StringBuffer text = new StringBuffer();
    for (int c = 0; c < xmlNode.getChildNodes().getLength(); c++)
    {
        final Node child = xmlNode.getChildNodes().item(c);
        if (child.getNodeType() != Node.TEXT_NODE)
        {
            throw new XMPException("Invalid child of literal property element", BADRDF);
        }
        text.append(child.getNodeValue());
    }
    leaf.setValue(text.toString());
}
/**
 * 7.2.17 parseTypeLiteralPropertyElt
 * start-element ( URI == propertyElementURIs,
 * attributes == set ( idAttr?, parseLiteral ) )
 * literal
 * end-element()
 *
 * This form is not allowed in XMP, the method always fails.
 *
 * @throws XMPException always thrown
 */
private static void rdf_ParseTypeLiteralPropertyElement() throws XMPException
{
    final String reason = "ParseTypeLiteral property element not allowed";
    throw new XMPException(reason, BADXMP);
}
/**
 * 7.2.18 parseTypeResourcePropertyElt
 * start-element ( URI == propertyElementURIs,
 * attributes == set ( idAttr?, parseResource ) )
 * propertyEltList
 * end-element()
 *
 * Adds a new struct node, turns an xml:lang attribute into a qualifier and then
 * processes the XML child nodes to get the struct fields. The attributes rdf:ID and
 * rdf:parseType are ignored, any other attribute is an error.
 *
 * @param xmp the xmp metadata object that is generated
 * @param xmpParent the parent xmp node
 * @param xmlNode the currently processed XML node
 * @param isTopLevel Flag if the node is a top-level node
 * @throws XMPException thrown on parsing errors
 */
private static void rdf_ParseTypeResourcePropertyElement(XMPMetaImpl xmp, XMPNode xmpParent,
        Node xmlNode, boolean isTopLevel) throws XMPException
{
    final XMPNode struct = addChildNode(xmp, xmpParent, xmlNode, "", isTopLevel);
    struct.getOptions().setStruct(true);

    final NamedNodeMap attrs = xmlNode.getAttributes();
    for (int a = 0; a < attrs.getLength(); a++)
    {
        final Node attr = attrs.item(a);
        final String prefix = attr.getPrefix();
        if ("xmlns".equals(prefix) || (prefix == null && "xmlns".equals(attr.getNodeName())))
        {
            continue;
        }

        if (XML_LANG.equals(attr.getNodeName()))
        {
            addQualifierNode(struct, XML_LANG, attr.getNodeValue());
            continue;
        }

        // The caller ensured that the value of rdf:parseType is "Resource";
        // rdf:ID attributes are ignored.
        final String local = attr.getLocalName();
        final boolean ignorable = NS_RDF.equals(attr.getNamespaceURI()) &&
                ("ID".equals(local) || "parseType".equals(local));
        if (!ignorable)
        {
            throw new XMPException("Invalid attribute for ParseTypeResource property element",
                    BADRDF);
        }
    }

    rdf_PropertyElementList(xmp, struct, xmlNode, false);

    if (struct.getHasValueChild())
    {
        fixupQualifiedNode(struct);
    }
}
/**
 * 7.2.19 parseTypeCollectionPropertyElt
 * start-element ( URI == propertyElementURIs,
 * attributes == set ( idAttr?, parseCollection ) )
 * nodeElementList
 * end-element()
 *
 * This form is not allowed in XMP, the method always fails.
 *
 * @throws XMPException always thrown
 */
private static void rdf_ParseTypeCollectionPropertyElement() throws XMPException
{
    final String reason = "ParseTypeCollection property element not allowed";
    throw new XMPException(reason, BADXMP);
}
/**
 * 7.2.20 parseTypeOtherPropertyElt
 * start-element ( URI == propertyElementURIs, attributes == set ( idAttr?, parseOther ) )
 * propertyEltList
 * end-element()
 *
 * This form is not allowed in XMP, the method always fails.
 *
 * @throws XMPException always thrown
 */
private static void rdf_ParseTypeOtherPropertyElement() throws XMPException
{
    final String reason = "ParseTypeOther property element not allowed";
    throw new XMPException(reason, BADXMP);
}
/**
* 7.2.21 emptyPropertyElt
* start-element ( URI == propertyElementURIs,
* attributes == set (
* idAttr?, ( resourceAttr | nodeIdAttr )?, propertyAttr* ) )
* end-element()
*
*