|
|||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| Node.java | 100% | 100% | 100% | 100% |
|
||||||||||||||
| 1 | /* Copyright 2002-2005 Elliotte Rusty Harold | |
| 2 | ||
| 3 | This library is free software; you can redistribute it and/or modify | |
| 4 | it under the terms of version 2.1 of the GNU Lesser General Public | |
| 5 | License as published by the Free Software Foundation. | |
| 6 | ||
| 7 | This library is distributed in the hope that it will be useful, | |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 10 | GNU Lesser General Public License for more details. | |
| 11 | ||
| 12 | You should have received a copy of the GNU Lesser General Public | |
| 13 | License along with this library; if not, write to the | |
| 14 | Free Software Foundation, Inc., 59 Temple Place, Suite 330, | |
| 15 | Boston, MA 02111-1307 USA | |
| 16 | ||
| 17 | You can contact Elliotte Rusty Harold by sending e-mail to | |
| 18 | elharo@metalab.unc.edu. Please include the word "XOM" in the | |
| 19 | subject line. The XOM home page is located at http://www.xom.nu/ | |
| 20 | */ | |
| 21 | ||
| 22 | package nu.xom; | |
| 23 | ||
| 24 | import java.util.Iterator; | |
| 25 | import java.util.List; | |
| 26 | ||
| 27 | import org.jaxen.NamespaceContext; | |
| 28 | ||
| 29 | /** | |
| 30 | * | |
| 31 | * <p> | |
| 32 | * The generic superclass for all the contents | |
| 33 | * of an XML document. There are exactly eight kinds of | |
| 34 | * nodes in XOM: | |
| 35 | * </p> | |
| 36 | * | |
| 37 | * <ul> | |
| 38 | * <li><code>Element</code></li> | |
| 39 | * <li><code>Document</code></li> | |
| 40 | * <li><code>Text</code></li> | |
| 41 | * <li><code>Comment</code></li> | |
| 42 | * <li><code>Attribute</code></li> | |
| 43 | * <li><code>ProcessingInstruction</code></li> | |
| 44 | * <li><code>DocType</code></li> | |
| 45 | * <li><code>Namespace</code></li> | |
| 46 | * </ul> | |
| 47 | * | |
| 48 | * <p> | |
| 49 | * Every instance of <code>Node</code> is an | |
| 50 | * instance of one of these eight classes | |
| 51 | * (including, possibly, one of their subclasses). | |
| 52 | * </p> | |
| 53 | * | |
| 54 | * | |
| 55 | * @author Elliotte Rusty Harold | |
| 56 | * @version 1.1b4 | |
| 57 | * | |
| 58 | */ | |
| 59 | public abstract class Node { | |
| 60 | ||
| 61 | ||
| 62 | private ParentNode parent = null; | |
| 63 | ||
| 64 | /** | |
| 65 | * <p> | |
| 66 | * Creates a new <code>Node</code> object. | |
| 67 | * Can only be invoked by other members of | |
| 68 | * the <code>nu.xom</code> package. | |
| 69 | * </p> | |
| 70 | */ | |
| 71 | 13740786 | Node() {} |
| 72 | ||
| 73 | ||
| 74 | /** | |
| 75 | * <p> | |
| 76 | * Returns the XPath 1.0 string-value of this node. | |
| 77 | * </p> | |
| 78 | * | |
| 79 | * @return the XPath 1.0 string-value of this node | |
| 80 | */ | |
| 81 | public abstract String getValue(); | |
| 82 | ||
| 83 | ||
| 84 | /** | |
| 85 | * | |
| 86 | * <p> | |
| 87 | * Returns the document that contains this node, | |
| 88 | * or null if this node is not currently part of a document. | |
| 89 | * Each node belongs to no more than one document at a time. | |
| 90 | * If this node is a <code>Document</code>, then it returns | |
| 91 | * this node. | |
| 92 | * </p> | |
| 93 | * | |
| 94 | * @return the document this node is a part of | |
| 95 | */ | |
| 96 | 173 | public final Document getDocument() { |
| 97 | 173 | Node parent = this; |
| 98 | 173 | while (parent != null && !(parent.isDocument())) { |
| 99 | 334 | parent = parent.getParent(); |
| 100 | } | |
| 101 | 173 | return (Document) parent; |
| 102 | } | |
| 103 | ||
| 104 | ||
| 105 | /** | |
| 106 | * | |
| 107 | * <p> | |
| 108 | * Returns the root of the subtree in which this node is found, | |
| 109 | * whether that's a document or an element. | |
| 110 | * </p> | |
| 111 | * | |
| 112 | * @return the root of the subtree that contains this node | |
| 113 | */ | |
| 114 | 1044 | final Node getRoot() { |
| 115 | ||
| 116 | 1044 | Node parent = this.getParent(); |
| 117 | 1044 | if (parent == null) { |
| 118 | 766 | return this; |
| 119 | } | |
| 120 | 278 | while (parent.getParent() != null) { |
| 121 | 210 | parent = parent.getParent(); |
| 122 | } | |
| 123 | 278 | return parent; |
| 124 | ||
| 125 | } | |
| 126 | ||
| 127 | ||
| 128 | /** | |
| 129 | * | |
| 130 | * <p> | |
| 131 | * Returns the base URI of this node as specified by | |
| 132 | * <a href="http://www.w3.org/TR/xmlbase/" target="_top">XML | |
| 133 | * Base</a>, or the empty string if this is not known. In most | |
| 134 | * cases, this is the URL against which relative URLs in this node | |
| 135 | * should be resolved. | |
| 136 | * </p> | |
| 137 | * | |
| 138 | * <p> | |
| 139 | * The base URI of a non-parent node is the base URI of the | |
| 140 | * element containing the node. The base URI of a document | |
| 141 | * node is the URI from which the document was parsed, | |
| 142 | * or which was set by calling <code>setBaseURI</code> on | |
| 143 | * the document. | |
| 144 | * </p> | |
| 145 | * | |
| 146 | * <p> | |
| 147 | * The base URI of an element is determined as follows: | |
| 148 | * </p> | |
| 149 | * | |
| 150 | * <ul> | |
| 151 | * <li> | |
| 152 | * If the element has an <code>xml:base</code> attribute, | |
| 153 | * then the value of that attribute is | |
| 154 | * converted from an IRI to a URI, absolutized if possible, | |
| 155 | * and returned. | |
| 156 | * </li> | |
| 157 | * <li> | |
| 158 | * Otherwise, if any ancestor element of the element loaded | |
| 159 | * from the same entity has an <code>xml:base</code> | |
| 160 | * attribute, then the value of that attribute from the | |
| 161 | * nearest such ancestor is converted from an IRI to a URI, | |
| 162 | * absolutized if possible, and returned. | |
| 163 | * <em><code>xml:base</code> attributes from other entities are | |
| 164 | * not considered.</em> | |
| 165 | * </li> | |
| 166 | * <li> | |
| 167 | * Otherwise, if <code>setBaseURI()</code> has been invoked on | |
| 168 | * this element, then the URI most recently passed to that method | |
| 169 | * is absolutized if possible and returned. | |
| 170 | * </li> | |
| 171 | * <li> | |
| 172 | * Otherwise, if the element comes from an externally | |
| 173 | * parsed entity or the document entity, and the | |
| 174 | * original base URI has not been changed by invoking | |
| 175 | * <code>setBaseURI()</code>, then the URI of that entity is | |
| 176 | * returned. | |
| 177 | * </li> | |
| 178 | * <li> | |
| 179 | * Otherwise, (the element was created by a constructor | |
| 180 | * rather than being parsed from an existing document), the | |
| 181 | * base URI of the nearest ancestor that does have a base URI | |
| 182 | * is returned. If no ancestors have a base URI, then the | |
| 183 | * empty string is returned. | |
| 184 | * </li> | |
| 185 | * </ul> | |
| 186 | * | |
| 187 | * <p> | |
| 188 | * Absolutization takes place as specified by the | |
| 189 | * <a target="_top" href="http://www.w3.org/TR/xmlbase/">XML | |
| 190 | * Base specification</a>. However, it is not always possible to | |
| 191 | * absolutize a relative URI, in which case the empty string will | |
| 192 | * be returned. | |
| 193 | * </p> | |
| 194 | * | |
| 195 | * @return the base URI of this node | |
| 196 | */ | |
| 197 | 14 | public String getBaseURI() { |
| 198 | 1 | if (parent == null) return ""; |
| 199 | 13 | return parent.getBaseURI(); |
| 200 | } | |
| 201 | ||
| 202 | ||
| 203 | /** | |
| 204 | * | |
| 205 | * <p> | |
| 206 | * Returns the node that contains this node, | |
| 207 | * or null if this node does not have a parent. | |
| 208 | * </p> | |
| 209 | * | |
| 210 | * @return the element or document that most immediately | |
| 211 | * contains this node | |
| 212 | */ | |
| 213 | 16247923 | public final ParentNode getParent() { |
| 214 | 16247923 | return this.parent; |
| 215 | } | |
| 216 | ||
| 217 | ||
| 218 | 13032970 | final void setParent(ParentNode parent) { |
| 219 | 13032970 | this.parent = parent; |
| 220 | } | |
| 221 | ||
| 222 | ||
| 223 | /** | |
| 224 | * <p> | |
| 225 | * Removes this node from its parent so that it can be added | |
| 226 | * to a different parent node or document. This method does nothing | |
| 227 | * if the node does not have a parent. | |
| 228 | * </p> | |
| 229 | * | |
| 230 | * @throws XMLException if the parent refuses to detach this node | |
| 231 | */ | |
| 232 | 121707 | public void detach() { |
| 233 | ||
| 234 | 62 | if (parent == null) return; |
| 235 | 121645 | else if (this.isAttribute()) { |
| 236 | 3 | Element element = (Element) parent; |
| 237 | 3 | element.removeAttribute((Attribute) this); |
| 238 | } | |
| 239 | else { | |
| 240 | 121642 | parent.removeChild(this); |
| 241 | } | |
| 242 | ||
| 243 | } | |
| 244 | ||
| 245 | ||
| 246 | /** | |
| 247 | * <p> | |
| 248 | * Returns the child of this node at the specified position. | |
| 249 | * </p> | |
| 250 | * | |
| 251 | * @param position the index of the child node to return | |
| 252 | * | |
| 253 | * @return the position<sup>th</sup> child node of this node | |
| 254 | * | |
| 255 | * @throws IndexOutOfBoundsException if this node does not have children | |
| 256 | */ | |
| 257 | public abstract Node getChild(int position); | |
| 258 | ||
| 259 | ||
| 260 | /** | |
| 261 | * <p> | |
| 262 | * Returns the number of children of this node. | |
| 263 | * This is always non-negative (greater than or equal to zero). | |
| 264 | * </p> | |
| 265 | * | |
| 266 | * @return the number of children of this node | |
| 267 | */ | |
| 268 | public abstract int getChildCount(); | |
| 269 | ||
| 270 | ||
| 271 | /** | |
| 272 | * <p> | |
| 273 | * Returns a deep copy of this node with no parent, | |
| 274 | * that can be added to the current document or a different one. | |
| 275 | * </p> | |
| 276 | * | |
| 277 | * <p> | |
| 278 | * Per Bloch, the <code>Cloneable</code> | |
| 279 | * interface is just a mess and should | |
| 280 | * be avoided. However, I do not follow his suggestion of a copy | |
| 281 | * constructor exclusively because it is useful to be able to | |
| 282 | * copy a node without knowing its more specific type. | |
| 283 | * Ken Arnold agrees with this. It's more effective for | |
| 284 | * subclasses that can return an instance of the subclass. | |
| 285 | * </p> | |
| 286 | * | |
| 287 | * @return a copy of this node without a parent | |
| 288 | */ | |
| 289 | public abstract Node copy(); | |
| 290 | ||
| 291 | ||
| 292 | /** | |
| 293 | * <p> | |
| 294 | * Returns the actual XML form of this node, such as might be | |
| 295 | * copied and pasted from the original document. However, this | |
| 296 | * does not preserve semantically insignificant details such as | |
| 297 | * white space inside tags or the use of empty-element tags vs. | |
| 298 | * start-tag end-tag pairs. | |
| 299 | * </p> | |
| 300 | * | |
| 301 | * @return an XML representation of this node | |
| 302 | */ | |
| 303 | public abstract String toXML(); | |
| 304 | ||
| 305 | ||
| 306 | /** | |
| 307 | * <p> | |
| 308 | * Tests for node identity. That is, two | |
| 309 | * <code>Node</code> objects are equal | |
| 310 | * if and only if they are the same object. | |
| 311 | * </p> | |
| 312 | * | |
| 313 | * @param o the object compared for equality to this node | |
| 314 | * | |
| 315 | * @return true if <code>o</code> is this node; false otherwise | |
| 316 | * | |
| 317 | * @see java.lang.Object#equals(Object) | |
| 318 | */ | |
| 319 | 7830 | public final boolean equals(Object o) { |
| 320 | 7830 | return this == o; |
| 321 | } | |
| 322 | ||
| 323 | ||
| 324 | /** | |
| 325 | * <p> | |
| 326 | * Returns a unique identifier for this node. | |
| 327 | * The value returned is the same as returned by | |
| 328 | * <code>super.hashCode()</code> | |
| 329 | * because nodes use identity semantics. | |
| 330 | * </p> | |
| 331 | * | |
| 332 | * @return a probably unique identifier for this node | |
| 333 | * | |
| 334 | * @see java.lang.Object#hashCode() | |
| 335 | */ | |
| 336 | 986 | public final int hashCode() { |
| 337 | 986 | return super.hashCode(); |
| 338 | } | |
| 339 | ||
| 340 | ||
| 341 | /** | |
| 342 | * <p> | |
| 343 | * Returns the nodes selected by the XPath expression in the | |
| 344 | * context of this node in document order as defined in XSLT. | |
| 345 | * All namespace prefixes used in the | |
| 346 | * expression should be bound to namespace URIs by the | |
| 347 | * second argument. | |
| 348 | * </p> | |
| 349 | * | |
| 350 | * <p> | |
| 351 | * Note that XPath expressions operate on the XPath data model, | |
| 352 | * not the XOM data model. XPath counts all adjacent | |
| 353 | * <code>Text</code> objects as a single text node, and does not | |
| 354 | * consider empty <code>Text</code> objects. For instance, an | |
| 355 | * element that has exactly three text children in XOM, will | |
| 356 | * have exactly one text child in XPath, whose value is the | |
| 357 | * concatenation of all three XOM <code>Text</code> objects. | |
| 358 | * </p> | |
| 359 | * | |
| 360 | * <p> | |
| 361 | * You can use XPath expressions that use the namespace axis. | |
| 362 | * However, namespace nodes are never returned. If an XPath | |
| 363 | * expression only selects namespace nodes, then this method will | |
| 364 | * return an empty list. | |
| 365 | * </p> | |
| 366 | * | |
| 367 | * <p> | |
| 368 | * No variables are bound. | |
| 369 | * </p> | |
| 370 | * | |
| 371 | * <p> | |
| 372 | * The context position is the index of this node among its parent's | |
| 373 | * children, counting adjacent text nodes as one. The context size | |
| 374 | * is the number of children this node's parent has, again counting | |
| 375 | * adjacent text nodes as one node. If the parent is a | |
| 376 | * <code>Document</code>, then the <code>DocType</code> (if any) is | |
| 377 | * not counted. If the node has no parent, then the context position | |
| 378 | * is 1, and the context size is 1. | |
| 379 | * </p> | |
| 380 | * | |
| 381 | * <p> | |
| 382 | * Queries such as /*, //, and /*//p that refer to the | |
| 383 | * root node do work when operating with a context node that is not | |
| 384 | * part of a document. However, the query / (return the root node) | |
| 385 | * throws an <code>XPathException</code> when applied to a node | |
| 386 | * that is not part of the document. Furthermore the top-level | |
| 387 | * node in the tree is treated as the first and only child of the | |
| 388 | * root node, not as the root node itself. For instance, this | |
| 389 | * query stores <code>parent</code> in the <code>result</code> | |
| 390 | * variable, not <code>child</code>: | |
| 391 | * </p> | |
| 392 | * | |
| 393 | * <pre><code> Element parent = new Element("parent"); | |
| 394 | * Element child = new Element("child"); | |
| 395 | * parent.appendChild(child); | |
| 396 | * Nodes results = child.query("/*"); | |
| 397 | * Node result = results.get(0);</code></pre> | |
| 398 | * | |
| 399 | * @param xpath the XPath expression to evaluate | |
| 400 | * @param namespaces a collection of namespace prefix bindings | |
| 401 | * used in the XPath expression | |
| 402 | * | |
| 403 | * @return a list of all matched nodes; possibly empty | |
| 404 | * | |
| 405 | * @throws XPathException if there's a syntax error in the | |
| 406 | * expression; or the query returns something other than | |
| 407 | * a node-set | |
| 408 | * | |
| 409 | */ | |
| 410 | 635 | public final Nodes query(String xpath, XPathContext namespaces) { |
| 411 | ||
| 412 | 635 | if (this.isDocType()) { |
| 413 | 1 | throw new XPathException("Can't use XPath on a DocType"); |
| 414 | } | |
| 415 | 634 | DocumentFragment frag = null; |
| 416 | ||
| 417 | 634 | Node root = getRoot(); |
| 418 | 634 | if (! root.isDocument()) { |
| 419 | 159 | frag = new DocumentFragment(); |
| 420 | 159 | frag.appendChild(root); |
| 421 | } | |
| 422 | ||
| 423 | 634 | try { |
| 424 | 634 | JaxenConnector connector = new JaxenConnector(xpath); |
| 425 | 622 | if (namespaces == null) { |
| 426 | 277 | connector.setNamespaceContext(emptyContext); |
| 427 | } | |
| 428 | else { | |
| 429 | 345 | connector.setNamespaceContext(namespaces.getJaxenContext()); |
| 430 | } | |
| 431 | ||
| 432 | 622 | List queryResults = connector.selectNodes(this); |
| 433 | 594 | Iterator iterator = queryResults.iterator(); |
| 434 | 594 | while (iterator.hasNext()) { |
| 435 | 1824 | Object o = iterator.next(); |
| 436 | 1824 | try { |
| 437 | 1824 | Node n = (Node) o; |
| 438 | 1711 | if (n.isDocumentFragment()) { |
| 439 | 2 | iterator.remove(); |
| 440 | // Want to allow // and //* and so forth | |
| 441 | // but not / for rootless documents | |
| 442 | 2 | if (queryResults.isEmpty()) { |
| 443 | 1 | throw new XPathException("Tried to get document " |
| 444 | + "node of disconnected subtree"); | |
| 445 | } | |
| 446 | } | |
| 447 | } | |
| 448 | catch (ClassCastException ex) { | |
| 449 | 113 | XPathTypeException qex = new XPathTypeException( |
| 450 | "XPath expression " + xpath + " did not return a node-set.", | |
| 451 | queryResults.get(0)); | |
| 452 | 113 | qex.setXPath(xpath); |
| 453 | 113 | throw qex; |
| 454 | } | |
| 455 | } | |
| 456 | ||
| 457 | 480 | return new Nodes(queryResults); |
| 458 | } | |
| 459 | catch (XPathException ex) { | |
| 460 | 114 | ex.setXPath(xpath); |
| 461 | 114 | throw ex; |
| 462 | } | |
| 463 | catch (Exception ex) { // JaxenException and RuntimeException | |
| 464 | // I can't trigger a RuntimeException with the current Jaxen | |
| 465 | // code base; but it's been an issue in the past, and I'm | |
| 466 | // not convinced it's fully fixed now. | |
| 467 | 40 | XPathException xpe = new XPathException("XPath error: " + ex.getMessage(), ex); |
| 468 | 40 | xpe.setXPath(xpath); |
| 469 | 40 | throw xpe; |
| 470 | } | |
| 471 | finally { | |
| 472 | 159 | if (frag != null) frag.removeChild(0); |
| 473 | } | |
| 474 | ||
| 475 | } | |
| 476 | ||
| 477 | ||
| 478 | private static NamespaceContext emptyContext = new EmptyNamespaceContext(); | |
| 479 | ||
| 480 | private static class EmptyNamespaceContext implements NamespaceContext { | |
| 481 | ||
| 482 | 4409 | public String translateNamespacePrefixToUri(String prefix) { |
| 483 | // XML prefix is recognized automatically in Jaxen without | |
| 484 | // calling this method. | |
| 485 | // if ("xml".equals(prefix)) return Namespace.XML_NAMESPACE; | |
| 486 | 4409 | return null; |
| 487 | } | |
| 488 | ||
| 489 | ||
| 490 | } | |
| 491 | ||
| 492 | ||
| 493 | /** | |
| 494 | * <p> | |
| 495 | * Returns the nodes selected by the XPath expression in the | |
| 496 | * context of this node in document order as defined by XSLT. | |
| 497 | * This XPath expression must not contain | |
| 498 | * any namespace prefixes. | |
| 499 | * </p> | |
| 500 | * | |
| 501 | * <p> | |
| 502 | * No variables are bound. No namespace prefixes are bound. | |
| 503 | * </p> | |
| 504 | * | |
| 505 | * @param xpath the XPath expression to evaluate | |
| 506 | * | |
| 507 | * @return a list of all matched nodes; possibly empty | |
| 508 | * | |
| 509 | * @throws XPathException if there's a syntax error in the | |
| 510 | * expression; or the query returns something other than | |
| 511 | * a node-set | |
| 512 | */ | |
| 513 | 282 | public final Nodes query(String xpath) { |
| 514 | 282 | return query(xpath, null); |
| 515 | } | |
| 516 | ||
| 517 | ||
| 518 | // Methods to replace instanceof tests to improve performance | |
| 519 | 3819667 | boolean isElement() { |
| 520 | 3819667 | return false; |
| 521 | } | |
| 522 | ||
| 523 | 240727 | boolean isText() { |
| 524 | 240727 | return false; |
| 525 | } | |
| 526 | ||
| 527 | 42206 | boolean isComment() { |
| 528 | 42206 | return false; |
| 529 | } | |
| 530 | ||
| 531 | 42179 | boolean isProcessingInstruction() { |
| 532 | 42179 | return false; |
| 533 | } | |
| 534 | ||
| 535 | 121878 | boolean isAttribute() { |
| 536 | 121878 | return false; |
| 537 | } | |
| 538 | ||
| 539 | 2871951 | boolean isDocument() { |
| 540 | 2871951 | return false; |
| 541 | } | |
| 542 | ||
| 543 | 102498 | boolean isDocType() { |
| 544 | 102498 | return false; |
| 545 | } | |
| 546 | ||
| 547 | 1961 | boolean isDocumentFragment() { |
| 548 | 1961 | return false; |
| 549 | } | |
| 550 | ||
| 551 | ||
| 552 | } |
|
||||||||||