listUrl = $list_url;
    $this->namespaces = $namespaces;

    // Collect libxml parse errors internally so they can be reported after a
    // failed load.
    libxml_use_internal_errors(TRUE);
  }

  /**
   * {@inheritdoc}
   *
   * Our public face is the URL we're getting items from.
   */
  public function __toString() {
    return $this->listUrl;
  }

  /**
   * {@inheritdoc}
   *
   * Load the XML at the given URL, and return an array of the IDs found
   * within it.
   *
   * @return array|null
   *   The IDs extracted by getIDsFromXML(), or NULL when the document could
   *   not be loaded. On failure the pending libxml errors are displayed and
   *   then cleared.
   */
  public function getIdList() {
    migrate_instrument_start("Retrieve $this->listUrl");
    $xml = simplexml_load_file($this->listUrl);
    migrate_instrument_stop("Retrieve $this->listUrl");

    if ($xml === FALSE) {
      Migration::displayMessage(t(
        'Loading of !listUrl failed:',
        array('!listUrl' => $this->listUrl)
      ));
      foreach (libxml_get_errors() as $error) {
        Migration::displayMessage(MigrateItemsXML::parseLibXMLError($error));
      }
      // Empty the libxml error buffer, otherwise the same errors are reported
      // again by the next failed load.
      libxml_clear_errors();
      return NULL;
    }

    $this->registerNamespaces($xml);
    return $this->getIDsFromXML($xml);
  }

  /**
   * Gets an array of the IDs found in a XML.
   *
   * Given an XML object, parse out the IDs for processing and return them as an
   * array. The default implementation assumes the IDs are simply the values of
   * the top-level elements - in most cases, you will need to override this to
   * reflect your particular XML structure.
   *
   * @param SimpleXMLElement $xml
   *   Object from which we get the IDs.
   *
   * @return array
   *   Extracted IDs as strings, with duplicates removed.
   */
  protected function getIDsFromXML(SimpleXMLElement $xml) {
    $ids = array();
    foreach ($xml as $element) {
      $ids[] = (string) $element;
    }

    // Additionally, if there are any namespaces registered, try to parse
    // elements with namespaces as well.
    if ($namespaces = $xml->getNamespaces()) {
      foreach ($namespaces as $url) {
        foreach ($xml->children($url) as $element) {
          $ids[] = (string) $element;
        }
      }
    }

    return array_unique($ids);
  }

  /**
   * {@inheritdoc}
   *
   * Return a count of all available IDs from the source listing.
* The default implementation assumes the count of top-level elements
   * reflects the number of IDs available - in many cases, you will need
   * to override this to reflect your particular XML structure.
   *
   * @return int
   *   count($xml) plus the number of children in every namespace returned by
   *   $xml->getNamespaces(); 0 when the document cannot be loaded.
   */
  public function computeCount() {
    $xml = simplexml_load_file($this->listUrl);
    if ($xml === FALSE) {
      // registerNamespaces() only accepts a SimpleXMLElement, so a failed
      // load must be handled here: report it and count nothing.
      Migration::displayMessage(t(
        'Loading of !listUrl failed:',
        array('!listUrl' => $this->listUrl)
      ));
      foreach (libxml_get_errors() as $error) {
        Migration::displayMessage(MigrateItemsXML::parseLibXMLError($error));
      }
      libxml_clear_errors();
      return 0;
    }

    $this->registerNamespaces($xml);
    // Number of sourceid elements beneath the top-level element.
    $count = count($xml);

    // Additionally, if there are any namespaces registered, try to count
    // elements with namespaces as well.
    if ($namespaces = $xml->getNamespaces()) {
      foreach ($namespaces as $url) {
        $count += count($xml->children($url));
      }
    }

    return $count;
  }

  /**
   * Explicitly register namespaces on an XML element.
   *
   * @param SimpleXMLElement $xml
   *   A SimpleXMLElement to register the namespaces on.
   */
  protected function registerNamespaces(SimpleXMLElement &$xml) {
    foreach ($this->namespaces as $prefix => $namespace) {
      $xml->registerXPathNamespace($prefix, $namespace);
    }
  }

}

/**
 * Implementation of MigrateItem, for retrieving a parsed XML document given
 * an ID provided by a MigrateList class.
 */
class MigrateItemXML extends MigrateItem {

  /**
   * A URL pointing to an XML document containing the data for one item to be
   * migrated.
   *
   * @var string
   */
  protected $itemUrl;

  /**
   * An array of namespaces to explicitly register before Xpath queries.
   *
   * Keys are prefixes, values are namespace URIs.
   *
   * @var array
   */
  protected $namespaces;

  /**
   * {@inheritdoc}
   *
   * @param string $item_url
   *   URL of the item document; constructItemUrl() replaces its ':id' token.
   * @param array $namespaces
   *   Optional prefix => URI pairs to register on each loaded document.
   */
  public function __construct($item_url, array $namespaces = array()) {
    parent::__construct();
    $this->itemUrl = $item_url;
    $this->namespaces = $namespaces;

    // Suppress errors during parsing, so we can pick them up after.
    libxml_use_internal_errors(TRUE);
  }

  /**
   * Explicitly register namespaces on an XML element.
   *
   * @param SimpleXMLElement $xml
   *   A SimpleXMLElement to register the namespaces on.
*/
  protected function registerNamespaces(SimpleXMLElement &$xml) {
    foreach ($this->namespaces as $ns_prefix => $ns_uri) {
      $xml->registerXPathNamespace($ns_prefix, $ns_uri);
    }
  }

  /**
   * {@inheritdoc}
   *
   * Loads the XML document for one ID and wraps it in a row object.
   *
   * @param mixed $id
   *   ID of the item to load.
   *
   * @return stdClass|null
   *   Object whose xml property holds the loaded SimpleXMLElement, or NULL
   *   when the ID or URL is empty or the document cannot be loaded.
   */
  public function getItem($id) {
    // Nothing can be fetched without an ID.
    if (empty($id)) {
      return NULL;
    }

    // Nor without a URL to fetch it from.
    $url = $this->constructItemUrl($id);
    if (empty($url)) {
      return NULL;
    }

    $document = $this->loadXmlUrl($url);
    if ($document === FALSE) {
      // Save the failure, with every pending libxml error, against this ID.
      $migration = Migration::currentMigration();
      $report = t('Loading of !objecturl failed:', array('!objecturl' => $url));
      foreach (libxml_get_errors() as $xml_error) {
        $report .= "\n" . $xml_error->message;
      }
      $migration->getMap()->saveMessage(array($id), $report, MigrationBase::MESSAGE_ERROR);
      libxml_clear_errors();
      return NULL;
    }

    $this->registerNamespaces($document);
    $row = new stdClass();
    $row->xml = $document;
    return $row;
  }

  /**
   * Creates a valid URL pointing to current item.
   *
   * The default implementation simply replaces the :id token in the URL with
   * the ID obtained from MigrateListXML. Override if the item URL is not so
   * easily expressed from the ID.
   *
   * @param mixed $id
   *   XML item ID.
   *
   * @return string
   *   The item URL with every ':id' token replaced.
   */
  protected function constructItemUrl($id) {
    $template = $this->itemUrl;
    return str_replace(':id', $id, $template);
  }

  /**
   * Loads the XML.
   *
   * Default XML loader - just use Simplexml directly. This can be overridden
   * for preprocessing of XML (removal of unwanted elements, caching of XML if
   * the source service is slow, etc.)
   *
   * @param string $item_url
   *   URL to the XML file.
   *
   * @return SimpleXMLElement|false
   *   Whatever simplexml_load_file() returns for the URL.
   */
  protected function loadXmlUrl($item_url) {
    $loaded = simplexml_load_file($item_url);
    return $loaded;
  }

  /**
   * Implements MigrateItem::hash().
   *
   * @param stdClass $row
   *   Row whose xml property is a SimpleXMLElement.
   *
   * @return string
   *   MD5 of the serialized XML string of the row.
   */
  public function hash($row) {
    // $row->xml is a SimpleXMLElement, so the hash is taken over its XML
    // string form.
    migrate_instrument_start('MigrateItemXML::hash');
    $markup = $row->xml->asXML();
    $digest = md5(serialize($markup));
    migrate_instrument_stop('MigrateItemXML::hash');
    return $digest;
  }

}

/**
 * Adds xpath info to field mappings for XML sources.
 */
class MigrateXMLFieldMapping extends MigrateFieldMapping {

  /**
   * The xpath used to retrieve the data for this field from the XML.
   *
   * @var string
   */
  protected $xpath;

  /**
   * Returns the xpath stored on this mapping.
   *
   * @return string
   *   The xpath, or NULL when none has been set.
   */
  public function getXpath() {
    return $this->xpath;
  }

  /**
   * Stores an xpath on this field mapping.
   *
   * @param string $xpath
   *   The xpath to remember.
   *
   * @return MigrateFieldMapping
   *   This mapping, so calls can be chained.
   */
  public function xpath($xpath) {
    $this->xpath = $xpath;
    return $this;
  }

}

/**
 * Migrations using XML sources should extend this class instead of Migration.
 */
abstract class XMLMigration extends Migration {

  /**
   * {@inheritdoc}
   *
   * So we can create our special field mapping class.
   *
   * @todo Find a cleaner way to just substitute a different mapping class.
   *
   * @param string|null $destination_field
   *   Machine-name of destination field.
   * @param string|null $source_field
   *   Name of source field.
   * @param bool $warn_on_override
   *   Set to FALSE to prevent warnings when there's an existing mapping
   *   for this destination field.
   *
   * @return MigrateXMLFieldMapping
   *   The newly registered mapping.
   */
  public function addFieldMapping($destination_field, $source_field = NULL, $warn_on_override = TRUE) {
    $has_destination = !is_null($destination_field);

    // Tell the user when an earlier mapping is about to be replaced.
    if ($warn_on_override && $has_destination && isset($this->codedFieldMappings[$destination_field])) {
      $warning = t('!name addFieldMapping: !dest was previously mapped, overridden',
        array('!name' => $this->machineName, '!dest' => $destination_field));
      self::displayMessage($warning, 'warning');
    }

    $mapping = new MigrateXMLFieldMapping($destination_field, $source_field);
    if ($has_destination) {
      $this->codedFieldMappings[$destination_field] = $mapping;
    }
    else {
      $this->codedFieldMappings[] = $mapping;
    }
    return $mapping;
  }

  /**
   * {@inheritdoc}
   *
   * A normal $data_row has all the input data as top-level fields - in this
   * case, however, the data is embedded within a SimpleXMLElement object in
   * $data_row->xml. Explode that out to the normal form, and pass on to the
   * normal implementation.
   */
  protected function applyMappings() {
    // The xpaths on the mappings are the only description of what to extract.
    foreach ($this->getFieldMappings() as $mapping) {
      $source = $mapping->getSourceField();
      // Skip mappings without a source field or whose value is already set.
      if (!$source || isset($this->sourceValues->{$source})) {
        continue;
      }

      $xpath = $mapping->getXpath();
      if (!$xpath) {
        continue;
      }

      // Derived class may override applyXpath().
      $extracted = $this->applyXpath($this->sourceValues, $xpath);
      if (!is_null($extracted)) {
        $this->sourceValues->$source = $extracted;
      }
    }
    parent::applyMappings();
  }

  /**
   * Gets item from XML using the xpath.
   *
   * Default implementation - straightforward xpath application
   *
   * @param stdClass $data_row
   *   row containing items.
* @param string $xpath
   *   xpath used to find the item
   *
   * @return string|array|null
   *   The string value of the single match, an array of string values when
   *   there are several matches, or NULL when nothing matches.
   */
  public function applyXpath($data_row, $xpath) {
    $matches = $data_row->xml->xpath($xpath);
    if (!$matches) {
      return NULL;
    }

    // A single match is returned as a plain string.
    if (count($matches) <= 1) {
      return (string) $matches[0];
    }

    $values = array();
    foreach ($matches as $match) {
      $values[] = (string) $match;
    }
    return $values;
  }

}

/* ========================================================================== */
/* MultiItems Method */
/* ========================================================================== */

/**
 * Implementation of MigrateItems, for providing a list of IDs and for
 * retrieving a parsed XML document given an ID from this list.
 */
class MigrateItemsXML extends MigrateItems {

  /**
   * An array with all urls to available xml files.
   *
   * @var array
   */
  protected $urls;

  /**
   * Define the current cursor over the urls array.
   *
   * @var string
   */
  protected $currentUrl;

  /**
   * An array of namespaces to explicitly register before Xpath queries.
   *
   * @var array
   */
  protected $namespaces;

  /**
   * Stores the loaded XML document from currentUrl.
   *
   * FALSE until a document has been loaded successfully.
   *
   * @var SimpleXMLElement|false
   */
  protected $currentXml = FALSE;

  /**
   * To find the right url depending on the id, we'll build a map in the form of
   * an array('url1' => $ids, 'url2' => $ids, ...).
   *
   * @var array
   */
  protected $idsMap = NULL;

  /**
   * Stores the id list from all urls.
   *
   * @var array
   */
  protected $cacheIDs = NULL;

  /**
   * xpath identifying the element used for each item.
   *
   * @var string
   */
  protected $itemXpath;

  /**
   * xpath identifying the subelement under itemXpath that holds the id for
   * each item.
   *
   * @var string
   */
  protected $itemIDXpath;

  /**
   * Gets xpath identifying the element used for each item.
   *
   * @return string
   *   The item xpath.
   */
  public function getItemXpath() {
    return $this->itemXpath;
  }

  /**
   * Getter for itemIDXpath.
*
   * @return string
   *   The xpath, relative to an item element, that locates the item ID.
   */
  public function getIDXpath() {
    return $this->itemIDXpath;
  }

  /**
   * {@inheritdoc}
   *
   * @param string|array $urls
   *   One URL or an array of URLs of the XML documents holding the items.
   * @param string $item_xpath
   *   xpath identifying the element used for each item.
   * @param string $item_id_xpath
   *   xpath, relative to an item, of the element holding its ID.
   * @param array $namespaces
   *   Optional prefix => URI pairs to register before xpath queries.
   */
  public function __construct($urls, $item_xpath = 'item', $item_id_xpath = 'id', array $namespaces = array()) {
    parent::__construct();
    if (!is_array($urls)) {
      $urls = array($urls);
    }
    $this->urls = $urls;
    $this->itemXpath = $item_xpath;
    $this->itemIDXpath = $item_id_xpath;
    $this->namespaces = $namespaces;

    // Suppress errors during parsing, so we can pick them up after.
    libxml_use_internal_errors(TRUE);
  }

  /**
   * Explicitly register namespaces on an XML element.
   *
   * @param SimpleXMLElement $xml
   *   A SimpleXMLElement to register the namespaces on.
   */
  protected function registerNamespaces(SimpleXMLElement &$xml) {
    foreach ($this->namespaces as $prefix => $namespace) {
      $xml->registerXPathNamespace($prefix, $namespace);
    }
  }

  /**
   * Our public face is the URL list we're getting items from.
   *
   * @return string
   *   The URLs as a bulleted list, one per line, followed by the item xpath
   *   and the item ID xpath.
   */
  public function __toString() {
    // Separator that starts every URL on its own bulleted line.
    $bullet = "\n  • ";

    // Prepare a list of urls. The joined list has to be part of the output,
    // it is what identifies this source.
    $output = 'urls = ' . $bullet . implode($bullet, $this->urls);
    $output .= "\n    ";
    // Add selection rules to the end.
    $output .= 'item xpath = ' . $this->itemXpath . ' | ';
    $output .= 'item ID xpath = ' . $this->itemIDXpath;

    return $output;
  }

  /**
   * Load and return the xml from currentUrl.
   *
   * The document is re-read from currentUrl on every call. On failure the
   * libxml errors are displayed and cleared, and FALSE is stored and returned.
   *
   * @return SimpleXMLElement|false
   *   The loaded document (by reference), or FALSE when loading failed or no
   *   URL has been loaded successfully yet.
   */
  public function &xml() {
    if (!empty($this->currentUrl)) {
      $this->currentXml = simplexml_load_file($this->currentUrl);
      if ($this->currentXml === FALSE) {
        Migration::displayMessage(t(
          'Loading of !currentUrl failed:',
          array('!currentUrl' => $this->currentUrl)
        ));
        foreach (libxml_get_errors() as $error) {
          Migration::displayMessage(self::parseLibXMLError($error));
        }
        // Empty the libxml error buffer so these errors are not shown again
        // when the next URL fails.
        libxml_clear_errors();
      }
      else {
        $this->registerNamespaces($this->currentXml);
      }
    }
    return $this->currentXml;
  }

  /**
   * Parses a LibXMLError to a error message string.
   *
   * @param LibXMLError $error
   *   Error reported by libxml.
   *
   * @return string
   *   Multi-line message with the severity, code, message text, line, column
   *   and file of the error.
   */
  public static function parseLibXMLError(LibXMLError $error) {
    $error_code_name = 'Unknown Error';
    switch ($error->level) {
      case LIBXML_ERR_WARNING:
        $error_code_name = t('Warning');
        break;

      case LIBXML_ERR_ERROR:
        $error_code_name = t('Error');
        break;

      case LIBXML_ERR_FATAL:
        $error_code_name = t('Fatal Error');
        break;
    }

    return t(
      "!libxmlerrorcodename !libxmlerrorcode: !libxmlerrormessage\n" .
      "Line: !libxmlerrorline\n" .
      "Column: !libxmlerrorcolumn\n" .
      "File: !libxmlerrorfile",
      array(
        '!libxmlerrorcodename' => $error_code_name,
        '!libxmlerrorcode' => $error->code,
        '!libxmlerrormessage' => trim($error->message),
        '!libxmlerrorline' => $error->line,
        '!libxmlerrorcolumn' => $error->column,
        // An empty file name is reported as NULL.
        '!libxmlerrorfile' => $error->file ? $error->file : NULL,
      )
    );
  }

  /**
   * Load ID's from URLs.
   *
   * Load ids from all urls and map them in idsMap depending on the currentURL.
   *
   * After ids were fetched from all urls store them in cacheIDs and return the
   * whole list.
*
   * @return array|null
   *   The unique IDs found across all URLs, or NULL when none were found.
   */
  public function getIdList() {
    $ids = array();
    // Start from an empty map so that it is always an array afterwards, even
    // when no URL could be loaded.
    $this->idsMap = array();

    foreach ($this->urls as $url) {
      migrate_instrument_start("Retrieve $url");
      // Make sure, to load new xml.
      $this->currentUrl = $url;
      $xml = $this->xml();
      if ($xml !== FALSE) {
        $url_ids = $this->getIDsFromXML($xml);
        $this->idsMap[$url] = $url_ids;
        $ids = array_merge($ids, $url_ids);
      }
      migrate_instrument_stop("Retrieve $url");
    }

    if (!empty($ids)) {
      $this->cacheIDs = array_unique($ids);
      return $this->cacheIDs;
    }
    return NULL;
  }

  /**
   * Given an XML object, parse out the IDs for processing and return them as
   * an array. The location of the IDs in the XML are based on the item xpath
   * and item ID xpath set in the constructor.
   * eg, xpath = itemXpath . '/' . itemIDXpath
   *
   * @param SimpleXMLElement $xml
   *   Document to extract the IDs from.
   *
   * @return array
   *   The IDs as strings, with duplicates removed. Items for which no ID is
   *   found are skipped.
   */
  protected function getIDsFromXML(SimpleXMLElement $xml) {
    $result = $xml->xpath($this->itemXpath);
    $ids = array();
    if ($result) {
      foreach ($result as $element) {
        if (!isset($element)) {
          continue;
        }
        // Namespaces must be reapplied after xpath().
        $this->registerNamespaces($element);
        $id = $this->getItemID($element);
        if (!is_null($id)) {
          $ids[] = (string) $id;
        }
      }
    }
    return array_unique($ids);
  }

  /**
   * Return a count of all available IDs from the source listing.
   *
   * @return int
   *   Count of available IDs; 0 when no IDs could be retrieved.
   */
  public function computeCount() {
    if (!isset($this->cacheIDs)) {
      $this->getIdList();
    }
    // cacheIDs is still NULL when getIdList() found nothing, and NULL must
    // not be handed to count().
    return is_array($this->cacheIDs) ? count($this->cacheIDs) : 0;
  }

  /**
   * Load the XML at the given URL, and return an array.
   *
   * @return array|null
   *   Array of the items found within the current URL's document, or NULL
   *   when the document cannot be loaded.
   */
  public function getAllItems() {
    $xml = $this->xml();
    if ($xml !== FALSE) {
      return $this->getItemsFromXML($xml, TRUE);
    }
    return NULL;
  }

  /**
   * Items parsed from the current URL, keyed by item ID.
   *
   * @var array
   */
  protected $currentItems = NULL;

  /**
   * Parses the items out of a given XML object.
*
   * Given an XML object, parse out the items for processing and return them as
   * an array. The location of the items in the XML are based on the item xpath
   * set in the constructor. Items from currentUrl are cached. The list of items
   * is returned from the cache except when this is the first call
   * (ie, cache is NULL) OR the refresh parameter is TRUE.
   *
   * Items are cached as an array of key=ID and value=stdClass object with
   * attribute xml containing the xml SimpleXMLElement object of the item.
   *
   * @param SimpleXMLElement $xml
   *   XML to parse.
   * @param bool $refresh
   *   TRUE to parse the items again, FALSE to use the cache when it is filled.
   *
   * @return array|null
   *   Array of obtained items, or NULL when the item xpath matches nothing.
   */
  public function getItemsFromXML(SimpleXMLElement $xml, $refresh = FALSE) {
    // Serve the cache only when no refresh was requested and it is filled.
    if ($refresh === FALSE && !empty($this->currentItems)) {
      return $this->currentItems;
    }

    $this->currentItems = NULL;
    $items = array();
    $result = $xml->xpath($this->itemXpath);

    if ($result) {
      foreach ($result as $item_xml) {
        if (!isset($item_xml)) {
          continue;
        }
        // Namespaces must be reapplied after xpath().
        $this->registerNamespaces($item_xml);
        $id = $this->getItemID($item_xml);
        $item = new stdClass();
        $item->xml = $item_xml;
        $items[$id] = $item;
      }
      $this->currentItems = $items;
      return $this->currentItems;
    }
    else {
      return NULL;
    }
  }

  /**
   * Get the item ID from the itemXML based on itemIDXpath.
   *
   * @param SimpleXMLElement $item_xml
   *   Element from which we get the ID.
   *
   * @return string|null
   *   The item ID, or NULL when itemIDXpath matches nothing.
   */
  protected function getItemID($item_xml) {
    return $this->getElementValue($item_xml, $this->itemIDXpath);
  }

  /**
   * Get an element from the itemXML based on an xpath.
*
   * @param SimpleXMLElement $item_xml
   *   Element from which we get the required value.
   * @param string $xpath
   *   xpath used to locate the value.
   *
   * @return string|null
   *   String value of the first match, or NULL when nothing matches.
   */
  protected function getElementValue($item_xml, $xpath) {
    $value = NULL;
    if ($item_xml->asXML()) {
      $result = $item_xml->xpath($xpath);
      if ($result) {
        $value = (string) $result[0];
      }
    }
    return $value;
  }

  /**
   * Implementers are expected to return an object representing a source item.
   * Items from currentUrl are cached as an array of key=ID and value=stdClass
   * object with attribute xml containing the xml SimpleXMLElement object of the
   * item.
   *
   * @param mixed $id
   *   ID of the item to return.
   *
   * @return stdClass|null
   *   The item, or NULL when the ID is empty or no item could be loaded for
   *   it. In the latter case a message containing the pending libxml errors
   *   is saved against the ID, or displayed when no migration is running.
   */
  public function getItem($id) {
    // Make sure we actually have an ID.
    if (empty($id)) {
      return NULL;
    }

    $item = NULL;
    // If $id is in currentXml return the right item immediately.
    if (isset($this->currentItems[$id])) {
      $item = $this->currentItems[$id];
    }
    else {
      // Otherwise find the right url and get the items from.
      if ($this->idsMap === NULL) {
        // Populate the map.
        $this->getIdList();
      }
      // The map may still not be an array when no URL could be loaded.
      $ids_map = is_array($this->idsMap) ? $this->idsMap : array();
      foreach ($ids_map as $url => $ids) {
        // Compare IDs as exact strings; a loose comparison would treat
        // numeric strings such as '1e1' and '10' as the same ID.
        if (in_array((string) $id, array_map('strval', $ids), TRUE)) {
          $this->currentItems = NULL;
          $this->currentUrl = $url;
          $items = $this->getAllItems();
          // getAllItems() returns NULL when the document cannot be loaded,
          // and the item may be missing from the parsed list.
          if (isset($items[$id])) {
            $item = $items[$id];
          }
        }
      }
    }

    if (!empty($item)) {
      return $item;
    }

    $message = t('Loading of item XML for ID !id failed:', array('!id' => $id));
    foreach (libxml_get_errors() as $error) {
      $message .= "\n" . $error->message;
    }
    libxml_clear_errors();
    // There may be no running migration to save the message against.
    if ($migration = Migration::currentMigration()) {
      $migration->getMap()->saveMessage(
        array($id), $message, MigrationBase::MESSAGE_ERROR);
    }
    else {
      Migration::displayMessage($message);
    }
    return NULL;
  }

  /**
   * {@inheritdoc}
   *
   * @param stdClass $row
   *   Row whose xml property is a SimpleXMLElement.
   *
   * @return string
   *   MD5 of the serialized XML string of the row.
   */
  public function hash($row) {
    // $row->xml is a SimpleXMLElement, so the hash is taken over its XML
    // string form. The timer carries this class's own name so that it is not
    // mixed up with MigrateItemXML::hash.
    migrate_instrument_start('MigrateItemsXML::hash');
    $hash = md5(serialize($row->xml->asXML()));
    migrate_instrument_stop('MigrateItemsXML::hash');
    return $hash;
  }

}

/**
 * Makes an XMLReader object iterable, returning elements matching a restricted
 * xpath-like syntax.
 */
class MigrateXMLReader implements Iterator {

  /**
   * The XMLReader we are encapsulating.
   *
   * @var XMLReader
   */
  public $reader;

  /**
   * URL of the source XML file.
   *
   * @var string
   */
  public $url;

  /**
   * Array of the element names from the query, 0-based from the first (root)
   * element. For example, '/file/article' would be stored as
   * array(0 => 'file', 1 => 'article').
   *
   * @var array
   */
  protected $elementsToMatch = array();

  /**
   * If the element query is filtering by an attribute name=value, the name of
   * the attribute in question.
   *
   * @var string
   */
  protected $attributeName = NULL;

  /**
   * If the element query is filtering by an attribute name=value, the value of
   * the attribute in question.
   *
   * @var string
   */
  protected $attributeValue = NULL;

  /**
   * Array representing the path to the current element as we traverse the XML.
   * For example, if in an XML string like
   * '<file><article>...</article></file>'
   * we are positioned within the article element, currentPath will be
   * array(0 => 'file', 1 => 'article').
   *
   * @var array
   */
  protected $currentPath = array();

  /**
   * Query string used to retrieve the elements from the XML file.
   *
   * @var string
   */
  public $elementQuery;

  /**
   * Xpath query string used to retrieve the primary key value from each
   * element.
   *
   * @var string
   */
  public $idQuery;

  /**
   * Current element object when iterating.
   *
   * @var SimpleXMLElement
   */
  protected $currentElement = NULL;

  /**
   * Value of the ID for the current element when iterating.
   *
   * @var string
   */
  protected $currentId = NULL;

  /**
   * When matching element names, whether to compare to the namespace-prefixed
   * name, or the local name.
   *
   * @var bool
   */
  protected $prefixedName = FALSE;

  /**
   * Prepares our extensions to the XMLReader object.
   *
   * @param string $xml_url
   *   URL of the XML file to be parsed.
   * @param string $element_query
   *   Query string in a restricted xpath format, for selecting the elements
   *   to iterate over: a single leading slash, a slash-separated element
   *   path, and optionally one [@attribute="value"] filter at the end. A
   *   query that does not have this form is reported and matches nothing.
   * @param string $id_query
   *   Query string to the unique identifier for an element,
   *   relative to the root of that element. This supports the full
   *   xpath syntax.
   */
  public function __construct($xml_url, $element_query, $id_query) {
    $this->reader = new XMLReader();
    $this->url = $xml_url;
    $this->elementQuery = $element_query;
    $this->idQuery = $id_query;

    // Suppress errors during parsing, so we can pick them up after.
    libxml_use_internal_errors(TRUE);

    // Parse the element query. First capture group is the element path, second
    // (if present) is the attribute.
    if (!preg_match('|^/([^\[]+)(.*)$|', $element_query, $matches)) {
      // Without a parsable path there is nothing to match against; leave
      // elementsToMatch empty so that iteration simply finds no elements.
      Migration::displayMessage(t('Invalid element query !query',
        array('!query' => $element_query)));
      return;
    }

    $element_path = $matches[1];
    $this->elementsToMatch = explode('/', $element_path);
    $attribute_query = $matches[2];
    // Matches [@attribute="value"] (with either single- or double-quotes).
    if ($attribute_query
      && preg_match('|^\[@([^=]+)=[\'"](.*)[\'"]\]$|', $attribute_query, $attribute_matches)) {
      $this->attributeName = $attribute_matches[1];
      $this->attributeValue = $attribute_matches[2];
    }

    // If the element path contains any colons, it must be specifying
    // namespaces, so we need to compare using the prefixed element
    // name in next().
    if (strpos($element_path, ':') !== FALSE) {
      $this->prefixedName = TRUE;
    }
  }

  /**
   * Implementation of Iterator::rewind().
   */
  public function rewind() {
    // (Re)open the provided URL.
    $this->reader->close();
    $status = $this->reader->open($this->url, NULL, LIBXML_NOWARNING);

    // Reset our path tracker.
    $this->currentPath = array();

    if ($status) {
      // Load the first matching element and its ID.
      $this->next();
    }
    else {
      Migration::displayMessage(t('Could not open XML file !url',
        array('!url' => $this->url)));
    }
  }

  /**
   * Implementation of Iterator::next().
   *
   * Advances to the next element whose path (and attribute, when one was
   * given) matches the element query, and stores it together with its ID.
   * Both stay NULL when the end of the document is reached.
   *
   * @throws Exception
   *   When the ID query cannot be evaluated on the matched element.
   */
  public function next() {
    migrate_instrument_start('MigrateXMLReader::next');
    $this->currentElement = $this->currentId = NULL;

    // Loop over each node in the XML file, looking for elements at a path
    // matching the input query string (represented in $this->elementsToMatch).
    while ($this->reader->read()) {
      if ($this->reader->nodeType == XMLReader::ELEMENT) {
        if ($this->prefixedName) {
          $this->currentPath[$this->reader->depth] = $this->reader->name;
        }
        else {
          $this->currentPath[$this->reader->depth] = $this->reader->localName;
        }
        if ($this->currentPath == $this->elementsToMatch) {
          // We're positioned to the right element path - if filtering on an
          // attribute, check that as well before accepting this element.
          if (empty($this->attributeName)
            || ($this->reader->getAttribute($this->attributeName) == $this->attributeValue)
          ) {
            // We've found a matching element - get a SimpleXML object
            // representing it. We must associate the DOMNode with a
            // DOMDocument to be able to import it into SimpleXML.
            // Despite appearances, this is almost twice as fast as
            // simplexml_load_string($this->readOuterXML());
            $node = $this->reader->expand();
            if ($node) {
              $dom = new DOMDocument();
              $node = $dom->importNode($node, TRUE);
              $dom->appendChild($node);
              $this->currentElement = simplexml_import_dom($node);
              $idnode = $this->currentElement->xpath($this->idQuery);
              if (is_array($idnode)) {
                $this->currentId = (string) reset($idnode);
              }
              else {
                // Stop the timer before leaving through the exception.
                migrate_instrument_stop('MigrateXMLReader::next');
                throw new Exception(t('Failure retrieving ID, xpath: !xpath',
                  array('!xpath' => $this->idQuery)));
              }
              break;
            }
            else {
              foreach (libxml_get_errors() as $error) {
                $error_string = MigrateItemsXML::parseLibXMLError($error);
                if ($migration = Migration::currentMigration()) {
                  $migration->saveMessage($error_string);
                }
                else {
                  Migration::displayMessage($error_string);
                }
              }
              // Empty the buffer so the same errors are not reported again
              // for the next element that fails to expand.
              libxml_clear_errors();
            }
          }
        }
      }
      elseif ($this->reader->nodeType == XMLReader::END_ELEMENT) {
        // Remove this element and any deeper ones from the current path.
        foreach ($this->currentPath as $depth => $name) {
          if ($depth >= $this->reader->depth) {
            unset($this->currentPath[$depth]);
          }
        }
      }
    }
    migrate_instrument_stop('MigrateXMLReader::next');
  }

  /**
   * Implementation of Iterator::current().
   *
   * @return null|SimpleXMLElement
   *   Current item.
   */
  public function current() {
    return $this->currentElement;
  }

  /**
   * Implementation of Iterator::key().
   *
   * @return null|string
   *   Current key.
   */
  public function key() {
    return $this->currentId;
  }

  /**
   * Implementation of Iterator::valid().
   *
   * @return bool
   *   Indicates if current element is valid.
   */
  public function valid() {
    return $this->currentElement instanceof SimpleXMLElement;
  }

}

/**
 * Implementation of MigrateSource, to handle imports from XML files.
 */
class MigrateSourceXML extends MigrateSource {

  /**
   * The reader for the source URL currently being traversed.
   *
   * @var MigrateXMLReader
   */
  protected $reader;

  /**
   * The MigrateXMLReader object serving as a cursor over the XML source.
*
   * @return MigrateXMLReader
   *   The reader for the active source URL; NULL while no source is open
   *   (performRewind() resets it and getNextSource() creates it).
   */
  public function getReader() {
    return $this->reader;
  }

  /**
   * The source URLs to load XML from.
   *
   * @var array
   */
  protected $sourceUrls = array();

  /**
   * Holds our current position within the $sourceUrls array.
   *
   * NULL until the first source has been opened.
   *
   * @var int
   */
  protected $activeUrl = NULL;

  /**
   * An array of namespaces to explicitly register before Xpath queries.
   *
   * Keys are prefixes, values are namespace URIs.
   *
   * @var array
   */
  protected $namespaces;

  /**
   * Store the query string used to recognize elements being iterated
   * so we can create reader objects on the fly.
   *
   * @var string
   */
  protected $elementQuery = '';

  /**
   * Store the query string used to retrieve the primary key value from each
   * element so we can create reader objects on the fly.
   *
   * @var string
   */
  protected $idQuery = '';

  /**
   * Store the reader class used to query XML so we can create reader objects
   * on the fly.
   *
   * @var string
   */
  protected $readerClass = '';

  /**
   * List of available source fields.
   *
   * Keys are field names, values are descriptions.
   *
   * @var array
   */
  protected $fields = array();

  /**
   * Source constructor.
   *
   * @param string|array $urls
   *   URL(s) of the XML source data.
   * @param string $element_query
   *   Query string used to recognize elements being iterated.
   * @param string $id_query
   *   Xpath query string used to retrieve the primary key value
   *   from each element.
   * @param array $fields
   *   Optional - keys are field names, values are descriptions. Use to override
   *   the default descriptions, or to add additional source fields which the
   *   migration will add via other means (e.g., prepareRow()).
   * @param array $options
   *   Options applied to this source. In addition to the standard MigrateSource
   *   options, we support:
   *   - reader_class: The reader class to instantiate for traversing the XML -
   *     defaults to MigrateXMLReader (any substitutions must be derived from
   *     MigrateXMLReader).
   * @param array $namespaces
   *   Optional - namespace prefixes (keys) and URIs (values) to register on
   *   the XML of each row before it is returned.
*/
  public function __construct($urls, $element_query, $id_query, array $fields = array(), array $options = array(), array $namespaces = array()) {
    parent::__construct($options);

    // A single URL is handled as a one-element list.
    $this->sourceUrls = is_array($urls) ? $urls : array($urls);
    $this->activeUrl = NULL;
    $this->elementQuery = $element_query;
    $this->idQuery = $id_query;
    // Fall back to the stock reader when no reader class was requested.
    $this->readerClass = empty($options['reader_class']) ? 'MigrateXMLReader' : $options['reader_class'];
    $this->fields = $fields;
    $this->namespaces = $namespaces;
  }

  /**
   * Registers the configured namespaces on an XML element.
   *
   * @param SimpleXMLElement $xml
   *   The element on which every prefix => URI pair is registered.
   */
  protected function registerNamespaces(SimpleXMLElement &$xml) {
    foreach ($this->namespaces as $ns_prefix => $ns_uri) {
      $xml->registerXPathNamespace($ns_prefix, $ns_uri);
    }
  }

  /**
   * Return a string representing the source query.
   *
   * @return string
   *   The comma-separated URLs, the element query and the ID query.
   */
  public function __toString() {
    // All URLs are joined into the string, which may get long with many URLs.
    $joined_urls = implode(', ', $this->sourceUrls);

    $description = 'urls = ' . $joined_urls;
    $description .= ' | item xpath = ' . $this->elementQuery;
    $description .= ' | item ID xpath = ' . $this->idQuery;
    return $description;
  }

  /**
   * Returns a list of fields available to be mapped from the source query.
   *
   * @return array
   *   keys: machine names of the fields (to be passed to addFieldMapping)
   *   values: Human-friendly descriptions of the fields.
   */
  public function fields() {
    return $this->fields;
  }

  /**
   * Returns the active Url.
   *
   * @return string|null
   *   The URL at the current position, or NULL when no source is active.
   */
  public function activeUrl() {
    if (is_null($this->activeUrl)) {
      return NULL;
    }
    return $this->sourceUrls[$this->activeUrl];
  }

  /**
   * Return a count of all available source records.
*/
  public function computeCount() {
    $total = 0;
    foreach ($this->sourceUrls as $source_url) {
      // A separate reader per URL, so the cursor in $this->reader is untouched.
      $counting_reader = new $this->readerClass($source_url, $this->elementQuery, $this->idQuery);
      foreach ($counting_reader as $ignored) {
        $total++;
      }
    }
    return $total;
  }

  /**
   * Implementation of MigrateSource::performRewind().
   */
  public function performRewind() {
    // Forget the position and the reader; getNextRow() opens the first source
    // again when it finds no reader.
    $this->reader = NULL;
    $this->activeUrl = NULL;
  }

  /**
   * Implementation of MigrationSource::getNextRow().
   *
   * @return stdClass|null
   *   Data for the next row from the XML source files: the ID under the name
   *   of the first source key, and the element in the xml property. NULL when
   *   no source has a further row.
   */
  public function getNextRow() {
    migrate_instrument_start('MigrateSourceXML::next');

    $source_key = $this->activeMap->getSourceKey();
    $key_name = key($source_key);
    $row = NULL;

    // The reader is created lazily, so there may be none yet. When there is
    // one, move it on and see whether it still has an element.
    $has_element = FALSE;
    if (isset($this->reader)) {
      $this->reader->next();
      $has_element = $this->reader->valid();
    }

    // The current source is exhausted (or none is open): try the next one.
    if (!$has_element) {
      $has_element = $this->getNextSource();
    }

    if ($has_element) {
      $row = new stdClass();
      $row->$key_name = $this->reader->key();
      $row->xml = $this->reader->current();
      $this->registerNamespaces($row->xml);
    }

    migrate_instrument_stop('MigrateSourceXML::next');
    return $row;
  }

  /**
   * Advances the reader to the next source from source_urls.
   *
   * Sources without any matching element are skipped.
   *
   * @return bool
   *   TRUE if a valid source was loaded
   */
  public function getNextSource() {
    migrate_instrument_start('MigrateSourceXML::nextSource');

    $loaded = FALSE;
    $last_index = count($this->sourceUrls) - 1;

    while ($this->activeUrl === NULL || $last_index > $this->activeUrl) {
      // Start at the first URL, otherwise step on to the following one.
      $this->activeUrl = is_null($this->activeUrl) ? 0 : $this->activeUrl + 1;

      $this->reader = new $this->readerClass(
        $this->sourceUrls[$this->activeUrl],
        $this->elementQuery,
        $this->idQuery
      );
      $this->reader->rewind();

      if ($this->reader->valid()) {
        // This source has at least one matching element.
        $loaded = TRUE;
        break;
      }
    }

    migrate_instrument_stop('MigrateSourceXML::nextSource');
    return $loaded;
  }

  /**
   * {@inheritdoc}
   */
  protected function hash($row) {
    // $row->xml is a SimpleXMLElement; hand its XML string form to the parent
    // implementation.
    $markup = $row->xml->asXML();
    return parent::hash($markup);
  }

}