<?php
/* vim: set expandtab shiftwidth=4 tabstop=4 softtabstop=4 foldmethod=marker: */
/**
* Parser for MARC records
*
* This package is based on the PHP MARC package, originally called "php-marc",
* that is part of the Emilda Project (http://www.emilda.org). Christoffer
* Landtman generously agreed to make the "php-marc" code available under the
* GNU LGPL so it could be used as the basis of this PEAR package.
*
* PHP version 5
*
* LICENSE: This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* @category File_Formats
* @package File_MARC
* @author Christoffer Landtman <landtman@realnode.com>
* @author Dan Scott <dscott@laurentian.ca>
* @copyright 2003-2010 Oy Realnode Ab, Dan Scott
* @license http://www.gnu.org/copyleft/lesser.html LGPL License 2.1
* @version CVS: $Id$
* @link http://pear.php.net/package/File_MARC
* @example read.php Retrieve specific fields and subfields from a record
* @example subfields.php Create new subfields and add them in specific order
* @example marc_yaz.php Pretty print a MARC record retrieved through the PECL yaz extension
*/
require_once 'PEAR/Exception.php';
require_once 'File/MARCBASE.php';
require_once 'File/MARC/Record.php';
require_once 'File/MARC/Field.php';
require_once 'File/MARC/Control_Field.php';
require_once 'File/MARC/Data_Field.php';
require_once 'File/MARC/Subfield.php';
require_once 'File/MARC/Exception.php';
require_once 'File/MARC/List.php';
// {{{ class File_MARC
/**
* The main File_MARC class enables you to return File_MARC_Record
* objects from a stream or string.
*
* @category File_Formats
* @package File_MARC
* @author Christoffer Landtman <landtman@realnode.com>
* @author Dan Scott <dscott@laurentian.ca>
* @license http://www.gnu.org/copyleft/lesser.html LGPL License 2.1
* @link http://pear.php.net/package/File_MARC
*/
class File_MARC extends File_MARCBASE
{

    // {{{ constants

    /**
     * MARC records retrieved from a file
     */
    const SOURCE_FILE = 1;

    /**
     * MARC records retrieved from a binary string
     */
    const SOURCE_STRING = 2;

    /**
     * Hexadecimal value for Subfield indicator
     */
    const SUBFIELD_INDICATOR = "\x1F";

    /**
     * Hexadecimal value for End of Field
     */
    const END_OF_FIELD = "\x1E";

    /**
     * Hexadecimal value for End of Record
     */
    const END_OF_RECORD = "\x1D";

    /**
     * Length of a single Directory entry (3-byte tag, 4-byte field length,
     * 5-byte field offset)
     */
    const DIRECTORY_ENTRY_LEN = 12;

    /**
     * Length of the Leader
     */
    const LEADER_LEN = 24;

    /**
     * Maximum record length
     */
    const MAX_RECORD_LENGTH = 99999;
    // }}}

    // {{{ properties
    /**
     * Source containing raw records
     *
     * For SOURCE_FILE this is the stream opened by fopen(); for SOURCE_STRING
     * it is the array of raw records produced by splitting the input string
     * on END_OF_RECORD.
     *
     * @var resource|array
     */
    protected $source;

    /**
     * Source type (SOURCE_FILE or SOURCE_STRING)
     *
     * @var int
     */
    protected $type;

    /**
     * XMLWriter for writing collections
     *
     * @var XMLWriter
     */
    protected $xmlwriter;
    // }}}

    // {{{ Constructor: function __construct()
    /**
     * Read in MARC records
     *
     * This function reads in MARC record files or strings that
     * contain one or more MARC records.
     *
     * <code>
     * <?php
     * // Retrieve MARC records from a file
     * $journals = new File_MARC('journals.mrc', File_MARC::SOURCE_FILE);
     *
     * // Retrieve MARC records from a string (e.g. Z39 query results)
     * $monographs = new File_MARC($raw_marc, File_MARC::SOURCE_STRING);
     * ?>
     * </code>
     *
     * @param string $source       Name of the file, or a raw MARC string
     * @param int    $type         Source of the input, either SOURCE_FILE or SOURCE_STRING
     * @param string $record_class Record class, defaults to File_MARC_Record
     *
     * @throws File_MARC_Exception if the file cannot be opened or $type is
     *                             neither SOURCE_FILE nor SOURCE_STRING
     */
    public function __construct($source, $type = self::SOURCE_FILE, $record_class = null)
    {
        parent::__construct($source, $type, $record_class);

        switch ($type) {
        case self::SOURCE_FILE:
            $this->type = self::SOURCE_FILE;
            $this->source = fopen($source, 'rb');
            if (!$this->source) {
                $errorMessage = File_MARC_Exception::formatError(
                    File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_FILE],
                    array('filename' => $source)
                );
                throw new File_MARC_Exception($errorMessage, File_MARC_Exception::ERROR_INVALID_FILE);
            }
            break;

        case self::SOURCE_STRING:
            $this->type = self::SOURCE_STRING;
            $this->source = explode(self::END_OF_RECORD, $source);
            break;

        default:
            throw new File_MARC_Exception(
                File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_SOURCE],
                File_MARC_Exception::ERROR_INVALID_SOURCE
            );
        }
    }
    // }}}

    // {{{ nextRaw()
    /**
     * Return the next raw MARC record
     *
     * Returns the next raw MARC record, unless all records already have
     * been read. Line-break and NUL bytes that precede a record are removed
     * for both file and string sources.
     *
     * @return string|false Either a raw record (terminated by END_OF_RECORD)
     *                      or false when no record is left
     */
    public function nextRaw()
    {
        if ($this->type == self::SOURCE_FILE) {
            // Returns false once the end of the stream has been reached
            $record = stream_get_line($this->source, self::MAX_RECORD_LENGTH, self::END_OF_RECORD);
        } elseif ($this->type == self::SOURCE_STRING) {
            // Returns null once every record has been consumed
            $record = array_shift($this->source);
        } else {
            $record = false;
        }

        if ($record === false || $record === null) {
            return false;
        }

        // Remove illegal stuff that sometimes occurs between records. This
        // applies to string sources too, otherwise a newline-separated string
        // yields mis-aligned records and a bogus trailing record.
        $record = preg_replace('/^[\\x0a\\x0d\\x00]+/', "", $record);

        // Exit if we are at the end of the input
        if (!$record) {
            return false;
        }

        // Append the end of record we lost during stream_get_line() or explode()
        return $record . self::END_OF_RECORD;
    }
    // }}}

    // {{{ next()
    /**
     * Return next {@link File_MARC_Record} object
     *
     * Decodes the next raw MARC record and returns the {@link File_MARC_Record}
     * object.
     * <code>
     * <?php
     * // Retrieve a set of MARC records from a file
     * $journals = new File_MARC('journals.mrc', File_MARC::SOURCE_FILE);
     *
     * // Iterate through the retrieved records
     * while ($record = $journals->next()) {
     *     print $record;
     *     print "\n";
     * }
     *
     * ?>
     * </code>
     *
     * @return File_MARC_Record|false next record, or false if there are
     *                                no more records
     *
     * @throws File_MARC_Exception if the raw record lacks its terminator
     */
    public function next()
    {
        $raw = $this->nextRaw();
        if (!$raw) {
            return false;
        }

        return $this->_decode($raw);
    }
    // }}}

    // {{{ _decode()
    /**
     * Decode a given raw MARC record
     *
     * Port of Andy Lester's MARC::File::USMARC->decode() Perl function into PHP.
     *
     * Structural problems in the record (bad lengths, bad directory entries,
     * missing field terminators, bad indicators, empty subfields) are recorded
     * as warnings on the returned record instead of aborting the decode.
     *
     * @param string $text Raw MARC record
     *
     * @return File_MARC_Record Decoded File_MARC_Record object
     *
     * @throws File_MARC_Exception if the record does not end with END_OF_RECORD
     */
    private function _decode($text)
    {
        $marc = new $this->record_class($this);

        // The real byte length always wins over the length declared in the
        // leader; the declared value is only checked for consistency.
        $record_length = strlen($text);

        $matches = array();
        if (preg_match("/^(\d{5})/", $text, $matches)) {
            if ((int) $matches[1] != $record_length) {
                $marc->addWarning(
                    File_MARC_Exception::formatError(
                        File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INCORRECT_LENGTH],
                        array("record_length" => $matches[1], "actual" => $record_length)
                    )
                );
            }
        } else {
            $marc->addWarning(
                File_MARC_Exception::formatError(
                    File_MARC_Exception::$messages[File_MARC_Exception::ERROR_NONNUMERIC_LENGTH],
                    array("record_length" => substr($text, 0, 5))
                )
            );
        }

        if (substr($text, -1, 1) != self::END_OF_RECORD) {
            throw new File_MARC_Exception(
                File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_TERMINATOR],
                File_MARC_Exception::ERROR_INVALID_TERMINATOR
            );
        }

        // Store leader
        $marc->setLeader(substr($text, 0, self::LEADER_LEN));

        // bytes 12 - 16 of leader give offset to the body of the record.
        // An explicit cast keeps a missing or non-numeric value from raising
        // an arithmetic error; it simply becomes 0.
        $data_start = (int) substr($text, 12, 5);

        // immediately after the leader comes the directory (no separator);
        // -1 to allow for \x1e at end of directory. The length is clamped so
        // that a bogus base address cannot turn into a negative substr()
        // length, which would return record body as directory data.
        $dir_length = max(0, $data_start - self::LEADER_LEN - 1);
        $dir = (string) substr($text, self::LEADER_LEN, $dir_length);

        // character after the directory must be \x1e
        if (substr($text, $data_start - 1, 1) != self::END_OF_FIELD) {
            $marc->addWarning(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_NO_DIRECTORY]);
        }

        // All directory entries 12 bytes long, so length % 12 must be 0
        if (strlen($dir) % self::DIRECTORY_ENTRY_LEN != 0) {
            $marc->addWarning(File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY_LENGTH]);
        }

        // go through all the fields; a trailing partial entry is still
        // inspected so that it gets reported through the checks below
        $nfields = (int) ceil(strlen($dir) / self::DIRECTORY_ENTRY_LEN);
        for ($n = 0; $n < $nfields; $n++) {
            // Pad a truncated final entry so unpack() always has enough input;
            // the "A" format strips the padding again.
            $entry = str_pad(
                (string) substr($dir, $n * self::DIRECTORY_ENTRY_LEN, self::DIRECTORY_ENTRY_LEN),
                self::DIRECTORY_ENTRY_LEN
            );
            $parts  = unpack("A3tag/A4len/A5offset", $entry);
            $tag    = $parts['tag'];
            $len    = $parts['len'];
            $offset = $parts['offset'];

            // Check directory validity
            if (!preg_match("/^[0-9A-Za-z]{3}$/", $tag)) {
                $marc->addWarning(
                    File_MARC_Exception::formatError(
                        File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY_TAG],
                        array("tag" => $tag)
                    )
                );
            }
            if (!preg_match("/^\d{4}$/", $len)) {
                $marc->addWarning(
                    File_MARC_Exception::formatError(
                        File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY_TAG_LENGTH],
                        array("tag" => $tag, "len" => $len)
                    )
                );
            }
            if (!preg_match("/^\d{5}$/", $offset)) {
                $marc->addWarning(
                    File_MARC_Exception::formatError(
                        File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY_OFFSET],
                        array("tag" => $tag, "offset" => $offset)
                    )
                );
            }

            // Invalid values have been reported above; from here on work with
            // integers so malformed entries cannot break the arithmetic.
            $field_length = (int) $len;
            $field_offset = (int) $offset;

            if ($field_offset + $field_length > $record_length) {
                $marc->addWarning(
                    File_MARC_Exception::formatError(
                        File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_DIRECTORY],
                        array("tag" => $tag)
                    )
                );
            }

            $tag_data = (string) substr($text, $data_start + $field_offset, $field_length);

            if (substr($tag_data, -1, 1) == self::END_OF_FIELD) {
                /* get rid of the end-of-tag character */
                $tag_data = substr($tag_data, 0, -1);
            } else {
                $marc->addWarning(
                    File_MARC_Exception::formatError(
                        File_MARC_Exception::$messages[File_MARC_Exception::ERROR_FIELD_EOF],
                        array("tag" => $tag)
                    )
                );
            }

            // Purely numeric tags below 010 are control fields
            if (preg_match("/^\d+$/", $tag) && (int) $tag < 10) {
                $marc->appendField(new File_MARC_Control_Field($tag, $tag_data));
            } else {
                $this->_appendDataField($marc, $tag, $tag_data);
            }
        }

        return $marc;
    }
    // }}}

    // {{{ _appendDataField()
    /**
     * Build a data field from its raw content and append it to the record
     *
     * Splits the raw field content into indicators and subfields. Problems
     * are recorded as warnings on the record; if the field itself cannot be
     * constructed, that one field is skipped.
     *
     * @param File_MARC_Record $marc     Record that receives the field and warnings
     * @param string           $tag      Tag of the field
     * @param string           $tag_data Raw field content without the field terminator
     *
     * @return void
     */
    private function _appendDataField($marc, $tag, $tag_data)
    {
        $subfields = explode(self::SUBFIELD_INDICATOR, $tag_data);
        $indicators = array_shift($subfields);

        if (strlen($indicators) == 2) {
            $ind1 = substr($indicators, 0, 1);
            $ind2 = substr($indicators, 1, 1);
        } else {
            $marc->addWarning(
                File_MARC_Exception::formatError(
                    File_MARC_Exception::$messages[File_MARC_Exception::ERROR_INVALID_INDICATORS],
                    array("tag" => $tag, "indicators" => $indicators)
                )
            );
            // Do the best with the indicators we've got
            $ind1 = (strlen($indicators) == 1) ? $indicators : " ";
            $ind2 = " ";
        }

        $emptySubfieldMessage = File_MARC_Exception::formatError(
            File_MARC_Exception::$messages[File_MARC_Exception::ERROR_EMPTY_SUBFIELD],
            array("tag" => $tag)
        );

        // A data field without a single subfield delimiter carries no
        // subfield data at all; report it once.
        if (count($subfields) === 0) {
            $marc->addWarning($emptySubfieldMessage);
        }

        // Split the subfield data into subfield name and data pairs
        $subfield_data = array();
        foreach ($subfields as $subfield) {
            if (strlen($subfield) > 0) {
                $subfield_data[] = new File_MARC_Subfield(substr($subfield, 0, 1), substr($subfield, 1));
            } else {
                $marc->addWarning($emptySubfieldMessage);
            }
        }

        // If the data is invalid, let's just ignore the one field
        try {
            $marc->appendField(new File_MARC_Data_Field($tag, $subfield_data, $ind1, $ind2));
        } catch (Exception $e) {
            $marc->addWarning($e->getMessage());
        }
    }
    // }}}
}
// }}}