Changeset created on Thu Oct 13 09:15:37 UTC 2011 by Seecr (Seek You Too B.V.) Description: Proceed with resumptionToken if batch is empty ResumptionToken will now be saved, even if no records are found in a batch. Some repositories provide empty batches with a resumptionToken to proceed. Baseline version: https://meresco.svn.sourceforge.net/svnroot/meresco/meresco-harvester/workingsets/7.2.3-Edurep/version_0 diff --unidirectional-new-file --recursive --unified --exclude='*.so' --exclude='*.o' --exclude=.svn --exclude='*.pyc' --exclude=deps.d --exclude=applied version_0/meresco/harvester/harvester.py version_1/meresco/harvester/harvester.py --- version_0/meresco/harvester/harvester.py 2011-10-13 10:07:34.000000000 +0200 +++ version_1/meresco/harvester/harvester.py 2011-10-13 11:15:35.000000000 +0200 @@ -1,34 +1,35 @@ ## begin license ## -# -# "Meresco Harvester" consists of two subsystems, namely an OAI-harvester and -# a web-control panel. -# "Meresco Harvester" is originally called "Sahara" and was developed for -# SURFnet by: -# Seek You Too B.V. (CQ2) http://www.cq2.nl -# Copyright (C) 2006-2007 SURFnet B.V. http://www.surfnet.nl -# Copyright (C) 2007-2008 SURF Foundation. http://www.surf.nl -# Copyright (C) 2007-2011 Seek You Too (CQ2) http://www.cq2.nl -# Copyright (C) 2007-2009 Stichting Kennisnet Ict op school. -# http://www.kennisnetictopschool.nl -# Copyright (C) 2009 Tilburg University http://www.uvt.nl -# Copyright (C) 2010-2011 Stichting Kennisnet http://www.kennisnet.nl -# -# This file is part of "Meresco Harvester" -# -# "Meresco Harvester" is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# "Meresco Harvester" is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with "Meresco Harvester"; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -# +# +# "Meresco Harvester" consists of two subsystems, namely an OAI-harvester and +# a web-control panel. +# "Meresco Harvester" is originally called "Sahara" and was developed for +# SURFnet by: +# Seek You Too B.V. (CQ2) http://www.cq2.nl +# +# Copyright (C) 2006-2007 SURFnet B.V. http://www.surfnet.nl +# Copyright (C) 2007-2008 SURF Foundation. http://www.surf.nl +# Copyright (C) 2007-2011 Seek You Too (CQ2) http://www.cq2.nl +# Copyright (C) 2007-2009 Stichting Kennisnet Ict op school. http://www.kennisnetictopschool.nl +# Copyright (C) 2009 Tilburg University http://www.uvt.nl +# Copyright (C) 2010-2011 Stichting Kennisnet http://www.kennisnet.nl +# Copyright (C) 2011 Seecr (Seek You Too B.V.) http://seecr.nl +# +# This file is part of "Meresco Harvester" +# +# "Meresco Harvester" is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# "Meresco Harvester" is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with "Meresco Harvester"; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# ## end license ## import os, sys, shelve, time, traceback,re @@ -67,11 +68,10 @@ return self.any.listRecords(metadataPrefix=self._repository.metadataPrefix) def fetchRecords(self, from_, token): - records = self.listRecords(from_, token, self._repository.set) + records, resumptionToken = self.listRecords(from_, token, self._repository.set) for record in records: self.uploadRecord(record) - newtoken = getattr(records.parentNode, 'resumptionToken', None) - return newtoken + return resumptionToken def uploadRecord(self, record): upload = self.any.createUpload(self._repository, record) diff --unidirectional-new-file --recursive --unified --exclude='*.so' --exclude='*.o' --exclude=.svn --exclude='*.pyc' --exclude=deps.d --exclude=applied version_0/meresco/harvester/oairequest.py version_1/meresco/harvester/oairequest.py --- version_0/meresco/harvester/oairequest.py 2011-10-13 10:07:34.000000000 +0200 +++ version_1/meresco/harvester/oairequest.py 2011-10-13 11:15:35.000000000 +0200 @@ -11,9 +11,9 @@ # Copyright (C) 2007-2011 Seek You Too (CQ2) http://www.cq2.nl # Copyright (C) 2007-2009 Stichting Kennisnet Ict op school. http://www.kennisnetictopschool.nl # Copyright (C) 2009 Tilburg University http://www.uvt.nl +# Copyright (C) 2011 Seecr (Seek You Too B.V.) 
http://seecr.nl # Copyright (C) 2011 Stichting Kennisnet http://www.kennisnet.nl # -# # This file is part of "Meresco Harvester" # # "Meresco Harvester" is free software; you can redistribute it and/or modify @@ -58,22 +58,23 @@ def __init__(self, url): self._url = url - def listRecords(self, **args): - if args.has_key('from_'): - args['from']=args['from_'] - del args['from_'] - args['verb']='ListRecords' + def listRecords(self, **kwargs): + if kwargs.has_key('from_'): + kwargs['from'] = kwargs['from_'] + del kwargs['from_'] + kwargs['verb'] = 'ListRecords' try: - response = self.request(args) + response = self.request(kwargs) except OAIError, e: if e.errorCode() != 'noRecordsMatch': raise e response = e.response - return wrapp(response.OAI_PMH).ListRecords.record + listRecords = wrapp(response.OAI_PMH).ListRecords + return listRecords.record, getattr(listRecords, 'resumptionToken', None) - def getRecord(self, **args): - args['verb'] = 'GetRecord' - response = self.request(args) + def getRecord(self, **kwargs): + kwargs['verb'] = 'GetRecord' + response = self.request(kwargs) return wrapp(response).OAI_PMH.GetRecord.record def identify(self): @@ -124,10 +125,10 @@ writefile.close() return binderytools.bind_file(filename) -def loggingListRecords(url, tempdir, **args): +def loggingListRecords(url, tempdir, **kwargs): """loggingListRecords(url, tempdir, **listRecordsArgs""" loair = LoggingOaiRequest(url, tempdir) - listRecords = loair.listRecords(**args) + listRecords = loair.listRecords(**kwargs) resumptionToken = str(listRecords.parentNode.resumptionToken) print resumptionToken while resumptionToken: diff --unidirectional-new-file --recursive --unified --exclude='*.so' --exclude='*.o' --exclude=.svn --exclude='*.pyc' --exclude=deps.d --exclude=applied version_0/test/oairequesttest.py version_1/test/oairequesttest.py --- version_0/test/oairequesttest.py 2011-10-13 10:07:33.000000000 +0200 +++ version_1/test/oairequesttest.py 2011-10-13 11:15:35.000000000 +0200 @@ 
-11,9 +11,9 @@ # Copyright (C) 2007-2011 Seek You Too (CQ2) http://www.cq2.nl # Copyright (C) 2007-2009 Stichting Kennisnet Ict op school. http://www.kennisnetictopschool.nl # Copyright (C) 2009 Tilburg University http://www.uvt.nl +# Copyright (C) 2011 Seecr (Seek You Too B.V.) http://seecr.nl # Copyright (C) 2011 Stichting Kennisnet http://www.kennisnet.nl # -# # This file is part of "Meresco Harvester" # # "Meresco Harvester" is free software; you can redistribute it and/or modify @@ -60,11 +60,9 @@ self.assertEquals( u'badResumptionToken',e.errorCode()) def testListRecords(self): - records = self.request.listRecords(metadataPrefix='oai_dc') - i = 0 - for r in records: - i+=1 - self.assertEquals(3,i) + records, resumptionToken = self.request.listRecords(metadataPrefix='oai_dc') + self.assertEquals("TestToken", resumptionToken) + self.assertEquals(3,len(records)) self.assertEquals('oai:tudelft.nl:007087',str(records[0].header.identifier)) if records[0].header.deleted: self.fail() @@ -93,11 +91,9 @@ self.assertEquals('oai:rep:12345',record.header.identifier) def testListRecordsWithAnEmptyList(self): - records = self.request.listRecords(resumptionToken='EmptyListToken') - i = 0 - for record in records: i+=1 - self.assertEquals(0,i) - + records, resumptionToken = self.request.listRecords(resumptionToken='EmptyListToken') + self.assertEquals(0,len(records)) + self.assertEquals("", resumptionToken) def xtest_LIVE_Retrieve(self): request = OaiRequest('http://library.wur.nl/oai')