# reposync.py
# DNF plugin adding a command to download all packages from given remote repo.
#
# Copyright (C) 2014 Red Hat, Inc.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# the GNU General Public License v.2, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY expressed or implied, including the implied warranties of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details. You should have received a copy of the
# GNU General Public License along with this program; if not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the
# source code or documentation are not subject to the GNU General Public
# License and may only be used or replicated with the express permission of
# Red Hat, Inc.
#
from __future__ import absolute_import
from __future__ import unicode_literals
import hawkey
import os
import shutil
import types
from dnfpluginscore import _, logger
from dnf.cli.option_parser import OptionParser
import dnf
import dnf.cli
def _pkgdir(intermediate, target):
cwd = dnf.i18n.ucd(os.getcwd())
return os.path.realpath(os.path.join(cwd, intermediate, target))
class RPMPayloadLocation(dnf.repo.RPMPayload):
def __init__(self, pkg, progress, pkg_location):
super(RPMPayloadLocation, self).__init__(pkg, progress)
self.package_dir = os.path.dirname(pkg_location)
def _target_params(self):
tp = super(RPMPayloadLocation, self)._target_params()
dnf.util.ensure_dir(self.package_dir)
tp['dest'] = self.package_dir
return tp
@dnf.plugin.register_command
class RepoSyncCommand(dnf.cli.Command):
aliases = ('reposync',)
summary = _('download all packages from remote repo')
def __init__(self, cli):
super(RepoSyncCommand, self).__init__(cli)
@staticmethod
def set_argparser(parser):
parser.add_argument('-a', '--arch', dest='arches', default=[],
action=OptionParser._SplitCallback, metavar='[arch]',
help=_('download only packages for this ARCH'))
parser.add_argument('--delete', default=False, action='store_true',
help=_('delete local packages no longer present in repository'))
parser.add_argument('--download-metadata', default=False, action='store_true',
help=_('download all the metadata.'))
parser.add_argument('-g', '--gpgcheck', default=False, action='store_true',
help=_('Remove packages that fail GPG signature checking '
'after downloading'))
parser.add_argument('-m', '--downloadcomps', default=False, action='store_true',
help=_('also download and uncompress comps.xml'))
parser.add_argument('--metadata-path',
help=_('where to store downloaded repository metadata. '
'Defaults to the value of --download-path.'))
parser.add_argument('-n', '--newest-only', default=False, action='store_true',
help=_('download only newest packages per-repo'))
parser.add_argument('--norepopath', default=False, action='store_true',
help=_("Don't add the reponame to the download path."))
parser.add_argument('-p', '--download-path', default='./',
help=_('where to store downloaded repositories'))
parser.add_argument('--remote-time', default=False, action='store_true',
help=_('try to set local timestamps of local files by '
'the one on the server'))
parser.add_argument('--source', default=False, action='store_true',
help=_('download only source packages'))
parser.add_argument('-u', '--urls', default=False, action='store_true',
help=_("Just list urls of what would be downloaded, "
"don't download"))
def configure(self):
demands = self.cli.demands
demands.available_repos = True
demands.sack_activation = True
repos = self.base.repos
if self.opts.repo:
repos.all().disable()
for repoid in self.opts.repo:
try:
repo = repos[repoid]
except KeyError:
raise dnf.cli.CliError("Unknown repo: '%s'." % repoid)
repo.enable()
if self.opts.source:
repos.enable_source_repos()
if len(list(repos.iter_enabled())) > 1 and self.opts.norepopath:
raise dnf.cli.CliError(
_("Can't use --norepopath with multiple repositories"))
for repo in repos.iter_enabled():
repo._repo.expire()
repo.deltarpm = False
def run(self):
self.base.conf.keepcache = True
gpgcheck_ok = True
for repo in self.base.repos.iter_enabled():
if self.opts.remote_time:
repo._repo.setPreserveRemoteTime(True)
if self.opts.download_metadata:
if self.opts.urls:
for md_type, md_location in repo._repo.getMetadataLocations():
url = repo.remote_location(md_location)
if url:
print(url)
else:
msg = _("Failed to get mirror for metadata: %s") % md_type
logger.warning(msg)
else:
self.download_metadata(repo)
if self.opts.downloadcomps:
if self.opts.urls:
mdl = dict(repo._repo.getMetadataLocations())
group_locations = [mdl[md_type]
for md_type in ('group', 'group_gz', 'group_gz_zck')
if md_type in mdl]
if group_locations:
for group_location in group_locations:
url = repo.remote_location(group_location)
if url:
print(url)
break
else:
msg = _("Failed to get mirror for the group file.")
logger.warning(msg)
else:
self.getcomps(repo)
pkglist = self.get_pkglist(repo)
if self.opts.urls:
self.print_urls(pkglist)
else:
self.download_packages(pkglist)
if self.opts.gpgcheck:
for pkg in pkglist:
local_path = self.pkg_download_path(pkg)
# base.package_signature_check uses pkg.localPkg() to determine
# the location of the package rpm file on the disk.
# Set it to the correct download path.
pkg.localPkg = types.MethodType(
lambda s, local_path=local_path: local_path, pkg)
result, error = self.base.package_signature_check(pkg)
if result != 0:
logger.warning(_("Removing {}: {}").format(
os.path.basename(local_path), error))
os.unlink(local_path)
gpgcheck_ok = False
if self.opts.delete:
self.delete_old_local_packages(repo, pkglist)
if not gpgcheck_ok:
raise dnf.exceptions.Error(_("GPG signature check failed."))
def repo_target(self, repo):
return _pkgdir(self.opts.destdir or self.opts.download_path,
repo.id if not self.opts.norepopath else '')
def metadata_target(self, repo):
if self.opts.metadata_path:
return _pkgdir(self.opts.metadata_path, repo.id)
else:
return self.repo_target(repo)
def pkg_download_path(self, pkg):
repo_target = self.repo_target(pkg.repo)
pkg_download_path = os.path.realpath(
os.path.join(repo_target, pkg.location))
# join() ensures repo_target ends with a path separator (otherwise the
# check would pass if pkg_download_path was a "sibling" path component
# of repo_target that has the same prefix).
if not pkg_download_path.startswith(os.path.join(repo_target, '')):
raise dnf.exceptions.Error(
_("Download target '{}' is outside of download path '{}'.").format(
pkg_download_path, repo_target))
return pkg_download_path
def delete_old_local_packages(self, repo, pkglist):
# delete any *.rpm file under target path, that was not downloaded from repository
downloaded_files = set(self.pkg_download_path(pkg) for pkg in pkglist)
for dirpath, dirnames, filenames in os.walk(self.repo_target(repo)):
for filename in filenames:
path = os.path.join(dirpath, filename)
if filename.endswith('.rpm') and os.path.isfile(path):
if path not in downloaded_files:
# Delete disappeared or relocated file
try:
os.unlink(path)
logger.info(_("[DELETED] %s"), path)
except OSError:
logger.error(_("failed to delete file %s"), path)
def getcomps(self, repo):
comps_fn = repo._repo.getCompsFn()
if comps_fn:
dest_path = self.metadata_target(repo)
dnf.util.ensure_dir(dest_path)
dest = os.path.join(dest_path, 'comps.xml')
dnf.yum.misc.decompress(comps_fn, dest=dest)
logger.info(_("comps.xml for repository %s saved"), repo.id)
def download_metadata(self, repo):
repo_target = self.metadata_target(repo)
repo._repo.downloadMetadata(repo_target)
return True
def _get_latest(self, query):
"""
return union of these queries:
- the latest NEVRAs from non-modular packages
- all packages from stream version with the latest package NEVRA
(this should not be needed but the latest package NEVRAs might be
part of an older module version)
- all packages from the latest stream version
"""
if not dnf.base.WITH_MODULES:
return query.latest()
query.apply()
module_packages = self.base._moduleContainer.getModulePackages()
all_artifacts = set()
module_dict = {} # {NameStream: {Version: [modules]}}
artifact_version = {} # {artifact: {NameStream: [Version]}}
for module_package in module_packages:
artifacts = module_package.getArtifacts()
all_artifacts.update(artifacts)
module_dict.setdefault(module_package.getNameStream(), {}).setdefault(
module_package.getVersionNum(), []).append(module_package)
for artifact in artifacts:
artifact_version.setdefault(artifact, {}).setdefault(
module_package.getNameStream(), []).append(module_package.getVersionNum())
# the latest NEVRAs from non-modular packages
latest_query = query.filter(
pkg__neq=query.filter(nevra_strict=all_artifacts)).latest()
# artifacts from the newest version and those versions that contain an artifact
# with the highest NEVRA
latest_stream_artifacts = set()
for namestream, version_dict in module_dict.items():
# versions that will be synchronized
versions = set()
# add the newest stream version
versions.add(sorted(version_dict.keys(), reverse=True)[0])
# collect all artifacts in all stream versions
stream_artifacts = set()
for modules in version_dict.values():
for module in modules:
stream_artifacts.update(module.getArtifacts())
# find versions to which the packages with the highest NEVRAs belong
for latest_pkg in query.filter(nevra_strict=stream_artifacts).latest():
# here we depend on modules.yaml allways containing full NEVRA (including epoch)
nevra = "{0.name}-{0.epoch}:{0.version}-{0.release}.{0.arch}".format(latest_pkg)
# download only highest version containing the latest artifact
versions.add(max(artifact_version[nevra][namestream]))
# add all artifacts from selected versions for synchronization
for version in versions:
for module in version_dict[version]:
latest_stream_artifacts.update(module.getArtifacts())
latest_query = latest_query.union(query.filter(nevra_strict=latest_stream_artifacts))
return latest_query
def get_pkglist(self, repo):
query = self.base.sack.query(flags=hawkey.IGNORE_MODULAR_EXCLUDES).available().filterm(
reponame=repo.id)
if self.opts.newest_only:
query = self._get_latest(query)
if self.opts.source:
query.filterm(arch='src')
elif self.opts.arches:
query.filterm(arch=self.opts.arches)
return query
def download_packages(self, pkglist):
base = self.base
progress = base.output.progress
if progress is None:
progress = dnf.callback.NullDownloadProgress()
drpm = dnf.drpm.DeltaInfo(base.sack.query(flags=hawkey.IGNORE_MODULAR_EXCLUDES).installed(),
progress, 0)
payloads = [RPMPayloadLocation(pkg, progress, self.pkg_download_path(pkg))
for pkg in pkglist]
base._download_remote_payloads(payloads, drpm, progress, None, False)
def print_urls(self, pkglist):
for pkg in pkglist:
url = pkg.remote_location()
if url:
print(url)
else:
msg = _("Failed to get mirror for package: %s") % pkg.name
logger.warning(msg)