Skip to content

Commit

Permalink
Merge pull request #606 from rhubert/urlscm-bob-download
Browse files Browse the repository at this point in the history
Place url-scm download files in .bob-download
  • Loading branch information
jkloetzke authored Jan 21, 2025
2 parents 2d829b4 + c619d27 commit d021b99
Show file tree
Hide file tree
Showing 20 changed files with 367 additions and 109 deletions.
16 changes: 16 additions & 0 deletions doc/manual/policies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,22 @@ New behavior

Unmanaged layers are expected in the same directory.

urlScmSeparateDownload
~~~~~~~~~~~~~~~~~~~~~~

Introduced in: 1.0

This policy controls where Bob places the downloaded file of a url SCM when
extraction is used.

Old behavior
The downloaded file could be found in the workspace next to the extracted files.

New behavior
The downloaded file is stored next to the workspace in a separate download folder.
Only the extracted content is in the workspace.


.. _policies-obsolete:

Obsolete policies
Expand Down
2 changes: 2 additions & 0 deletions pym/bob/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,8 @@ async def _cookCheckoutStep(self, checkoutStep, depth):
os.makedirs(atticPath)
atticPath = os.path.join(atticPath, atticName)
os.rename(scmPath, atticPath)
if scmDir in scmMap:
scmMap[scmDir].postAttic(prettySrcPath)
BobState().setAtticDirectoryState(atticPath, scmSpec)
atticPaths.add(scmPath, atticPath)
del oldCheckoutState[scmDir]
Expand Down
6 changes: 6 additions & 0 deletions pym/bob/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -3029,6 +3029,7 @@ class RecipeSet:
schema.Optional('defaultFileMode') : bool,
schema.Optional('substituteMetaEnv') : bool,
schema.Optional('managedLayers') : bool,
schema.Optional('urlScmSeparateDownload') : bool,
},
error="Invalid policy specified! Are you using an appropriate version of Bob?"
),
Expand Down Expand Up @@ -3083,6 +3084,11 @@ class RecipeSet:
InfoOnce("managedLayers policy is not set. Only unmanaged layers are supported.",
help="See http://bob-build-tool.readthedocs.io/en/latest/manual/policies.html#managedlayers for more information.")
),
"urlScmSeparateDownload": (
"0.25.1.dev27",
InfoOnce("urlScmSeparateDownload policy is not set. Extracted archives of the 'url' SCM are retained in the workspace.",
help="See http://bob-build-tool.readthedocs.io/en/latest/manual/policies.html#urlscmseparatedownload for more information.")
)
}

_ignoreCmdConfig = False
Expand Down
1 change: 1 addition & 0 deletions pym/bob/intermediate.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ def fromRecipeSet(cls, recipeSet):
'gitCommitOnBranch' : recipeSet.getPolicy('gitCommitOnBranch'),
'fixImportScmVariant' : recipeSet.getPolicy('fixImportScmVariant'),
'defaultFileMode' : recipeSet.getPolicy('defaultFileMode'),
'urlScmSeparateDownload' : recipeSet.getPolicy('urlScmSeparateDownload'),
}
self.__data['archiveSpec'] = recipeSet.archiveSpec()
self.__data['envWhiteList'] = sorted(recipeSet.envWhiteList())
Expand Down
3 changes: 2 additions & 1 deletion pym/bob/scm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def getScm(spec, overrides=[], recipeSet=None):
recipeSet and recipeSet.getPolicy('scmIgnoreUser'),
recipeSet.getPreMirrors() if recipeSet else [],
recipeSet.getFallbackMirrors() if recipeSet else [],
recipeSet and recipeSet.getPolicy('defaultFileMode'))
recipeSet and recipeSet.getPolicy('defaultFileMode'),
recipeSet and recipeSet.getPolicy('urlScmSeparateDownload'))
else:
raise ParseError("Unknown SCM '{}'".format(scm))
3 changes: 3 additions & 0 deletions pym/bob/scm/scm.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,9 @@ def calcLiveBuildId(self, workspacePath):
"""Calculate live build-id from workspace."""
return None

def postAttic(self, workspace):
    # Hook invoked after the SCM's old checkout has been moved to the
    # attic. The default is a no-op. SCMs that keep state outside the
    # workspace (e.g. the url SCM's separate download directory —
    # see UrlScm.postAttic) override this to clean that state up.
    # ``workspace`` is the path of the (former) workspace directory.
    pass

class ScmAudit(metaclass=ABCMeta):
@classmethod
async def fromDir(cls, workspace, dir, extra):
Expand Down
222 changes: 161 additions & 61 deletions pym/bob/scm/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
replacePath
from .scm import Scm, ScmAudit
from http.client import HTTPException
from abc import abstractmethod
import asyncio
import concurrent.futures.process
import contextlib
Expand Down Expand Up @@ -155,6 +156,123 @@ def dumpMode(mode):

isWin32 = sys.platform == "win32"


class Extractor:
    """Base class of the url SCM archive extractors.

    Holds the common state (target directory, archive file name, number of
    path components to strip, and whether the downloaded archive is kept in
    a separate download folder outside the workspace) and the shared
    extraction driver logic. Subclasses implement :meth:`extract`.
    """

    def __init__(self, dir, file, strip, separateDownload):
        self.dir = dir          # workspace-relative extraction directory
        self.file = file        # file name of the downloaded archive
        self.strip = strip      # leading path components to strip (tar only)
        self.separateDownload = separateDownload  # archive outside workspace?

    async def _extract(self, cmds, invoker):
        """Run the first available command of *cmds* to extract the archive.

        A canary file next to the archive records the last successful
        extraction; extraction is skipped if the canary is newer than the
        archive. Fails the invocation if none of the commands is installed.
        """
        destination = self.getCompressedFilePath(invoker)
        canary = destination + ".extracted"
        if isYounger(destination, canary):
            for cmd in cmds:
                if shutil.which(cmd[0]) is None: continue
                await invoker.checkCommand(cmd, cwd=self.dir)
                invoker.trace("<touch>", canary)
                # Create/truncate the canary and bump its mtime so the next
                # run sees the extraction as up-to-date.
                with open(canary, "wb"):
                    pass
                os.utime(canary)
                break
            else:
                invoker.fail("No suitable extractor found!")

    def getCompressedFilePath(self, invoker):
        """Return the absolute path of the downloaded archive.

        If the separate download policy is active the archive lives in the
        sibling "download" folder next to the workspace, otherwise directly
        in the workspace.
        """
        downloadFolder = os.path.join(os.pardir, "download") if self.separateDownload else ""
        # Bug fix: the original had a stray trailing line continuation ("\")
        # after this return expression.
        return os.path.abspath(invoker.joinPath(downloadFolder, self.dir, self.file))

    @abstractmethod
    async def extract(self, invoker):
        # Bug fix: the abstract signature used to be
        # (self, invoker, destination, cwd) while every subclass and the
        # call site use (self, invoker).
        return False

# Use the Python tar/zip extraction only on Windows. They are slower and in
# case of tarfile broken in certain ways (e.g. tarfile will result in
# different file modes!). But it shouldn't make a difference on Windows.
class TarExtractor(Extractor):
    """Extract (optionally compressed) tar archives.

    The only extractor that supports stripping leading path components, so
    no ``strip != 0`` guard is needed here. ``__init__`` is inherited
    unchanged from Extractor (the original redundant override was removed).
    """

    async def extract(self, invoker):
        cmds = []
        compressedFilePath = self.getCompressedFilePath(invoker)
        # Python's tarfile module is only preferred on Windows and cannot
        # strip components.
        if isWin32 and self.strip == 0:
            cmds.append(["python", "-m", "tarfile", "-e", compressedFilePath])

        cmd = ["tar", "-x", "--no-same-owner", "--no-same-permissions",
               "-f", compressedFilePath]
        if self.strip > 0:
            cmd.append("--strip-components={}".format(self.strip))
        cmds.append(cmd)

        await self._extract(cmds, invoker)


class ZipExtractor(Extractor):
    """Unpack zip archives. Stripping path components is not supported."""

    def __init__(self, dir, file, strip, separateDownload):
        super().__init__(dir, file, strip, separateDownload)
        if strip != 0:
            raise BuildError("Extractor does not support 'stripComponents'!")

    async def extract(self, invoker):
        archive = self.getCompressedFilePath(invoker)
        # Prefer Python's zipfile module on Windows, fall back to unzip.
        candidates = []
        if isWin32:
            candidates.append(["python", "-m", "zipfile", "-e", archive, "."])
        candidates.append(["unzip", "-o", archive])
        await self._extract(candidates, invoker)


class GZipExtractor(Extractor):
    """Uncompress a single gzip-compressed file with gunzip."""

    def __init__(self, dir, file, strip, separateDownload):
        super().__init__(dir, file, strip, separateDownload)
        if strip != 0:
            raise BuildError("Extractor does not support 'stripComponents'!")

    async def extract(self, invoker):
        # gunzip extracts the file at the location of the input file. If the
        # archive lives in the separate download folder, copy it into the
        # workspace prior to uncompressing and let gunzip consume the copy.
        # Otherwise pass -k to keep the original file in the workspace.
        if self.separateDownload:
            shutil.copyfile(self.getCompressedFilePath(invoker),
                            invoker.joinPath(self.dir, self.file))
            cmd = ["gunzip", "-f", self.file]
        else:
            cmd = ["gunzip", "-k", "-f", self.file]
        await self._extract([cmd], invoker)


class XZExtractor(Extractor):
    """Uncompress a single xz-compressed file with unxz."""

    def __init__(self, dir, file, strip, separateDownload):
        super().__init__(dir, file, strip, separateDownload)
        if strip != 0:
            raise BuildError("Extractor does not support 'stripComponents'!")

    async def extract(self, invoker):
        # unxz replaces its input file in place. With a separate download
        # folder the archive is first copied into the workspace and the copy
        # is consumed; otherwise -k keeps the original archive around.
        if self.separateDownload:
            shutil.copyfile(self.getCompressedFilePath(invoker),
                            invoker.joinPath(self.dir, self.file))
            cmd = ["unxz", "-f", self.file]
        else:
            cmd = ["unxz", "-k", "-f", self.file]
        await self._extract([cmd], invoker)


class SevenZipExtractor(Extractor):
    """Unpack 7z archives with the 7z tool."""

    def __init__(self, dir, file, strip, separateDownload):
        super().__init__(dir, file, strip, separateDownload)
        if strip != 0:
            raise BuildError("Extractor does not support 'stripComponents'!")

    async def extract(self, invoker):
        # "x" extracts with full paths, "-y" answers all prompts with yes.
        archive = self.getCompressedFilePath(invoker)
        await self._extract([["7z", "x", "-y", archive]], invoker)


class UrlScm(Scm):

__DEFAULTS = {
Expand Down Expand Up @@ -212,31 +330,17 @@ class UrlScm(Scm):
(".zip", "zip"),
]

# Use the Python tar/zip extraction only on Windows. They are slower and in
# case of tarfile broken in certain ways (e.g. tarfile will result in
# different file modes!). But it shouldn't make a difference on Windows.
EXTRACTORS = {
"tar" : [
(isWin32, "python", ["-m", "tarfile", "-e", "{}"], None),
(True, "tar", ["-x", "--no-same-owner", "--no-same-permissions", "-f", "{}"], "--strip-components={}"),
],
"gzip" : [
(True, "gunzip", ["-kf", "{}"], None),
],
"xz" : [
(True, "unxz", ["-kf", "{}"], None),
],
"7z" : [
(True, "7z", ["x", "-y", "{}"], None),
],
"zip" : [
(isWin32, "python", ["-m", "zipfile", "-e", "{}", "."], None),
(True, "unzip", ["-o", "{}"], None),
],
"tar" : TarExtractor,
"gzip" : GZipExtractor,
"xz" : XZExtractor,
"7z" : SevenZipExtractor,
"zip" : ZipExtractor,
}

def __init__(self, spec, overrides=[], stripUser=None,
preMirrors=[], fallbackMirrors=[], defaultFileMode=None):
preMirrors=[], fallbackMirrors=[], defaultFileMode=None,
separateDownload=False):
super().__init__(spec, overrides)
self.__url = spec["url"]
self.__digestSha1 = spec.get("digestSHA1")
Expand Down Expand Up @@ -275,6 +379,7 @@ def __init__(self, spec, overrides=[], stripUser=None,
self.__fallbackMirrorsUrls = spec.get("fallbackMirrors")
self.__fallbackMirrorsUpload = spec.get("__fallbackMirrorsUpload")
self.__fileMode = spec.get("fileMode", 0o600 if defaultFileMode else None)
self.__separateDownload = spec.get("__separateDownload", separateDownload)

def getProperties(self, isJenkins, pretty=False):
ret = super().getProperties(isJenkins)
Expand All @@ -295,6 +400,7 @@ def getProperties(self, isJenkins, pretty=False):
'fallbackMirrors' : self.__getFallbackMirrorsUrls(),
'__fallbackMirrorsUpload' : self.__getFallbackMirrorsUpload(),
'fileMode' : dumpMode(self.__fileMode) if pretty else self.__fileMode,
'__separateDownload': self.__separateDownload,
})
return ret

Expand Down Expand Up @@ -517,6 +623,9 @@ async def _put(self, invoker, workspaceFile, source, url):
invoker.fail("Upload not supported for URL scheme: " + url.scheme)

def canSwitch(self, oldScm):
if self.__separateDownload != oldScm.__separateDownload:
return False

diff = self._diffSpec(oldScm)
if "scm" in diff:
return False
Expand Down Expand Up @@ -551,7 +660,16 @@ async def switch(self, invoker, oldScm):
async def invoke(self, invoker):
os.makedirs(invoker.joinPath(self.__dir), exist_ok=True)
workspaceFile = os.path.join(self.__dir, self.__fn)
extractor = self.__getExtractor()

destination = invoker.joinPath(self.__dir, self.__fn)
if extractor is not None and self.__separateDownload:
downloadDestination = invoker.joinPath(os.pardir, "download", self.__dir)
# os.makedirs doc:
# Note: makedirs() will become confused if the path elements to create include pardir (eg. “..” on UNIX systems).
# -> use normpath to collapse the up-level references
os.makedirs(os.path.normpath(downloadDestination), exist_ok=True)
destination = invoker.joinPath(os.pardir, "download", self.__dir, self.__fn)

# Download only if necessary
if not self.isDeterministic() or not os.path.isfile(destination):
Expand Down Expand Up @@ -600,26 +718,17 @@ async def invoke(self, invoker):
await self._put(invoker, workspaceFile, destination, url)

# Run optional extractors
extractors = self.__getExtractors()
canary = invoker.joinPath(self.__dir, "." + self.__fn + ".extracted")
if extractors and isYounger(destination, canary):
for cmd in extractors:
if shutil.which(cmd[0]) is None: continue
await invoker.checkCommand(cmd, cwd=self.__dir)
invoker.trace("<touch>", canary)
with open(canary, "wb") as f:
pass
os.utime(canary)
break
else:
invoker.fail("No suitable extractor found!")
if extractor is not None:
await extractor.extract(invoker)

def asDigestScript(self):
"""Return forward compatible stable string describing this url.
The format is "digest dir extract" if a SHA checksum was specified.
Otherwise it is "url dir extract". A "s#" is appended if leading paths
are stripped where # is the number of stripped elements.
are stripped where # is the number of stripped elements. Also appended
is "m<fileMode>" if fileMode is set.
"sep" is appended if the archive is not stored in the workspace.
"""
if self.__stripUser:
filt = removeUserFromUrl
Expand All @@ -629,7 +738,8 @@ def asDigestScript(self):
self.__digestSha1 or filt(self.__url)
) + " " + posixpath.join(self.__dir, self.__fn) + " " + str(self.__extract) + \
( " s{}".format(self.__strip) if self.__strip > 0 else "" ) + \
( " m{}".format(self.__fileMode) if self.__fileMode is not None else "")
( " m{}".format(self.__fileMode) if self.__fileMode is not None else "") + \
( " sep" if self.__separateDownload else "" )

def getDirectory(self):
return self.__dir
Expand Down Expand Up @@ -659,39 +769,29 @@ def calcLiveBuildId(self, workspacePath):
else:
return None

def __getExtractors(self):
extractors = None
def __getExtractor(self):
extractor = None
if self.__extract in ["yes", "auto", True]:
for (ext, tool) in UrlScm.EXTENSIONS:
if self.__fn.endswith(ext):
extractors = UrlScm.EXTRACTORS[tool]
extractor = UrlScm.EXTRACTORS[tool](self.__dir, self.__fn,
self.__strip, self.__separateDownload)
break
if not extractors and self.__extract != "auto":
if extractor is None and self.__extract != "auto":
raise ParseError("Don't know how to extract '"+self.__fn+"' automatically.")
elif self.__extract in UrlScm.EXTRACTORS:
extractors = UrlScm.EXTRACTORS[self.__extract]
extractor = UrlScm.EXTRACTORS[self.__extract](self.__dir, self.__fn,
self.__strip, self.__separateDownload)
elif self.__extract not in ["no", False]:
raise ParseError("Invalid extract mode: " + self.__extract)

if extractors is None:
return []

ret = []
for extractor in extractors:
if not extractor[0]: continue
if self.__strip > 0:
if extractor[3] is None:
continue
strip = [extractor[3].format(self.__strip)]
else:
strip = []
ret.append([extractor[1]] + [a.format(self.__fn) for a in extractor[2]] + strip)

if not ret:
raise BuildError("Extractor does not support 'stripComponents'!")

return ret

return extractor

def postAttic(self, workspace):
    # The separately downloaded archive lives outside the workspace in a
    # sibling "download" folder. When the workspace is moved to the attic,
    # drop the now-stale download directory as well.
    if not self.__separateDownload:
        return
    # os.path.exists returns False if os.pardir is in the path -> normalize it
    staleDir = os.path.normpath(
        os.path.join(workspace, os.pardir, "download", self.__dir))
    if os.path.exists(staleDir):
        shutil.rmtree(staleDir)

class UrlAudit(ScmAudit):

Expand Down
2 changes: 2 additions & 0 deletions test/black-box/extractors/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
policies:
urlScmSeparateDownload: True
Binary file added test/black-box/extractors/input/test.7z
Binary file not shown.
1 change: 1 addition & 0 deletions test/black-box/extractors/input/test.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
Binary file added test/black-box/extractors/input/test.dat.gz
Binary file not shown.
Binary file added test/black-box/extractors/input/test.dat.xz
Binary file not shown.
Binary file added test/black-box/extractors/input/test.tgz
Binary file not shown.
Binary file added test/black-box/extractors/input/test.zip
Binary file not shown.
Loading

0 comments on commit d021b99

Please sign in to comment.