Skip to content

Commit

Permalink
Merge pull request #606 from rhubert/urlscm-bob-download
Browse files Browse the repository at this point in the history
Place url-scm download files in .bob-download
  • Loading branch information
jkloetzke authored Jan 21, 2025
2 parents 2d829b4 + c619d27 commit d021b99
Show file tree
Hide file tree
Showing 20 changed files with 367 additions and 109 deletions.
16 changes: 16 additions & 0 deletions doc/manual/policies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,22 @@ New behavior

Unmanaged layers are expected in the same directory.

urlScmSeparateDownload
~~~~~~~~~~~~~~~~~~~~~~

Introduced in: 1.0

This policy controls where Bob places the downloaded file of a url SCM when
extraction is used.

Old behavior
The downloaded file could be found in the workspace next to the extracted files.

New behavior
The downloaded file is stored next to the workspace in a separate download folder.
Only the extracted content is in the workspace.


.. _policies-obsolete:

Obsolete policies
Expand Down
2 changes: 2 additions & 0 deletions pym/bob/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,8 @@ async def _cookCheckoutStep(self, checkoutStep, depth):
os.makedirs(atticPath)
atticPath = os.path.join(atticPath, atticName)
os.rename(scmPath, atticPath)
if scmDir in scmMap:
scmMap[scmDir].postAttic(prettySrcPath)
BobState().setAtticDirectoryState(atticPath, scmSpec)
atticPaths.add(scmPath, atticPath)
del oldCheckoutState[scmDir]
Expand Down
6 changes: 6 additions & 0 deletions pym/bob/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -3029,6 +3029,7 @@ class RecipeSet:
schema.Optional('defaultFileMode') : bool,
schema.Optional('substituteMetaEnv') : bool,
schema.Optional('managedLayers') : bool,
schema.Optional('urlScmSeparateDownload') : bool,
},
error="Invalid policy specified! Are you using an appropriate version of Bob?"
),
Expand Down Expand Up @@ -3083,6 +3084,11 @@ class RecipeSet:
InfoOnce("managedLayers policy is not set. Only unmanaged layers are supported.",
help="See http://bob-build-tool.readthedocs.io/en/latest/manual/policies.html#managedlayers for more information.")
),
"urlScmSeparateDownload": (
"0.25.1.dev27",
InfoOnce("urlScmSeparateDownload policy is not set. Extracted archives of the 'url' SCM are retained in the workspace.",
help="See http://bob-build-tool.readthedocs.io/en/latest/manual/policies.html#urlscmseparatedownload for more information.")
)
}

_ignoreCmdConfig = False
Expand Down
1 change: 1 addition & 0 deletions pym/bob/intermediate.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ def fromRecipeSet(cls, recipeSet):
'gitCommitOnBranch' : recipeSet.getPolicy('gitCommitOnBranch'),
'fixImportScmVariant' : recipeSet.getPolicy('fixImportScmVariant'),
'defaultFileMode' : recipeSet.getPolicy('defaultFileMode'),
'urlScmSeparateDownload' : recipeSet.getPolicy('urlScmSeparateDownload'),
}
self.__data['archiveSpec'] = recipeSet.archiveSpec()
self.__data['envWhiteList'] = sorted(recipeSet.envWhiteList())
Expand Down
3 changes: 2 additions & 1 deletion pym/bob/scm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def getScm(spec, overrides=[], recipeSet=None):
recipeSet and recipeSet.getPolicy('scmIgnoreUser'),
recipeSet.getPreMirrors() if recipeSet else [],
recipeSet.getFallbackMirrors() if recipeSet else [],
recipeSet and recipeSet.getPolicy('defaultFileMode'))
recipeSet and recipeSet.getPolicy('defaultFileMode'),
recipeSet and recipeSet.getPolicy('urlScmSeparateDownload'))
else:
raise ParseError("Unknown SCM '{}'".format(scm))
3 changes: 3 additions & 0 deletions pym/bob/scm/scm.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,9 @@ def calcLiveBuildId(self, workspacePath):
"""Calculate live build-id from workspace."""
return None

def postAttic(self, workspace):
    # Hook invoked after the SCM's old checkout has been moved to the
    # attic. The default is a no-op. SCMs that keep state outside the
    # workspace (e.g. the url SCM's separate download directory —
    # see UrlScm.postAttic) override this to clean that state up.
    # ``workspace`` is the path of the (former) workspace directory.
    pass

class ScmAudit(metaclass=ABCMeta):
@classmethod
async def fromDir(cls, workspace, dir, extra):
Expand Down
222 changes: 161 additions & 61 deletions pym/bob/scm/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
replacePath
from .scm import Scm, ScmAudit
from http.client import HTTPException
from abc import abstractmethod
import asyncio
import concurrent.futures.process
import contextlib
Expand Down Expand Up @@ -155,6 +156,123 @@ def dumpMode(mode):

isWin32 = sys.platform == "win32"


class Extractor:
    """Base class of the url SCM archive extractors.

    Holds the common state (target directory, archive file name, number of
    path components to strip, and whether the downloaded archive is kept in
    a separate download folder outside the workspace) and the shared
    extraction driver logic. Subclasses implement :meth:`extract`.
    """

    def __init__(self, dir, file, strip, separateDownload):
        self.dir = dir          # workspace-relative extraction directory
        self.file = file        # file name of the downloaded archive
        self.strip = strip      # leading path components to strip (tar only)
        self.separateDownload = separateDownload  # archive outside workspace?

    async def _extract(self, cmds, invoker):
        """Run the first available command of *cmds* to extract the archive.

        A canary file next to the archive records the last successful
        extraction; extraction is skipped if the canary is newer than the
        archive. Fails the invocation if none of the commands is installed.
        """
        destination = self.getCompressedFilePath(invoker)
        canary = destination + ".extracted"
        if isYounger(destination, canary):
            for cmd in cmds:
                if shutil.which(cmd[0]) is None: continue
                await invoker.checkCommand(cmd, cwd=self.dir)
                invoker.trace("<touch>", canary)
                # Create/truncate the canary and bump its mtime so the next
                # run sees the extraction as up-to-date.
                with open(canary, "wb"):
                    pass
                os.utime(canary)
                break
            else:
                invoker.fail("No suitable extractor found!")

    def getCompressedFilePath(self, invoker):
        """Return the absolute path of the downloaded archive.

        If the separate download policy is active the archive lives in the
        sibling "download" folder next to the workspace, otherwise directly
        in the workspace.
        """
        downloadFolder = os.path.join(os.pardir, "download") if self.separateDownload else ""
        # Bug fix: the original had a stray trailing line continuation ("\")
        # after this return expression.
        return os.path.abspath(invoker.joinPath(downloadFolder, self.dir, self.file))

    @abstractmethod
    async def extract(self, invoker):
        # Bug fix: the abstract signature used to be
        # (self, invoker, destination, cwd) while every subclass and the
        # call site use (self, invoker).
        return False

# Use the Python tar/zip extraction only on Windows. They are slower and in
# case of tarfile broken in certain ways (e.g. tarfile will result in
# different file modes!). But it shouldn't make a difference on Windows.
class TarExtractor(Extractor):
    """Extract (optionally compressed) tar archives.

    The only extractor that supports stripping leading path components, so
    no ``strip != 0`` guard is needed here. ``__init__`` is inherited
    unchanged from Extractor (the original redundant override was removed).
    """

    async def extract(self, invoker):
        cmds = []
        compressedFilePath = self.getCompressedFilePath(invoker)
        # Python's tarfile module is only preferred on Windows and cannot
        # strip components.
        if isWin32 and self.strip == 0:
            cmds.append(["python", "-m", "tarfile", "-e", compressedFilePath])

        cmd = ["tar", "-x", "--no-same-owner", "--no-same-permissions",
               "-f", compressedFilePath]
        if self.strip > 0:
            cmd.append("--strip-components={}".format(self.strip))
        cmds.append(cmd)

        await self._extract(cmds, invoker)


class ZipExtractor(Extractor):
    """Unpack zip archives. Stripping path components is not supported."""

    def __init__(self, dir, file, strip, separateDownload):
        super().__init__(dir, file, strip, separateDownload)
        if strip != 0:
            raise BuildError("Extractor does not support 'stripComponents'!")

    async def extract(self, invoker):
        archive = self.getCompressedFilePath(invoker)
        # Prefer Python's zipfile module on Windows, fall back to unzip.
        candidates = []
        if isWin32:
            candidates.append(["python", "-m", "zipfile", "-e", archive, "."])
        candidates.append(["unzip", "-o", archive])
        await self._extract(candidates, invoker)


class GZipExtractor(Extractor):
    """Uncompress a single gzip-compressed file with gunzip."""

    def __init__(self, dir, file, strip, separateDownload):
        super().__init__(dir, file, strip, separateDownload)
        if strip != 0:
            raise BuildError("Extractor does not support 'stripComponents'!")

    async def extract(self, invoker):
        # gunzip extracts the file at the location of the input file. If the
        # archive lives in the separate download folder, copy it into the
        # workspace prior to uncompressing and let gunzip consume the copy.
        # Otherwise pass -k to keep the original file in the workspace.
        if self.separateDownload:
            shutil.copyfile(self.getCompressedFilePath(invoker),
                            invoker.joinPath(self.dir, self.file))
            cmd = ["gunzip", "-f", self.file]
        else:
            cmd = ["gunzip", "-k", "-f", self.file]
        await self._extract([cmd], invoker)


class XZExtractor(Extractor):
    """Uncompress a single xz-compressed file with unxz."""

    def __init__(self, dir, file, strip, separateDownload):
        super().__init__(dir, file, strip, separateDownload)
        if strip != 0:
            raise BuildError("Extractor does not support 'stripComponents'!")

    async def extract(self, invoker):
        # unxz replaces its input file in place. With a separate download
        # folder the archive is first copied into the workspace and the copy
        # is consumed; otherwise -k keeps the original archive around.
        if self.separateDownload:
            shutil.copyfile(self.getCompressedFilePath(invoker),
                            invoker.joinPath(self.dir, self.file))
            cmd = ["unxz", "-f", self.file]
        else:
            cmd = ["unxz", "-k", "-f", self.file]
        await self._extract([cmd], invoker)


class SevenZipExtractor(Extractor):
    """Unpack 7z archives with the 7z tool."""

    def __init__(self, dir, file, strip, separateDownload):
        super().__init__(dir, file, strip, separateDownload)
        if strip != 0:
            raise BuildError("Extractor does not support 'stripComponents'!")

    async def extract(self, invoker):
        # "x" extracts with full paths, "-y" answers all prompts with yes.
        archive = self.getCompressedFilePath(invoker)
        await self._extract([["7z", "x", "-y", archive]], invoker)


class UrlScm(Scm):

__DEFAULTS = {
Expand Down Expand Up @@ -212,31 +330,17 @@ class UrlScm(Scm):
(".zip", "zip"),
]

# Use the Python tar/zip extraction only on Windows. They are slower and in
# case of tarfile broken in certain ways (e.g. tarfile will result in
# different file modes!). But it shouldn't make a difference on Windows.
EXTRACTORS = {
"tar" : [
(isWin32, "python", ["-m", "tarfile", "-e", "{}"], None),
(True, "tar", ["-x", "--no-same-owner", "--no-same-permissions", "-f", "{}"], "--strip-components={}"),
],
"gzip" : [
(True, "gunzip", ["-kf", "{}"], None),
],
"xz" : [
(True, "unxz", ["-kf", "{}"], None),
],
"7z" : [
(True, "7z", ["x", "-y", "{}"], None),
],
"zip" : [
(isWin32, "python", ["-m", "zipfile", "-e", "{}", "."], None),
(True, "unzip", ["-o", "{}"], None),
],
"tar" : TarExtractor,
"gzip" : GZipExtractor,
"xz" : XZExtractor,
"7z" : SevenZipExtractor,
"zip" : ZipExtractor,
}

def __init__(self, spec, overrides=[], stripUser=None,
preMirrors=[], fallbackMirrors=[], defaultFileMode=None):
preMirrors=[], fallbackMirrors=[], defaultFileMode=None,
separateDownload=False):
super().__init__(spec, overrides)
self.__url = spec["url"]
self.__digestSha1 = spec.get("digestSHA1")
Expand Down Expand Up @@ -275,6 +379,7 @@ def __init__(self, spec, overrides=[], stripUser=None,
self.__fallbackMirrorsUrls = spec.get("fallbackMirrors")
self.__fallbackMirrorsUpload = spec.get("__fallbackMirrorsUpload")
self.__fileMode = spec.get("fileMode", 0o600 if defaultFileMode else None)
self.__separateDownload = spec.get("__separateDownload", separateDownload)

def getProperties(self, isJenkins, pretty=False):
ret = super().getProperties(isJenkins)
Expand All @@ -295,6 +400,7 @@ def getProperties(self, isJenkins, pretty=False):
'fallbackMirrors' : self.__getFallbackMirrorsUrls(),
'__fallbackMirrorsUpload' : self.__getFallbackMirrorsUpload(),
'fileMode' : dumpMode(self.__fileMode) if pretty else self.__fileMode,
'__separateDownload': self.__separateDownload,
})
return ret

Expand Down Expand Up @@ -517,6 +623,9 @@ async def _put(self, invoker, workspaceFile, source, url):
invoker.fail("Upload not supported for URL scheme: " + url.scheme)

def canSwitch(self, oldScm):
if self.__separateDownload != oldScm.__separateDownload:
return False

diff = self._diffSpec(oldScm)
if "scm" in diff:
return False
Expand Down Expand Up @@ -551,7 +660,16 @@ async def switch(self, invoker, oldScm):
async def invoke(self, invoker):
os.makedirs(invoker.joinPath(self.__dir), exist_ok=True)
workspaceFile = os.path.join(self.__dir, self.__fn)
extractor = self.__getExtractor()

destination = invoker.joinPath(self.__dir, self.__fn)
if extractor is not None and self.__separateDownload:
downloadDestination = invoker.joinPath(os.pardir, "download", self.__dir)
# os.makedirs doc:
# Note: makedirs() will become confused if the path elements to create include pardir (eg. “..” on UNIX systems).
# -> use normpath to collapse the up-level references
os.makedirs(os.path.normpath(downloadDestination), exist_ok=True)
destination = invoker.joinPath(os.pardir, "download", self.__dir, self.__fn)

# Download only if necessary
if not self.isDeterministic() or not os.path.isfile(destination):
Expand Down Expand Up @@ -600,26 +718,17 @@ async def invoke(self, invoker):
await self._put(invoker, workspaceFile, destination, url)

# Run optional extractors
extractors = self.__getExtractors()
canary = invoker.joinPath(self.__dir, "." + self.__fn + ".extracted")
if extractors and isYounger(destination, canary):
for cmd in extractors:
if shutil.which(cmd[0]) is None: continue
await invoker.checkCommand(cmd, cwd=self.__dir)
invoker.trace("<touch>", canary)
with open(canary, "wb") as f:
pass
os.utime(canary)
break
else:
invoker.fail("No suitable extractor found!")
if extractor is not None:
await extractor.extract(invoker)

def asDigestScript(self):
"""Return forward compatible stable string describing this url.
The format is "digest dir extract" if a SHA checksum was specified.
Otherwise it is "url dir extract". A "s#" is appended if leading paths
are stripped where # is the number of stripped elements.
are stripped where # is the number of stripped elements. Also appended
is "m<fileMode>" if fileMode is set.
"sep" is appended if the archive is not stored in the workspace.
"""
if self.__stripUser:
filt = removeUserFromUrl
Expand All @@ -629,7 +738,8 @@ def asDigestScript(self):
self.__digestSha1 or filt(self.__url)
) + " " + posixpath.join(self.__dir, self.__fn) + " " + str(self.__extract) + \
( " s{}".format(self.__strip) if self.__strip > 0 else "" ) + \
( " m{}".format(self.__fileMode) if self.__fileMode is not None else "")
( " m{}".format(self.__fileMode) if self.__fileMode is not None else "") + \
( " sep" if self.__separateDownload else "" )

def getDirectory(self):
return self.__dir
Expand Down Expand Up @@ -659,39 +769,29 @@ def calcLiveBuildId(self, workspacePath):
else:
return None

def __getExtractors(self):
extractors = None
def __getExtractor(self):
extractor = None
if self.__extract in ["yes", "auto", True]:
for (ext, tool) in UrlScm.EXTENSIONS:
if self.__fn.endswith(ext):
extractors = UrlScm.EXTRACTORS[tool]
extractor = UrlScm.EXTRACTORS[tool](self.__dir, self.__fn,
self.__strip, self.__separateDownload)
break
if not extractors and self.__extract != "auto":
if extractor is None and self.__extract != "auto":
raise ParseError("Don't know how to extract '"+self.__fn+"' automatically.")
elif self.__extract in UrlScm.EXTRACTORS:
extractors = UrlScm.EXTRACTORS[self.__extract]
extractor = UrlScm.EXTRACTORS[self.__extract](self.__dir, self.__fn,
self.__strip, self.__separateDownload)
elif self.__extract not in ["no", False]:
raise ParseError("Invalid extract mode: " + self.__extract)

if extractors is None:
return []

ret = []
for extractor in extractors:
if not extractor[0]: continue
if self.__strip > 0:
if extractor[3] is None:
continue
strip = [extractor[3].format(self.__strip)]
else:
strip = []
ret.append([extractor[1]] + [a.format(self.__fn) for a in extractor[2]] + strip)

if not ret:
raise BuildError("Extractor does not support 'stripComponents'!")

return ret

return extractor

def postAttic(self, workspace):
    # The separately downloaded archive lives outside the workspace in a
    # sibling "download" folder. When the workspace is moved to the attic,
    # drop the now-stale download directory as well.
    if not self.__separateDownload:
        return
    # os.path.exists returns False if os.pardir is in the path -> normalize it
    staleDir = os.path.normpath(
        os.path.join(workspace, os.pardir, "download", self.__dir))
    if os.path.exists(staleDir):
        shutil.rmtree(staleDir)

class UrlAudit(ScmAudit):

Expand Down
2 changes: 2 additions & 0 deletions test/black-box/extractors/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
policies:
urlScmSeparateDownload: True
Binary file added test/black-box/extractors/input/test.7z
Binary file not shown.
1 change: 1 addition & 0 deletions test/black-box/extractors/input/test.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
Binary file added test/black-box/extractors/input/test.dat.gz
Binary file not shown.
Binary file added test/black-box/extractors/input/test.dat.xz
Binary file not shown.
Binary file added test/black-box/extractors/input/test.tgz
Binary file not shown.
Binary file added test/black-box/extractors/input/test.zip
Binary file not shown.
Loading

0 comments on commit d021b99

Please sign in to comment.