From 83244699a362009615f2dfc041db5800bc3c2df9 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Tue, 5 Oct 2021 12:02:20 +0200 Subject: [PATCH] snap-bootstrap: wait in `mountNonDataPartitionMatchingKernelDisk` * snap-bootstrap: wait in `mountNonDataPartitionMatchingKernelDisk` The current snap-bootstrap has a race when mounting the seed partition in `mountNonDataPartitionMatchingKernelDisk` on EFI systems. The code determines the partUUID of the disk that booted the kernel by reading the EFI LoaderDevicePartUUID variable. However there is no guarantee that this partition is available when snap-bootstrap runs, the kernel may still enumerate the HW. This can be observed on a fast NUC when booting from a USB stick. Note that the `the-tool.serice` already has a "After=systemd-udev-settle.service" set but that is still racy because any `udev settle` (or `udev trigger --settle`) is racy, the only option is to poll for the part uuid to appear. This is a minimal commit to avoid too much churn in code. Thanks to Sertac for reporting this bug. * snap-bootstrap: rework waitPartSrc to improve testing (thanks to Alberto and Ian) * snap-bootstrap: show a log message if waitPartSrc needs to wait If waitPartSrc needs to wait for the device this commit makes it show a logger.Noticef() message. The message is only shown once because waiting for the device is usually super quick and if it is not that is most likely an error anyway so spamming the terminal will not help. * snap-bootstrap: add test that ensures that if no waiting is needed for partSrc no log message is displayed * snap-bootstrap: rename waitPartSrc -> waitFile * snap-bootstrap: fix time.Duration() casting on 32bit systems --- cmd/snap-bootstrap/cmd_initramfs_mounts.go | 25 +++++ .../cmd_initramfs_mounts_test.go | 92 +++++++++++++++++++ cmd/snap-bootstrap/export_test.go | 12 +++ 3 files changed, 129 insertions(+) diff --git a/cmd/snap-bootstrap/cmd_initramfs_mounts.go b/cmd/snap-bootstrap/cmd_initramfs_mounts.go index fbafe6b4988..1e4025e4819 100644 --- a/cmd/snap-bootstrap/cmd_initramfs_mounts.go +++ b/cmd/snap-bootstrap/cmd_initramfs_mounts.go @@ -28,6 +28,7 @@ import ( "path/filepath" "strings" "syscall" + "time" "github.com/jessevdk/go-flags" @@ -1213,6 +1214,18 @@ func checkDataAndSavePairing(rootdir string) (bool, error) { return subtle.ConstantTimeCompare(marker1, marker2) == 1, nil } +// waitFile waits for the given file/device-node/directory to appear. +var waitFile = func(path string, wait time.Duration, n int) error { + for i := 0; i < n; i++ { + if osutil.FileExists(path) { + return nil + } + time.Sleep(wait) + } + + return fmt.Errorf("no %v after waiting for %v", path, time.Duration(n)*wait) +} + // mountNonDataPartitionMatchingKernelDisk will select the partition to mount at // dir, using the boot package function FindPartitionUUIDForBootedKernelDisk to // determine what partition the booted kernel came from. If which disk the @@ -1228,6 +1241,18 @@ func mountNonDataPartitionMatchingKernelDisk(dir, fallbacklabel string) error { partSrc = filepath.Join("/dev/disk/by-label", fallbacklabel) } + // The partition uuid is read from the EFI variables. At this point + // the kernel may not have initialized the storage HW yet so poll + // here. + if !osutil.FileExists(filepath.Join(dirs.GlobalRootDir, partSrc)) { + pollWait := 50 * time.Millisecond + pollIterations := 1200 + logger.Noticef("waiting up to %v for %v to appear", time.Duration(pollIterations)*pollWait, partSrc) + if err := waitFile(filepath.Join(dirs.GlobalRootDir, partSrc), pollWait, pollIterations); err != nil { + return fmt.Errorf("cannot mount source: %v", err) + } + } + opts := &systemdMountOptions{ // always fsck the partition when we are mounting it, as this is the // first partition we will be mounting, we can't know if anything is diff --git a/cmd/snap-bootstrap/cmd_initramfs_mounts_test.go b/cmd/snap-bootstrap/cmd_initramfs_mounts_test.go index 9471ad9e026..c2f0dc23949 100644 --- a/cmd/snap-bootstrap/cmd_initramfs_mounts_test.go +++ b/cmd/snap-bootstrap/cmd_initramfs_mounts_test.go @@ -197,6 +197,11 @@ func (s *initramfsMountsSuite) SetUpTest(c *C) { restore = func() { dirs.SetRootDir("") } s.AddCleanup(restore) + restore = main.MockWaitFile(func(string, time.Duration, int) error { + return nil + }) + s.AddCleanup(restore) + // use a specific time for all the assertions, in the future so that we can // set the timestamp of the model assertion to something newer than now, but // still older than the snap declarations by default @@ -5932,3 +5937,90 @@ func (s *initramfsMountsSuite) TestInitramfsMountsTryRecoveryHealthCheckFails(c c.Check(rebootCalls, Equals, 1) c.Check(s.logs.String(), testutil.Contains, `try recovery system health check failed: mock failure`) } + +func (s *initramfsMountsSuite) TestMountNonDataPartitionPolls(c *C) { + restore := main.MockPartitionUUIDForBootedKernelDisk("some-uuid") + defer restore() + + var waitFile []string + var pollWait time.Duration + var pollIterations int + restore = main.MockWaitFile(func(path string, wait time.Duration, n int) error { + waitFile = append(waitFile, path) + pollWait = wait + pollIterations = n + return fmt.Errorf("error") + }) + defer restore() + + n := 0 + restore = main.MockSystemdMount(func(what, where string, opts *main.SystemdMountOptions) error { + n++ + return nil + }) + defer restore() + + err := main.MountNonDataPartitionMatchingKernelDisk("/some/target", "") + c.Check(err, ErrorMatches, "cannot mount source: error") + c.Check(n, Equals, 0) + c.Check(waitFile, DeepEquals, []string{ + filepath.Join(dirs.GlobalRootDir, "/dev/disk/by-partuuid/some-uuid"), + }) + c.Check(pollWait, DeepEquals, 50*time.Millisecond) + c.Check(pollIterations, DeepEquals, 1200) + c.Check(s.logs.String(), Matches, "(?m).* waiting up to 1m0s for /dev/disk/by-partuuid/some-uuid to appear") + // there is only a single log msg + c.Check(strings.Count(s.logs.String(), "\n"), Equals, 1) +} + +func (s *initramfsMountsSuite) TestMountNonDataPartitionNoPollNoLogMsg(c *C) { + restore := main.MockPartitionUUIDForBootedKernelDisk("some-uuid") + defer restore() + + n := 0 + restore = main.MockSystemdMount(func(what, where string, opts *main.SystemdMountOptions) error { + n++ + return nil + }) + defer restore() + + fakedPartSrc := filepath.Join(dirs.GlobalRootDir, "/dev/disk/by-partuuid/some-uuid") + err := os.MkdirAll(filepath.Dir(fakedPartSrc), 0755) + c.Assert(err, IsNil) + err = ioutil.WriteFile(fakedPartSrc, nil, 0644) + c.Assert(err, IsNil) + + err = main.MountNonDataPartitionMatchingKernelDisk("some-target", "") + c.Check(err, IsNil) + c.Check(s.logs.String(), Equals, "") + c.Check(n, Equals, 1) +} + +func (s *initramfsMountsSuite) TestWaitFileErr(c *C) { + err := main.WaitFile("/dev/does-not-exist", 10*time.Millisecond, 2) + c.Check(err, ErrorMatches, "no /dev/does-not-exist after waiting for 20ms") +} + +func (s *initramfsMountsSuite) TestWaitFile(c *C) { + existingPartSrc := filepath.Join(c.MkDir(), "does-exist") + err := ioutil.WriteFile(existingPartSrc, nil, 0644) + c.Assert(err, IsNil) + + err = main.WaitFile(existingPartSrc, 5000*time.Second, 1) + c.Check(err, IsNil) + + err = main.WaitFile(existingPartSrc, 1*time.Second, 10000) + c.Check(err, IsNil) +} + +func (s *initramfsMountsSuite) TestWaitFileWorksWithFilesAppearingLate(c *C) { + eventuallyExists := filepath.Join(c.MkDir(), "eventually-exists") + go func() { + time.Sleep(40 * time.Millisecond) + err := ioutil.WriteFile(eventuallyExists, nil, 0644) + c.Assert(err, IsNil) + }() + + err := main.WaitFile(eventuallyExists, 5*time.Millisecond, 1000) + c.Check(err, IsNil) +} diff --git a/cmd/snap-bootstrap/export_test.go b/cmd/snap-bootstrap/export_test.go index b3f01d3bb80..fb6a6531fd8 100644 --- a/cmd/snap-bootstrap/export_test.go +++ b/cmd/snap-bootstrap/export_test.go @@ -32,6 +32,8 @@ var ( Parser = parser DoSystemdMount = doSystemdMountImpl + + MountNonDataPartitionMatchingKernelDisk = mountNonDataPartitionMatchingKernelDisk ) type SystemdMountOptions = systemdMountOptions @@ -160,3 +162,13 @@ func MockTryRecoverySystemHealthCheck(mock func() error) (restore func()) { tryRecoverySystemHealthCheck = old } } + +func MockWaitFile(f func(string, time.Duration, int) error) (restore func()) { + old := waitFile + waitFile = f + return func() { + waitFile = old + } +} + +var WaitFile = waitFile