2019-06-04 17:11:33 +09:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2018-07-18 22:44:41 +09:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2017 Red Hat, Inc.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/cred.h>
|
|
|
|
#include <linux/file.h>
|
2018-07-18 22:44:42 +09:00
|
|
|
#include <linux/mount.h>
|
2018-07-18 22:44:41 +09:00
|
|
|
#include <linux/xattr.h>
|
2018-07-18 22:44:41 +09:00
|
|
|
#include <linux/uio.h>
|
2019-05-06 16:41:02 +09:00
|
|
|
#include <linux/uaccess.h>
|
2020-01-17 21:49:29 +09:00
|
|
|
#include <linux/splice.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/fs.h>
|
2018-07-18 22:44:41 +09:00
|
|
|
#include "overlayfs.h"
|
|
|
|
|
2021-01-26 01:58:50 +09:00
|
|
|
/*
 * Mask handed to iocb_to_rw_flags() together with iocb->ki_flags when
 * ovl_read_iter()/ovl_write_iter() forward I/O to the real file.
 */
#define OVL_IOCB_MASK (IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
|
|
|
|
|
2018-05-12 00:49:31 +09:00
|
|
|
/*
 * Return a one-character tag describing @realinode relative to the overlay
 * @inode (used in the pr_debug() output of ovl_open_realfile()):
 *   'l' - @realinode is not the upper inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (ovl_inode_upper(inode) != realinode)
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
|
|
|
|
|
2020-06-19 00:43:53 +09:00
|
|
|
/* No atime modification nor notify on underlying */
|
|
|
|
#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
|
|
|
|
|
2018-05-12 00:49:31 +09:00
|
|
|
static struct file *ovl_open_realfile(const struct file *file,
|
|
|
|
struct inode *realinode)
|
2018-07-18 22:44:41 +09:00
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(file);
|
|
|
|
struct file *realfile;
|
|
|
|
const struct cred *old_cred;
|
2020-06-19 00:43:53 +09:00
|
|
|
int flags = file->f_flags | OVL_OPEN_FLAGS;
|
2018-07-18 22:44:41 +09:00
|
|
|
|
|
|
|
old_cred = ovl_override_creds(inode->i_sb);
|
2019-04-25 01:39:50 +09:00
|
|
|
realfile = open_with_fake_path(&file->f_path, flags, realinode,
|
|
|
|
current_cred());
|
FROMLIST: overlayfs: override_creds=off option bypass creator_cred
By default, all access to the upper, lower and work directories is the
recorded mounter's MAC and DAC credentials. The incoming accesses are
checked against the caller's credentials.
If the principles of least privilege are applied, the mounter's
credentials might not overlap the credentials of the caller's when
accessing the overlayfs filesystem. For example, a file that a lower
DAC privileged caller can execute, is MAC denied to the generally
higher DAC privileged mounter, to prevent an attack vector.
We add the option to turn off override_creds in the mount options; all
subsequent operations after mount on the filesystem will be only the
caller's credentials. The module boolean parameter and mount option
override_creds is also added as a presence check for this "feature",
existence of /sys/module/overlay/parameters/override_creds.
It was not always this way. Circa 4.6 there was no recorded mounter's
credentials, instead privileged access to upper or work directories
were temporarily increased to perform the operations. The MAC
(selinux) policies were caller's in all cases. override_creds=off
partially returns us to this older access model minus the insecure
temporary credential increases. This is to permit use in a system
with non-overlapping security models for each executable including
the agent that mounts the overlayfs filesystem. In Android
this is the case since init, which performs the mount operations,
has a minimal MAC set of privileges to reduce any attack surface,
and services that use the content have a different set of MAC
privileges (eg: read, for vendor labelled configuration, execute for
vendor libraries and modules). The caveats are not a problem in
the Android usage model, however they should be fixed for
completeness and for general use in time.
Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: linux-unionfs@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel-team@android.com
(cherry picked from https://lore.kernel.org/lkml/20191104215253.141818-5-salyzyn@android.com/)
Signed-off-by: Mark Salyzyn <salyzyn@google.com>
Bug: 133515582
Bug: 136124883
Bug: 129319403
Change-Id: I6a82338fcb8b30b8e6f5d4c26b473730bdfd4488
2018-06-15 03:15:22 +09:00
|
|
|
ovl_revert_creds(inode->i_sb, old_cred);
|
2018-07-18 22:44:41 +09:00
|
|
|
|
|
|
|
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
|
2018-05-12 00:49:31 +09:00
|
|
|
file, file, ovl_whatisit(inode, realinode), file->f_flags,
|
2018-07-18 22:44:41 +09:00
|
|
|
realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
|
|
|
|
|
|
|
|
return realfile;
|
|
|
|
}
|
|
|
|
|
2018-07-18 22:44:41 +09:00
|
|
|
/*
 * The only f_flags bits that ovl_change_flags() accepts as changed on an
 * already open real file; a difference in any other bit is rejected.
 */
#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
|
|
|
|
|
|
|
|
static int ovl_change_flags(struct file *file, unsigned int flags)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(file);
|
|
|
|
int err;
|
|
|
|
|
2020-06-19 00:43:53 +09:00
|
|
|
flags |= OVL_OPEN_FLAGS;
|
2018-07-18 22:44:41 +09:00
|
|
|
|
|
|
|
/* If some flag changed that cannot be changed then something's amiss */
|
|
|
|
if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
|
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
flags &= OVL_SETFL_MASK;
|
|
|
|
|
|
|
|
if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
if (flags & O_DIRECT) {
|
|
|
|
if (!file->f_mapping->a_ops ||
|
|
|
|
!file->f_mapping->a_ops->direct_IO)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (file->f_op->check_flags) {
|
|
|
|
err = file->f_op->check_flags(flags);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_lock(&file->f_lock);
|
|
|
|
file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
|
|
|
|
spin_unlock(&file->f_lock);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-05-12 00:49:31 +09:00
|
|
|
static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
|
|
|
|
bool allow_meta)
|
2018-07-18 22:44:41 +09:00
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(file);
|
2018-05-12 00:49:31 +09:00
|
|
|
struct inode *realinode;
|
2018-07-18 22:44:41 +09:00
|
|
|
|
|
|
|
real->flags = 0;
|
|
|
|
real->file = file->private_data;
|
|
|
|
|
2018-05-12 00:49:31 +09:00
|
|
|
if (allow_meta)
|
|
|
|
realinode = ovl_inode_real(inode);
|
|
|
|
else
|
|
|
|
realinode = ovl_inode_realdata(inode);
|
|
|
|
|
2018-07-18 22:44:41 +09:00
|
|
|
/* Has it been copied up since we'd opened it? */
|
2018-05-12 00:49:31 +09:00
|
|
|
if (unlikely(file_inode(real->file) != realinode)) {
|
2018-07-18 22:44:41 +09:00
|
|
|
real->flags = FDPUT_FPUT;
|
2018-05-12 00:49:31 +09:00
|
|
|
real->file = ovl_open_realfile(file, realinode);
|
2018-07-18 22:44:41 +09:00
|
|
|
|
|
|
|
return PTR_ERR_OR_ZERO(real->file);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Did the flags change since open? */
|
2020-06-19 00:43:53 +09:00
|
|
|
if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
|
2018-07-18 22:44:41 +09:00
|
|
|
return ovl_change_flags(real->file, file->f_flags);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-05-12 00:49:31 +09:00
|
|
|
static int ovl_real_fdget(const struct file *file, struct fd *real)
|
|
|
|
{
|
|
|
|
return ovl_real_fdget_meta(file, real, false);
|
|
|
|
}
|
|
|
|
|
2018-07-18 22:44:41 +09:00
|
|
|
static int ovl_open(struct inode *inode, struct file *file)
|
|
|
|
{
|
|
|
|
struct file *realfile;
|
|
|
|
int err;
|
|
|
|
|
2019-01-22 14:01:39 +09:00
|
|
|
err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
|
2018-07-18 22:44:41 +09:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* No longer need these flags, so don't pass them on to underlying fs */
|
|
|
|
file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
|
|
|
|
|
2018-05-12 00:49:31 +09:00
|
|
|
realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
|
2018-07-18 22:44:41 +09:00
|
|
|
if (IS_ERR(realfile))
|
|
|
|
return PTR_ERR(realfile);
|
|
|
|
|
|
|
|
file->private_data = realfile;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ovl_release(struct inode *inode, struct file *file)
|
|
|
|
{
|
|
|
|
fput(file->private_data);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Seek on an overlay file by delegating to the real file.
 *
 * Returns the value of vfs_llseek() on the real file (or of the fast paths
 * below), or a negative errno if the real file cannot be obtained.
 */
static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	const struct cred *saved_cred;
	struct fd real;
	loff_t ret;

	/*
	 * The two special cases below do not need to involve the real fs,
	 * so handle them here without taking the inode lock.
	 */
	if (offset == 0 && whence == SEEK_CUR)
		return file->f_pos;
	if (offset == 0 && whence == SEEK_SET)
		return vfs_setpos(file, 0, 0);

	ret = ovl_real_fdget(file, &real);
	if (ret)
		return ret;

	/*
	 * Overlay file f_pos is the master copy that is preserved
	 * through copy up and modified on read/write, but only real
	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
	 * limitations that are more strict than ->s_maxbytes for specific
	 * files, so we use the real file to perform seeks.
	 */
	inode_lock(inode);

	/* Seed the real file with our position ... */
	real.file->f_pos = file->f_pos;

	saved_cred = ovl_override_creds(inode->i_sb);
	ret = vfs_llseek(real.file, offset, whence);
	ovl_revert_creds(inode->i_sb, saved_cred);

	/* ... and copy the resulting position back */
	file->f_pos = real.file->f_pos;

	inode_unlock(inode);

	fdput(real);

	return ret;
}
|
|
|
|
|
2018-07-18 22:44:41 +09:00
|
|
|
static void ovl_file_accessed(struct file *file)
|
|
|
|
{
|
|
|
|
struct inode *inode, *upperinode;
|
|
|
|
|
|
|
|
if (file->f_flags & O_NOATIME)
|
|
|
|
return;
|
|
|
|
|
|
|
|
inode = file_inode(file);
|
|
|
|
upperinode = ovl_inode_upper(inode);
|
|
|
|
|
|
|
|
if (!upperinode)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
|
|
|
|
!timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
|
|
|
|
inode->i_mtime = upperinode->i_mtime;
|
|
|
|
inode->i_ctime = upperinode->i_ctime;
|
|
|
|
}
|
|
|
|
|
|
|
|
touch_atime(&file->f_path);
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
|
|
|
{
|
|
|
|
struct file *file = iocb->ki_filp;
|
|
|
|
struct fd real;
|
|
|
|
const struct cred *old_cred;
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
if (!iov_iter_count(iter))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ret = ovl_real_fdget(file, &real);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
|
|
|
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
|
2021-01-26 01:58:50 +09:00
|
|
|
iocb_to_rw_flags(iocb->ki_flags, OVL_IOCB_MASK));
|
FROMLIST: overlayfs: override_creds=off option bypass creator_cred
By default, all access to the upper, lower and work directories is the
recorded mounter's MAC and DAC credentials. The incoming accesses are
checked against the caller's credentials.
If the principles of least privilege are applied, the mounter's
credentials might not overlap the credentials of the caller's when
accessing the overlayfs filesystem. For example, a file that a lower
DAC privileged caller can execute, is MAC denied to the generally
higher DAC privileged mounter, to prevent an attack vector.
We add the option to turn off override_creds in the mount options; all
subsequent operations after mount on the filesystem will be only the
caller's credentials. The module boolean parameter and mount option
override_creds is also added as a presence check for this "feature",
existence of /sys/module/overlay/parameters/override_creds.
It was not always this way. Circa 4.6 there was no recorded mounter's
credentials, instead privileged access to upper or work directories
were temporarily increased to perform the operations. The MAC
(selinux) policies were caller's in all cases. override_creds=off
partially returns us to this older access model minus the insecure
temporary credential increases. This is to permit use in a system
with non-overlapping security models for each executable including
the agent that mounts the overlayfs filesystem. In Android
this is the case since init, which performs the mount operations,
has a minimal MAC set of privileges to reduce any attack surface,
and services that use the content have a different set of MAC
privileges (eg: read, for vendor labelled configuration, execute for
vendor libraries and modules). The caveats are not a problem in
the Android usage model, however they should be fixed for
completeness and for general use in time.
Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: linux-unionfs@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel-team@android.com
(cherry picked from https://lore.kernel.org/lkml/20191104215253.141818-5-salyzyn@android.com/)
Signed-off-by: Mark Salyzyn <salyzyn@google.com>
Bug: 133515582
Bug: 136124883
Bug: 129319403
Change-Id: I6a82338fcb8b30b8e6f5d4c26b473730bdfd4488
2018-06-15 03:15:22 +09:00
|
|
|
ovl_revert_creds(file_inode(file)->i_sb, old_cred);
|
2018-07-18 22:44:41 +09:00
|
|
|
|
|
|
|
ovl_file_accessed(file);
|
|
|
|
|
|
|
|
fdput(real);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-07-18 22:44:41 +09:00
|
|
|
static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
|
|
|
{
|
|
|
|
struct file *file = iocb->ki_filp;
|
|
|
|
struct inode *inode = file_inode(file);
|
|
|
|
struct fd real;
|
|
|
|
const struct cred *old_cred;
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
if (!iov_iter_count(iter))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
inode_lock(inode);
|
|
|
|
/* Update mode */
|
|
|
|
ovl_copyattr(ovl_inode_real(inode), inode);
|
|
|
|
ret = file_remove_privs(file);
|
|
|
|
if (ret)
|
|
|
|
goto out_unlock;
|
|
|
|
|
|
|
|
ret = ovl_real_fdget(file, &real);
|
|
|
|
if (ret)
|
|
|
|
goto out_unlock;
|
|
|
|
|
|
|
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
2018-09-18 22:34:32 +09:00
|
|
|
file_start_write(real.file);
|
2018-07-18 22:44:41 +09:00
|
|
|
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
|
2021-01-26 01:58:50 +09:00
|
|
|
iocb_to_rw_flags(iocb->ki_flags, OVL_IOCB_MASK));
|
2018-09-18 22:34:32 +09:00
|
|
|
file_end_write(real.file);
|
FROMLIST: overlayfs: override_creds=off option bypass creator_cred
By default, all access to the upper, lower and work directories is the
recorded mounter's MAC and DAC credentials. The incoming accesses are
checked against the caller's credentials.
If the principles of least privilege are applied, the mounter's
credentials might not overlap the credentials of the caller's when
accessing the overlayfs filesystem. For example, a file that a lower
DAC privileged caller can execute, is MAC denied to the generally
higher DAC privileged mounter, to prevent an attack vector.
We add the option to turn off override_creds in the mount options; all
subsequent operations after mount on the filesystem will be only the
caller's credentials. The module boolean parameter and mount option
override_creds is also added as a presence check for this "feature",
existence of /sys/module/overlay/parameters/override_creds.
It was not always this way. Circa 4.6 there was no recorded mounter's
credentials, instead privileged access to upper or work directories
were temporarily increased to perform the operations. The MAC
(selinux) policies were caller's in all cases. override_creds=off
partially returns us to this older access model minus the insecure
temporary credential increases. This is to permit use in a system
with non-overlapping security models for each executable including
the agent that mounts the overlayfs filesystem. In Android
this is the case since init, which performs the mount operations,
has a minimal MAC set of privileges to reduce any attack surface,
and services that use the content have a different set of MAC
privileges (eg: read, for vendor labelled configuration, execute for
vendor libraries and modules). The caveats are not a problem in
the Android usage model, however they should be fixed for
completeness and for general use in time.
Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: linux-unionfs@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel-team@android.com
(cherry picked from https://lore.kernel.org/lkml/20191104215253.141818-5-salyzyn@android.com/)
Signed-off-by: Mark Salyzyn <salyzyn@google.com>
Bug: 133515582
Bug: 136124883
Bug: 129319403
Change-Id: I6a82338fcb8b30b8e6f5d4c26b473730bdfd4488
2018-06-15 03:15:22 +09:00
|
|
|
ovl_revert_creds(file_inode(file)->i_sb, old_cred);
|
2018-07-18 22:44:41 +09:00
|
|
|
|
|
|
|
/* Update size */
|
|
|
|
ovl_copyattr(ovl_inode_real(inode), inode);
|
|
|
|
|
|
|
|
fdput(real);
|
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
inode_unlock(inode);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-07-28 17:38:43 +09:00
|
|
|
/*
|
|
|
|
* Calling iter_file_splice_write() directly from overlay's f_op may deadlock
|
|
|
|
* due to lock order inversion between pipe->mutex in iter_file_splice_write()
|
|
|
|
* and file_start_write(real.file) in ovl_write_iter().
|
|
|
|
*
|
|
|
|
* So do everything ovl_write_iter() does and call iter_file_splice_write() on
|
|
|
|
* the real file.
|
|
|
|
*/
|
|
|
|
static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
|
|
|
|
loff_t *ppos, size_t len, unsigned int flags)
|
|
|
|
{
|
|
|
|
struct fd real;
|
|
|
|
const struct cred *old_cred;
|
|
|
|
struct inode *inode = file_inode(out);
|
|
|
|
struct inode *realinode = ovl_inode_real(inode);
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
inode_lock(inode);
|
|
|
|
/* Update mode */
|
|
|
|
ovl_copyattr(realinode, inode);
|
|
|
|
ret = file_remove_privs(out);
|
|
|
|
if (ret)
|
|
|
|
goto out_unlock;
|
|
|
|
|
|
|
|
ret = ovl_real_fdget(out, &real);
|
|
|
|
if (ret)
|
|
|
|
goto out_unlock;
|
|
|
|
|
|
|
|
old_cred = ovl_override_creds(inode->i_sb);
|
|
|
|
file_start_write(real.file);
|
|
|
|
|
|
|
|
ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
|
|
|
|
|
|
|
|
file_end_write(real.file);
|
|
|
|
/* Update size */
|
|
|
|
ovl_copyattr(realinode, inode);
|
|
|
|
revert_creds(old_cred);
|
|
|
|
fdput(real);
|
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
inode_unlock(inode);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-07-18 22:44:42 +09:00
|
|
|
/*
 * ->fsync for overlay files.
 *
 * The real file is looked up with allow_meta == !@datasync.  Only a real
 * file that belongs to the upper inode is synced with vfs_fsync_range();
 * for any other real file this returns 0 without syncing.
 */
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file_inode(file);
	const struct cred *saved_cred;
	struct fd real;
	int ret;

	ret = ovl_real_fdget_meta(file, &real, !datasync);
	if (ret)
		return ret;

	/* Don't sync lower file for fear of receiving EROFS error */
	if (file_inode(real.file) != ovl_inode_upper(inode))
		goto out_put;

	saved_cred = ovl_override_creds(inode->i_sb);
	ret = vfs_fsync_range(real.file, start, end, datasync);
	ovl_revert_creds(inode->i_sb, saved_cred);

out_put:
	fdput(real);

	return ret;
}
|
|
|
|
|
2018-07-18 22:44:42 +09:00
|
|
|
static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
struct file *realfile = file->private_data;
|
|
|
|
const struct cred *old_cred;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!realfile->f_op->mmap)
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
if (WARN_ON(file != vma->vm_file))
|
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
vma->vm_file = get_file(realfile);
|
|
|
|
|
|
|
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
|
|
|
ret = call_mmap(vma->vm_file, vma);
|
FROMLIST: overlayfs: override_creds=off option bypass creator_cred
By default, all access to the upper, lower and work directories is the
recorded mounter's MAC and DAC credentials. The incoming accesses are
checked against the caller's credentials.
If the principles of least privilege are applied, the mounter's
credentials might not overlap the credentials of the caller's when
accessing the overlayfs filesystem. For example, a file that a lower
DAC privileged caller can execute, is MAC denied to the generally
higher DAC privileged mounter, to prevent an attack vector.
We add the option to turn off override_creds in the mount options; all
subsequent operations after mount on the filesystem will be only the
caller's credentials. The module boolean parameter and mount option
override_creds is also added as a presence check for this "feature",
existence of /sys/module/overlay/parameters/override_creds.
It was not always this way. Circa 4.6 there was no recorded mounter's
credentials, instead privileged access to upper or work directories
were temporarily increased to perform the operations. The MAC
(selinux) policies were caller's in all cases. override_creds=off
partially returns us to this older access model minus the insecure
temporary credential increases. This is to permit use in a system
with non-overlapping security models for each executable including
the agent that mounts the overlayfs filesystem. In Android
this is the case since init, which performs the mount operations,
has a minimal MAC set of privileges to reduce any attack surface,
and services that use the content have a different set of MAC
privileges (eg: read, for vendor labelled configuration, execute for
vendor libraries and modules). The caveats are not a problem in
the Android usage model, however they should be fixed for
completeness and for general use in time.
Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: linux-unionfs@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel-team@android.com
(cherry picked from https://lore.kernel.org/lkml/20191104215253.141818-5-salyzyn@android.com/)
Signed-off-by: Mark Salyzyn <salyzyn@google.com>
Bug: 133515582
Bug: 136124883
Bug: 129319403
Change-Id: I6a82338fcb8b30b8e6f5d4c26b473730bdfd4488
2018-06-15 03:15:22 +09:00
|
|
|
ovl_revert_creds(file_inode(file)->i_sb, old_cred);
|
2018-07-18 22:44:42 +09:00
|
|
|
|
|
|
|
if (ret) {
|
|
|
|
/* Drop reference count from new vm_file value */
|
|
|
|
fput(realfile);
|
|
|
|
} else {
|
|
|
|
/* Drop reference count from previous vm_file value */
|
|
|
|
fput(file);
|
|
|
|
}
|
|
|
|
|
|
|
|
ovl_file_accessed(file);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-07-18 22:44:42 +09:00
|
|
|
/*
 * ->fallocate for overlay files: run vfs_fallocate() on the real file, then
 * refresh the overlay inode attributes from the real inode.
 *
 * Returns a negative errno if the real file cannot be obtained, otherwise
 * the result of vfs_fallocate().
 */
static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	const struct cred *saved_cred;
	struct fd real;
	int ret;

	ret = ovl_real_fdget(file, &real);
	if (ret)
		return ret;

	saved_cred = ovl_override_creds(inode->i_sb);
	ret = vfs_fallocate(real.file, mode, offset, len);
	ovl_revert_creds(inode->i_sb, saved_cred);

	/* Update size */
	ovl_copyattr(ovl_inode_real(inode), inode);

	fdput(real);

	return ret;
}
|
|
|
|
|
2018-08-28 16:58:41 +09:00
|
|
|
/*
 * Pass an fadvise hint through to the real file behind an overlay file.
 *
 * The real file is looked up with ovl_real_fdget(); its error, if any, is
 * returned unchanged.  vfs_fadvise() is then called on the real file between
 * ovl_override_creds() and ovl_revert_creds() for this superblock, and its
 * result is returned after the real file reference has been dropped.
 */
static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	struct super_block *sb = file_inode(file)->i_sb;
	const struct cred *saved_cred;
	struct fd realfd;
	int err;

	err = ovl_real_fdget(file, &realfd);
	if (err)
		return err;

	saved_cred = ovl_override_creds(sb);
	err = vfs_fadvise(realfd.file, offset, len, advice);
	ovl_revert_creds(sb, saved_cred);

	fdput(realfd);
	return err;
}
|
|
|
|
|
2018-07-18 22:44:42 +09:00
|
|
|
static long ovl_real_ioctl(struct file *file, unsigned int cmd,
|
|
|
|
unsigned long arg)
|
|
|
|
{
|
|
|
|
struct fd real;
|
|
|
|
const struct cred *old_cred;
|
|
|
|
long ret;
|
|
|
|
|
|
|
|
ret = ovl_real_fdget(file, &real);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
|
|
|
ret = vfs_ioctl(real.file, cmd, arg);
|
FROMLIST: overlayfs: override_creds=off option bypass creator_cred
By default, all access to the upper, lower and work directories is the
recorded mounter's MAC and DAC credentials. The incoming accesses are
checked against the caller's credentials.
If the principles of least privilege are applied, the mounter's
credentials might not overlap the credentials of the caller's when
accessing the overlayfs filesystem. For example, a file that a lower
DAC privileged caller can execute, is MAC denied to the generally
higher DAC privileged mounter, to prevent an attack vector.
We add the option to turn off override_creds in the mount options; all
subsequent operations after mount on the filesystem will be only the
caller's credentials. The module boolean parameter and mount option
override_creds is also added as a presence check for this "feature",
existence of /sys/module/overlay/parameters/override_creds.
It was not always this way. Circa 4.6 there was no recorded mounter's
credentials, instead privileged access to upper or work directories
were temporarily increased to perform the operations. The MAC
(selinux) policies were caller's in all cases. override_creds=off
partially returns us to this older access model minus the insecure
temporary credential increases. This is to permit use in a system
with non-overlapping security models for each executable including
the agent that mounts the overlayfs filesystem. In Android
this is the case since init, which performs the mount operations,
has a minimal MAC set of privileges to reduce any attack surface,
and services that use the content have a different set of MAC
privileges (eg: read, for vendor labelled configuration, execute for
vendor libraries and modules). The caveats are not a problem in
the Android usage model, however they should be fixed for
completeness and for general use in time.
Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: linux-unionfs@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel-team@android.com
(cherry picked from https://lore.kernel.org/lkml/20191104215253.141818-5-salyzyn@android.com/)
Signed-off-by: Mark Salyzyn <salyzyn@google.com>
Bug: 133515582
Bug: 136124883
Bug: 129319403
Change-Id: I6a82338fcb8b30b8e6f5d4c26b473730bdfd4488
2018-06-15 03:15:22 +09:00
|
|
|
ovl_revert_creds(file_inode(file)->i_sb, old_cred);
|
2018-07-18 22:44:42 +09:00
|
|
|
|
|
|
|
fdput(real);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-05-26 15:28:25 +09:00
|
|
|
static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
|
2019-06-12 00:09:28 +09:00
|
|
|
unsigned long arg, unsigned int iflags)
|
2018-07-18 22:44:42 +09:00
|
|
|
{
|
|
|
|
long ret;
|
|
|
|
struct inode *inode = file_inode(file);
|
2019-06-12 00:09:28 +09:00
|
|
|
unsigned int old_iflags;
|
2019-05-06 16:41:02 +09:00
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
ret = mnt_want_write_file(file);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
inode_lock(inode);
|
|
|
|
|
|
|
|
/* Check the capability before cred override */
|
|
|
|
ret = -EPERM;
|
2019-06-12 00:09:28 +09:00
|
|
|
old_iflags = READ_ONCE(inode->i_flags);
|
|
|
|
if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
|
2019-05-06 16:41:02 +09:00
|
|
|
!capable(CAP_LINUX_IMMUTABLE))
|
|
|
|
goto unlock;
|
|
|
|
|
|
|
|
ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
|
|
|
|
if (ret)
|
|
|
|
goto unlock;
|
|
|
|
|
2019-05-26 15:28:25 +09:00
|
|
|
ret = ovl_real_ioctl(file, cmd, arg);
|
2019-05-06 16:41:02 +09:00
|
|
|
|
|
|
|
ovl_copyflags(ovl_inode_real(inode), inode);
|
|
|
|
unlock:
|
|
|
|
inode_unlock(inode);
|
|
|
|
|
|
|
|
mnt_drop_write_file(file);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2019-06-12 00:09:28 +09:00
|
|
|
static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
|
|
|
|
{
|
|
|
|
unsigned int iflags = 0;
|
|
|
|
|
|
|
|
if (flags & FS_SYNC_FL)
|
|
|
|
iflags |= S_SYNC;
|
|
|
|
if (flags & FS_APPEND_FL)
|
|
|
|
iflags |= S_APPEND;
|
|
|
|
if (flags & FS_IMMUTABLE_FL)
|
|
|
|
iflags |= S_IMMUTABLE;
|
|
|
|
if (flags & FS_NOATIME_FL)
|
|
|
|
iflags |= S_NOATIME;
|
|
|
|
|
|
|
|
return iflags;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
|
|
|
|
unsigned long arg)
|
|
|
|
{
|
|
|
|
unsigned int flags;
|
|
|
|
|
|
|
|
if (get_user(flags, (int __user *) arg))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
return ovl_ioctl_set_flags(file, cmd, arg,
|
|
|
|
ovl_fsflags_to_iflags(flags));
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
|
|
|
|
{
|
|
|
|
unsigned int iflags = 0;
|
|
|
|
|
|
|
|
if (xflags & FS_XFLAG_SYNC)
|
|
|
|
iflags |= S_SYNC;
|
|
|
|
if (xflags & FS_XFLAG_APPEND)
|
|
|
|
iflags |= S_APPEND;
|
|
|
|
if (xflags & FS_XFLAG_IMMUTABLE)
|
|
|
|
iflags |= S_IMMUTABLE;
|
|
|
|
if (xflags & FS_XFLAG_NOATIME)
|
|
|
|
iflags |= S_NOATIME;
|
|
|
|
|
|
|
|
return iflags;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
|
|
|
|
unsigned long arg)
|
|
|
|
{
|
|
|
|
struct fsxattr fa;
|
|
|
|
|
|
|
|
memset(&fa, 0, sizeof(fa));
|
|
|
|
if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
return ovl_ioctl_set_flags(file, cmd, arg,
|
|
|
|
ovl_fsxflags_to_iflags(fa.fsx_xflags));
|
|
|
|
}
|
|
|
|
|
2019-05-06 16:41:02 +09:00
|
|
|
static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
|
|
{
|
|
|
|
long ret;
|
2018-07-18 22:44:42 +09:00
|
|
|
|
|
|
|
switch (cmd) {
|
|
|
|
case FS_IOC_GETFLAGS:
|
2019-05-26 15:28:25 +09:00
|
|
|
case FS_IOC_FSGETXATTR:
|
2018-07-18 22:44:42 +09:00
|
|
|
ret = ovl_real_ioctl(file, cmd, arg);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_IOC_SETFLAGS:
|
2019-06-12 00:09:28 +09:00
|
|
|
ret = ovl_ioctl_set_fsflags(file, cmd, arg);
|
|
|
|
break;
|
|
|
|
|
2019-05-26 15:28:25 +09:00
|
|
|
case FS_IOC_FSSETXATTR:
|
2019-06-12 00:09:28 +09:00
|
|
|
ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
|
2018-07-18 22:44:42 +09:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
ret = -ENOTTY;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
|
|
|
|
unsigned long arg)
|
|
|
|
{
|
|
|
|
switch (cmd) {
|
|
|
|
case FS_IOC32_GETFLAGS:
|
|
|
|
cmd = FS_IOC_GETFLAGS;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FS_IOC32_SETFLAGS:
|
|
|
|
cmd = FS_IOC_SETFLAGS;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return -ENOIOCTLCMD;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ovl_ioctl(file, cmd, arg);
|
|
|
|
}
|
|
|
|
|
2018-07-18 22:44:42 +09:00
|
|
|
/* Selects which VFS range operation ovl_copyfile() performs on the real files */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
|
|
|
|
|
2018-10-30 08:41:49 +09:00
|
|
|
/*
 * Shared worker for copy_file_range and remap_file_range.
 *
 * Looks up the real files behind @file_out and @file_in, then performs the
 * operation chosen by @op on them with the given positions, @len and @flags.
 * The operation runs between ovl_override_creds() and ovl_revert_creds() for
 * the destination's superblock.  Afterwards the overlay destination inode's
 * attributes are refreshed from its real inode via ovl_copyattr().
 *
 * Returns the error from ovl_real_fdget() if either real file cannot be
 * obtained, otherwise the value returned by the selected VFS helper.
 */
static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
			   struct file *file_out, loff_t pos_out,
			   loff_t len, unsigned int flags, enum ovl_copyop op)
{
	struct inode *dst_inode = file_inode(file_out);
	const struct cred *saved_cred;
	struct fd src, dst;
	loff_t res;

	res = ovl_real_fdget(file_out, &dst);
	if (res)
		return res;

	res = ovl_real_fdget(file_in, &src);
	if (res)
		goto out_put_dst;

	saved_cred = ovl_override_creds(dst_inode->i_sb);
	if (op == OVL_COPY)
		res = vfs_copy_file_range(src.file, pos_in,
					  dst.file, pos_out, len, flags);
	else if (op == OVL_CLONE)
		res = vfs_clone_file_range(src.file, pos_in,
					   dst.file, pos_out, len, flags);
	else if (op == OVL_DEDUPE)
		res = vfs_dedupe_file_range_one(src.file, pos_in,
						dst.file, pos_out, len,
						flags);
	ovl_revert_creds(dst_inode->i_sb, saved_cred);

	/* Update size */
	ovl_copyattr(ovl_inode_real(dst_inode), dst_inode);

	fdput(src);
out_put_dst:
	fdput(dst);

	return res;
}
|
|
|
|
|
|
|
|
static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
|
|
|
|
struct file *file_out, loff_t pos_out,
|
|
|
|
size_t len, unsigned int flags)
|
|
|
|
{
|
|
|
|
return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
|
|
|
|
OVL_COPY);
|
|
|
|
}
|
|
|
|
|
2018-10-30 08:41:49 +09:00
|
|
|
/*
 * ->remap_file_range() handler.
 *
 * Rejects any flag other than REMAP_FILE_DEDUP and REMAP_FILE_ADVISORY with
 * -EINVAL.  REMAP_FILE_DEDUP selects a dedupe operation; otherwise a clone is
 * performed.  The actual work is done by ovl_copyfile().
 */
static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags)
{
	const unsigned int supported = REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY;
	bool dedupe;

	if (remap_flags & ~supported)
		return -EINVAL;

	dedupe = remap_flags & REMAP_FILE_DEDUP;

	/*
	 * Don't copy up because of a dedupe request, this wouldn't make sense
	 * most of the time (data would be duplicated instead of deduplicated).
	 * So a dedupe is refused unless both files already have an upper inode.
	 */
	if (dedupe &&
	    !(ovl_inode_upper(file_inode(file_in)) &&
	      ovl_inode_upper(file_inode(file_out))))
		return -EPERM;

	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
			    remap_flags, dedupe ? OVL_DEDUPE : OVL_CLONE);
}
|
|
|
|
|
2018-07-18 22:44:41 +09:00
|
|
|
const struct file_operations ovl_file_operations = {
|
|
|
|
.open = ovl_open,
|
|
|
|
.release = ovl_release,
|
|
|
|
.llseek = ovl_llseek,
|
2018-07-18 22:44:41 +09:00
|
|
|
.read_iter = ovl_read_iter,
|
2018-07-18 22:44:41 +09:00
|
|
|
.write_iter = ovl_write_iter,
|
2018-07-18 22:44:42 +09:00
|
|
|
.fsync = ovl_fsync,
|
2018-07-18 22:44:42 +09:00
|
|
|
.mmap = ovl_mmap,
|
2018-07-18 22:44:42 +09:00
|
|
|
.fallocate = ovl_fallocate,
|
2018-08-28 16:58:41 +09:00
|
|
|
.fadvise = ovl_fadvise,
|
2018-07-18 22:44:42 +09:00
|
|
|
.unlocked_ioctl = ovl_ioctl,
|
|
|
|
.compat_ioctl = ovl_compat_ioctl,
|
2020-12-14 23:26:14 +09:00
|
|
|
.splice_read = generic_file_splice_read,
|
2021-07-28 17:38:43 +09:00
|
|
|
.splice_write = ovl_splice_write,
|
2018-07-18 22:44:42 +09:00
|
|
|
|
|
|
|
.copy_file_range = ovl_copy_file_range,
|
2018-10-30 08:41:21 +09:00
|
|
|
.remap_file_range = ovl_remap_file_range,
|
2018-07-18 22:44:41 +09:00
|
|
|
};
|