Mark Salyzyn 168dcc6231 ANDROID: overlayfs: override_creds=off option bypass creator_cred
By default, all access to the upper, lower and work directories is the
recorded mounter's MAC and DAC credentials.  The incoming accesses are
checked against the caller's credentials.

If the principle of least privilege is applied, the mounter's
credentials might not overlap the caller's credentials when
accessing the overlayfs filesystem.  For example, a file that a lower
DAC privileged caller can execute may be MAC denied to the generally
higher DAC privileged mounter, to prevent an attack vector.

We add the option to turn off override_creds in the mount options; all
subsequent operations on the filesystem after mount will use only the
caller's credentials.  The boolean module parameter override_creds,
matching the mount option, is also added so that the presence of this
"feature" can be checked by the existence of
/sys/module/overlay/parameters/override_creds.

It was not always this way.  Circa 4.6 there were no recorded mounter's
credentials; instead, privileges for access to the upper or work
directories were temporarily raised to perform the operations.  The MAC
(selinux) policies were the caller's in all cases.  override_creds=off
partially returns us to this older access model minus the insecure
temporary credential increases.  This is to permit use in a system
with non-overlapping security models for each executable including
the agent that mounts the overlayfs filesystem.  In Android
this is the case since init, which performs the mount operations,
has a minimal MAC set of privileges to reduce any attack surface,
and services that use the content have a different set of MAC
privileges (eg: read, for vendor labelled configuration, execute for
vendor libraries and modules).  The caveats are not a problem in
the Android usage model, however they should be fixed for
completeness and for general use in time.

Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: linux-unionfs@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel-team@android.com

---
v9:
- Add to the caveats

v8:
- drop pr_warn message after straw poll to remove it.
- added a use case in the commit message

v7:
- change name of internal parameter to ovl_override_creds_def
- report override_creds only if different than default

v6:
- Drop CONFIG_OVERLAY_FS_OVERRIDE_CREDS.
- Do better with the documentation.
- pr_warn message adjusted to report consequences.

v5:
- beefed up the caveats in the Documentation
- Is dependent on
  "overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh"
  "overlayfs: check CAP_MKNOD before issuing vfs_whiteout"
- Added pr_warn when override_creds=off

v4:
- spelling and grammar errors in text

v3:
- Change name from caller_credentials / creator_credentials to the
  boolean override_creds.
- Changed from creator to mounter credentials.
- Updated and fortified the documentation.
- Added CONFIG_OVERLAY_FS_OVERRIDE_CREDS

v2:
- Forward port changed attr to stat, resulting in a build error.
- altered commit message.

Signed-off-by: Mark Salyzyn <salyzyn@google.com>
(cherry picked from https://lore.kernel.org/patchwork/patch/1009299)
Bug: 109821005
Bug: 112955896
Bug: 127298877
Change-Id: I1d99298ec5e71174734481be3497763c6b9d42e1
2019-03-06 15:29:01 +00:00

1119 lines
25 KiB
C

/*
*
* Copyright (C) 2011 Novell Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/module.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/atomic.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
/*
 * Limit on the length of an absolute redirect xattr value.  Exported as the
 * "redirect_max" module parameter (mode 0644); ovl_get_redirect() sizes its
 * scratch buffer from it.
 */
static unsigned short ovl_redirect_max = 256;
module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
/* The description is keyed by the exported parameter name, not the variable. */
MODULE_PARM_DESC(redirect_max,
		 "Maximum length of absolute redirect xattr value");
/*
 * Remove @wdentry from the directory inode @wdir: rmdir for a directory,
 * unlink for anything else.  An extra reference on @wdentry is held across
 * the removal.  A failure is logged and its error code returned; 0 is
 * returned on success.
 */
int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
	int ret;

	dget(wdentry);
	ret = d_is_dir(wdentry) ? ovl_do_rmdir(wdir, wdentry)
				: ovl_do_unlink(wdir, wdentry);
	dput(wdentry);

	if (!ret)
		return 0;

	pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
	       wdentry, ret);
	return ret;
}
/*
 * Look up a fresh temporary name ("#<hex counter>") in @workdir.
 *
 * Returns the dentry from lookup_one_len() when it is negative or an error
 * pointer; if the name unexpectedly exists already, the dentry is dropped,
 * the collision is logged and ERR_PTR(-EIO) is returned.
 */
struct dentry *ovl_lookup_temp(struct dentry *workdir)
{
	/* counter is allowed to wrap, since temp dentries are ephemeral */
	static atomic_t temp_id = ATOMIC_INIT(0);
	char name[20];
	struct dentry *found;

	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));

	found = lookup_one_len(name, workdir, strlen(name));
	if (IS_ERR(found) || !found->d_inode)
		return found;

	pr_err("overlayfs: workdir/%s already exists\n", name);
	dput(found);
	return ERR_PTR(-EIO);
}
/*
 * Create a whiteout under a temporary name in @workdir and return its
 * dentry, or an ERR_PTR() if the temp lookup or the whiteout creation fails.
 * @dentry is not used by the body.
 *
 * caller holds i_mutex on workdir
 */
static struct dentry *ovl_whiteout(struct dentry *workdir,
				   struct dentry *dentry)
{
	struct inode *wdir = workdir->d_inode;
	struct dentry *wh;
	int ret;

	wh = ovl_lookup_temp(workdir);
	if (IS_ERR(wh))
		return wh;

	ret = ovl_do_whiteout(wdir, wh);
	if (!ret)
		return wh;

	/* Creation failed: give the temp dentry back and report the error. */
	dput(wh);
	return ERR_PTR(ret);
}
/*
 * Create the real object for @newdentry in directory inode @dir.
 *
 * With @hardlink set a link to it is made; otherwise the file type bits of
 * @attr->mode select create/mkdir/mknod/symlink.  @attr is only looked at
 * when @hardlink is NULL.
 *
 * Returns 0 on success, -ESTALE if @newdentry is already positive, -EPERM
 * for an unrecognised file type, -ENOENT if the operation reported success
 * but left the dentry negative, or the error from the underlying helper.
 */
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
		    struct cattr *attr, struct dentry *hardlink, bool debug)
{
	int ret;

	if (newdentry->d_inode)
		return -ESTALE;

	if (hardlink) {
		ret = ovl_do_link(hardlink, dir, newdentry, debug);
	} else {
		const unsigned int fmt = attr->mode & S_IFMT;

		if (fmt == S_IFREG)
			ret = ovl_do_create(dir, newdentry, attr->mode, debug);
		else if (fmt == S_IFDIR)
			ret = ovl_do_mkdir(dir, newdentry, attr->mode, debug);
		else if (fmt == S_IFCHR || fmt == S_IFBLK ||
			 fmt == S_IFIFO || fmt == S_IFSOCK)
			ret = ovl_do_mknod(dir, newdentry,
					   attr->mode, attr->rdev, debug);
		else if (fmt == S_IFLNK)
			ret = ovl_do_symlink(dir, newdentry, attr->link, debug);
		else
			ret = -EPERM;
	}

	if (ret)
		return ret;

	/*
	 * Not quite sure if non-instantiated dentry is legal or not.
	 * VFS doesn't seem to care so check and warn here.
	 */
	if (WARN_ON(!newdentry->d_inode))
		return -ENOENT;

	return 0;
}
/*
 * Set the opaque xattr ("y") on @upper via ovl_check_setxattr(), passing
 * @xerr through to it, and on success flag @dentry as opaque as well.
 * Returns 0 or the error from ovl_check_setxattr().
 */
static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
			       int xerr)
{
	int ret = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE,
				     "y", 1, xerr);

	if (ret)
		return ret;

	ovl_dentry_set_opaque(dentry);
	return 0;
}
/* Mark @upperdentry opaque, using -EIO as the "no xattr support" error. */
static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
{
	/*
	 * Creating an opaque dir on an upper without xattr support fails
	 * with -EIO.  ovl_rename() wants a different error for the noxattr
	 * case and calls ovl_set_opaque_xerr(-EXDEV) directly instead.
	 */
	const int noxattr_err = -EIO;

	return ovl_set_opaque_xerr(dentry, upperdentry, noxattr_err);
}
/*
 * Common tail of every upper creation: bump the parent's version, record
 * that @dentry has an upper alias and attach @inode to @dentry.
 *
 * For a hard link the reference on @newdentry is dropped here and the link
 * count of @inode is raised; otherwise @newdentry is handed to
 * ovl_inode_update() and its attributes are copied onto @inode.
 */
static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
			    struct dentry *newdentry, bool hardlink)
{
	ovl_dentry_version_inc(dentry->d_parent, false);
	ovl_dentry_set_upper_alias(dentry);

	if (hardlink) {
		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
		dput(newdentry);
		inc_nlink(inode);
		d_instantiate(dentry, inode);
		/* Force lookup of new upper hardlink to find its lower */
		d_drop(dentry);
		return;
	}

	ovl_inode_update(inode, newdentry);
	ovl_copyattr(newdentry->d_inode, inode);
	d_instantiate(dentry, inode);
}
static bool ovl_type_merge(struct dentry *dentry)
{
return OVL_TYPE_MERGE(ovl_path_type(dentry));
}
static bool ovl_type_origin(struct dentry *dentry)
{
return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
}
/*
 * Create (or hard link) the upper object for @dentry directly in its
 * parent's upper directory, under the parent's inode lock.
 *
 * For non-hardlinks the umask is applied to @attr->mode here when the upper
 * directory inode does not have IS_POSIXACL set.  On success the new upper
 * dentry is consumed by ovl_instantiate().  Returns 0 or a negative errno.
 */
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
			    struct cattr *attr, struct dentry *hardlink)
{
	struct dentry *parent_upper = ovl_dentry_upper(dentry->d_parent);
	struct inode *parent_inode = parent_upper->d_inode;
	struct dentry *created;
	int ret;

	if (!hardlink && !IS_POSIXACL(parent_inode))
		attr->mode &= ~current_umask();

	inode_lock_nested(parent_inode, I_MUTEX_PARENT);

	created = lookup_one_len(dentry->d_name.name, parent_upper,
				 dentry->d_name.len);
	if (IS_ERR(created)) {
		ret = PTR_ERR(created);
		goto unlock;
	}

	ret = ovl_create_real(parent_inode, created, attr, hardlink, false);
	if (ret) {
		dput(created);
		goto unlock;
	}

	/* Setting opaque here is just an optimization, allow to fail */
	if (ovl_type_merge(dentry->d_parent) && d_is_dir(created))
		ovl_set_opaque(dentry, created);

	/* The reference on the new dentry passes to ovl_instantiate(). */
	ovl_instantiate(dentry, inode, created, !!hardlink);

unlock:
	inode_unlock(parent_inode);
	return ret;
}
/*
 * Replace the upper directory of @dentry with a fresh, opaque one.
 *
 * A temporary directory is created in the work directory using the mode of
 * the current upper directory, receives the upper's xattrs and attributes,
 * is marked opaque and is then swapped into place with RENAME_EXCHANGE.
 * The old upper directory (now in the work directory) is passed together
 * with @list to ovl_cleanup_whiteouts() and then removed.
 *
 * Returns the new upper directory dentry, with the reference obtained from
 * ovl_lookup_temp() still held, or an ERR_PTR():
 *   -EROFS   there is no work directory,
 *   -ESTALE  the upper is not a directory or no longer sits in the parent's
 *            upper directory,
 *   or whatever the failing helper returned.
 * On success @dentry is unhashed because its cached upper no longer matches.
 */
static struct dentry *ovl_clear_empty(struct dentry *dentry,
				      struct list_head *list)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct path upperpath;
	struct dentry *upper;
	struct dentry *opaquedir;
	struct kstat stat;
	int err;

	if (WARN_ON(!workdir))
		return ERR_PTR(-EROFS);

	/* Only look inside workdir once it is known to be non-NULL. */
	wdir = workdir->d_inode;

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out;

	ovl_path_upper(dentry, &upperpath);
	err = vfs_getattr(&upperpath, &stat,
			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
	if (err)
		goto out_unlock;

	err = -ESTALE;
	if (!S_ISDIR(stat.mode))
		goto out_unlock;
	upper = upperpath.dentry;
	if (upper->d_parent->d_inode != udir)
		goto out_unlock;

	opaquedir = ovl_lookup_temp(workdir);
	err = PTR_ERR(opaquedir);
	if (IS_ERR(opaquedir))
		goto out_unlock;

	err = ovl_create_real(wdir, opaquedir,
			      &(struct cattr){.mode = stat.mode}, NULL, true);
	if (err)
		goto out_dput;

	err = ovl_copy_xattr(upper, opaquedir);
	if (err)
		goto out_cleanup;

	err = ovl_set_opaque(dentry, opaquedir);
	if (err)
		goto out_cleanup;

	inode_lock(opaquedir->d_inode);
	err = ovl_set_attr(opaquedir, &stat);
	inode_unlock(opaquedir->d_inode);
	if (err)
		goto out_cleanup;

	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
	if (err)
		goto out_cleanup;

	/* After the exchange "upper" is the old directory, in the workdir. */
	ovl_cleanup_whiteouts(upper, list);
	ovl_cleanup(wdir, upper);
	unlock_rename(workdir, upperdir);

	/* dentry's upper doesn't match now, get rid of it */
	d_drop(dentry);

	return opaquedir;

out_cleanup:
	ovl_cleanup(wdir, opaquedir);
out_dput:
	dput(opaquedir);
out_unlock:
	unlock_rename(workdir, upperdir);
out:
	return ERR_PTR(err);
}
/*
 * Check that directory @dentry is empty and, when its path type is both
 * upper and merge, replace its upper with an opaque directory through
 * ovl_clear_empty().
 *
 * Returns NULL when nothing had to be replaced, the dentry returned by
 * ovl_clear_empty() otherwise, or an ERR_PTR() if the emptiness check or
 * the replacement failed.
 */
static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
{
	enum ovl_path_type type = ovl_path_type(dentry);
	struct dentry *result = NULL;
	LIST_HEAD(list);
	int err;

	err = ovl_check_empty_dir(dentry, &list);
	if (err) {
		result = ERR_PTR(err);
	} else if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type)) {
		/*
		 * When removing an empty opaque directory, then it makes no
		 * sense to replace it with an exact replica of itself.
		 *
		 * If no upperdentry then skip clearing whiteouts.
		 *
		 * Can race with copy-up, since we don't hold the upperdir
		 * mutex.  Doesn't matter, since copy-up can't create a
		 * non-empty directory from an empty one.
		 */
		result = ovl_clear_empty(dentry, &list);
	}

	ovl_cache_free(&list);
	return result;
}
/*
 * Store @acl on @upperdentry as the xattr called @name.
 *
 * Nothing is done (and 0 returned) when POSIX ACL support is compiled out
 * or @acl is NULL.  Otherwise the ACL is converted to xattr form in a
 * temporary buffer and written with XATTR_CREATE.  Returns 0, -ENOMEM, or
 * the error from the conversion or from vfs_setxattr().
 */
static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
			     const struct posix_acl *acl)
{
	void *xattr_buf;
	size_t len;
	int ret;

	if (!(IS_ENABLED(CONFIG_FS_POSIX_ACL) && acl))
		return 0;

	/* First call with no buffer only reports the size that is needed. */
	len = posix_acl_to_xattr(NULL, acl, NULL, 0);
	xattr_buf = kmalloc(len, GFP_KERNEL);
	if (!xattr_buf)
		return -ENOMEM;

	len = posix_acl_to_xattr(&init_user_ns, acl, xattr_buf, len);
	ret = len;
	if (ret >= 0)
		ret = vfs_setxattr(upperdentry, name, xattr_buf, len,
				   XATTR_CREATE);

	kfree(xattr_buf);
	return ret;
}
/*
 * Create (or hard link) the upper object for @dentry when its name in the
 * upper directory is currently occupied by a whiteout.
 *
 * The object is first built under a temporary name in the work directory:
 * the mode is corrected if creation altered it, the POSIX ACLs computed
 * from the overlay parent are applied, and a directory is marked opaque.
 * It is then renamed over the whiteout; a directory is exchanged with the
 * whiteout, which is cleaned up afterwards, anything else replaces it.
 *
 * @cattr is only dereferenced when @hardlink is NULL.
 *
 * Returns 0 on success or a negative errno: -EROFS without a work
 * directory, -ESTALE if the upper entry is not a whiteout once the
 * directories are locked, otherwise the error of the failing step.
 */
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
				    struct cattr *cattr,
				    struct dentry *hardlink)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *upper;
	struct dentry *newdentry;
	int err;
	struct posix_acl *acl, *default_acl;

	if (WARN_ON(!workdir))
		return -EROFS;

	/* Only look inside workdir once it is known to be non-NULL. */
	wdir = workdir->d_inode;

	if (!hardlink) {
		err = posix_acl_create(dentry->d_parent->d_inode,
				       &cattr->mode, &default_acl, &acl);
		if (err)
			return err;
	}

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out;

	newdentry = ovl_lookup_temp(workdir);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_unlock;

	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out_dput;

	/* The name must still be a whiteout now that the dirs are locked. */
	err = -ESTALE;
	if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
		goto out_dput2;

	err = ovl_create_real(wdir, newdentry, cattr, hardlink, true);
	if (err)
		goto out_dput2;

	/*
	 * mode could have been mutilated due to umask (e.g. sgid directory)
	 */
	if (!hardlink &&
	    !S_ISLNK(cattr->mode) &&
	    newdentry->d_inode->i_mode != cattr->mode) {
		struct iattr attr = {
			.ia_valid = ATTR_MODE,
			.ia_mode = cattr->mode,
		};
		inode_lock(newdentry->d_inode);
		err = notify_change(newdentry, &attr, NULL);
		inode_unlock(newdentry->d_inode);
		if (err)
			goto out_cleanup;
	}
	if (!hardlink) {
		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
					acl);
		if (err)
			goto out_cleanup;

		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
					default_acl);
		if (err)
			goto out_cleanup;
	}

	if (!hardlink && S_ISDIR(cattr->mode)) {
		err = ovl_set_opaque(dentry, newdentry);
		if (err)
			goto out_cleanup;

		/* Swap with the whiteout, then remove it from the workdir. */
		err = ovl_do_rename(wdir, newdentry, udir, upper,
				    RENAME_EXCHANGE);
		if (err)
			goto out_cleanup;

		ovl_cleanup(wdir, upper);
	} else {
		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
		if (err)
			goto out_cleanup;
	}
	/* The reference on newdentry passes to ovl_instantiate(). */
	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
	newdentry = NULL;
out_dput2:
	dput(upper);
out_dput:
	dput(newdentry);
out_unlock:
	unlock_rename(workdir, upperdir);
out:
	if (!hardlink) {
		posix_acl_release(acl);
		posix_acl_release(default_acl);
	}
	return err;

out_cleanup:
	ovl_cleanup(wdir, newdentry);
	goto out_dput2;
}
/*
 * Create a new upper object for @dentry, or hard link @hardlink to it.
 *
 * The parent is copied up first.  The work is then done with a temporary
 * credential whose fsuid/fsgid are those of @inode, so that the upper
 * object ends up owned like the overlay inode; for new files the LSM is
 * consulted through security_dentry_create_files_as().  With @origin set
 * the parent's upper directory is marked "impure" beforehand.
 *
 * @attr is only dereferenced when @hardlink is NULL.
 *
 * Returns 0 on success or a negative errno.
 */
static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
			      struct cattr *attr, struct dentry *hardlink,
			      bool origin)
{
	int err;
	const struct cred *old_cred;
	/* Credential displaced by override_creds() below, if it was called. */
	const struct cred *hold_cred = NULL;
	struct cred *override_cred;
	struct dentry *parent = dentry->d_parent;

	err = ovl_copy_up(parent);
	if (err)
		return err;

	/*
	 * NOTE(review): old_cred can be NULL (see the fallback to
	 * current_cred() below).  This function assumes that NULL means
	 * ovl_override_creds() installed nothing and that
	 * ovl_revert_creds(NULL) is a no-op; both helpers live outside this
	 * file - confirm.
	 */
	old_cred = ovl_override_creds(dentry->d_sb);

	/*
	 * When linking a file with copy up origin into a new parent, mark the
	 * new parent dir "impure".
	 */
	if (origin) {
		err = ovl_set_impure(parent, ovl_dentry_upper(parent));
		if (err)
			goto out_revert_creds;
	}

	err = -ENOMEM;
	override_cred = prepare_creds();
	if (override_cred) {
		override_cred->fsuid = inode->i_uid;
		override_cred->fsgid = inode->i_gid;
		if (!hardlink) {
			err = security_dentry_create_files_as(dentry,
					attr->mode, &dentry->d_name,
					old_cred ? old_cred : current_cred(),
					override_cred);
			if (err) {
				put_cred(override_cred);
				goto out_revert_creds;
			}
		}
		/*
		 * Remember what was displaced instead of dropping it on the
		 * spot: it is the mounter's cred only if ovl_override_creds()
		 * installed one; otherwise it is the caller's own cred, which
		 * must be restored and must not lose a reference.
		 */
		hold_cred = override_creds(override_cred);
		/* Drop the prepare_creds() reference. */
		put_cred(override_cred);

		if (!ovl_dentry_is_whiteout(dentry))
			err = ovl_create_upper(dentry, inode, attr,
						hardlink);
		else
			err = ovl_create_over_whiteout(dentry, inode, attr,
							hardlink);
	}
out_revert_creds:
	/*
	 * With an active mounter override, revert to the caller's cred and
	 * drop the mounter cred displaced above.  Without one, the displaced
	 * cred is the caller's and simply gets reinstated.
	 */
	ovl_revert_creds(old_cred ? old_cred : hold_cred);
	if (old_cred && hold_cred)
		put_cred(hold_cred);
	if (!err) {
		struct inode *realinode = d_inode(ovl_dentry_upper(dentry));

		/* The upper object is expected to mirror the overlay inode. */
		WARN_ON(inode->i_mode != realinode->i_mode);
		WARN_ON(!uid_eq(inode->i_uid, realinode->i_uid));
		WARN_ON(!gid_eq(inode->i_gid, realinode->i_gid));
	}
	return err;
}
/*
 * Shared back end of create/mkdir/mknod/symlink.
 *
 * Takes write access, allocates a new overlay inode for @mode/@rdev, lets
 * inode_init_owner() derive ownership from the parent directory and then
 * creates the upper object through ovl_create_or_link().  The inode is
 * released again if that fails.  Returns 0 or a negative errno.
 */
static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
			     const char *link)
{
	struct cattr attr = {
		.rdev = rdev,
		.link = link,
	};
	struct inode *inode;
	int ret;

	ret = ovl_want_write(dentry);
	if (ret)
		return ret;

	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
	if (!inode) {
		ret = -ENOMEM;
		goto drop_write;
	}

	inode_init_owner(inode, dentry->d_parent->d_inode, mode);
	/* Use the mode as adjusted by inode_init_owner(). */
	attr.mode = inode->i_mode;

	ret = ovl_create_or_link(dentry, inode, &attr, NULL, false);
	if (ret)
		iput(inode);

drop_write:
	ovl_drop_write(dentry);
	return ret;
}
/*
 * ->create: make a regular file.  Only the permission bits of @mode are
 * kept; @dir and @excl are not used by the body.
 */
static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		      bool excl)
{
	const int file_mode = (mode & 07777) | S_IFREG;

	return ovl_create_object(dentry, file_mode, 0, NULL);
}
/*
 * ->mkdir: make a directory.  Only the permission bits of @mode are kept;
 * @dir is not used by the body.
 */
static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	const int dir_mode = (mode & 07777) | S_IFDIR;

	return ovl_create_object(dentry, dir_mode, 0, NULL);
}
/*
 * ->mknod: make a special file.  A character device with WHITEOUT_DEV is
 * refused with -EPERM; everything else goes to ovl_create_object().
 */
static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
		     dev_t rdev)
{
	if (!S_ISCHR(mode) || rdev != WHITEOUT_DEV)
		return ovl_create_object(dentry, mode, rdev, NULL);

	/* Don't allow creation of "whiteout" on overlay */
	return -EPERM;
}
/* ->symlink: make a symbolic link whose target is @link. */
static int ovl_symlink(struct inode *dir, struct dentry *dentry,
		       const char *link)
{
	const int link_mode = S_IFLNK;

	return ovl_create_object(dentry, link_mode, 0, link);
}
/*
 * ->link: hard link @old as @new.
 *
 * Both @old and the parent of @new are copied up, then the upper dentry of
 * @old is linked through ovl_create_or_link().  An extra inode reference is
 * taken for the new dentry and dropped again if linking fails.  @newdir is
 * not used by the body.  Returns 0 or a negative errno.
 */
static int ovl_link(struct dentry *old, struct inode *newdir,
		    struct dentry *new)
{
	struct inode *inode;
	bool locked = false;
	int ret;

	ret = ovl_want_write(old);
	if (ret)
		return ret;

	ret = ovl_copy_up(old);
	if (!ret)
		ret = ovl_copy_up(new->d_parent);
	if (!ret)
		ret = ovl_nlink_start(old, &locked);
	if (ret)
		goto drop_write;

	inode = d_inode(old);
	ihold(inode);

	ret = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old),
				 ovl_type_origin(old));
	if (ret)
		iput(inode);

	ovl_nlink_end(old, locked);

drop_write:
	ovl_drop_write(old);
	return ret;
}
/* True when @upper refers to the same inode as the upper dentry of @dentry. */
static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
{
	struct inode *recorded = d_inode(ovl_dentry_upper(dentry));
	struct inode *looked_up = d_inode(upper);

	return recorded == looked_up;
}
/*
 * Remove @dentry by putting a whiteout in its place in the upper directory.
 *
 * For a directory, emptiness is verified first and the upper may get
 * replaced by an opaque directory (ovl_check_empty_and_clear()).  A
 * whiteout is then created in the work directory and renamed over the upper
 * entry; when that entry is a directory the two are exchanged and the
 * directory is removed from the work directory afterwards.
 *
 * Returns 0 on success or a negative errno: -EROFS without a work
 * directory, -ESTALE if the upper entry found under lock is not the
 * expected one, otherwise the error of the failing step.  @dentry is
 * unhashed once the rename has been attempted.
 */
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *whiteout;
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;
	int flags = 0;

	if (WARN_ON(!workdir))
		return -EROFS;

	/* Only look inside workdir once it is known to be non-NULL. */
	wdir = workdir->d_inode;

	if (is_dir) {
		opaquedir = ovl_check_empty_and_clear(dentry);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir))
			goto out;
	}

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out_dput;

	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out_unlock;

	/*
	 * The entry found under lock must be the opaque replacement made
	 * above, or else match the upper recorded for this dentry, if any.
	 */
	err = -ESTALE;
	if ((opaquedir && upper != opaquedir) ||
	    (!opaquedir && ovl_dentry_upper(dentry) &&
	     !ovl_matches_upper(dentry, upper))) {
		goto out_dput_upper;
	}

	whiteout = ovl_whiteout(workdir, dentry);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto out_dput_upper;

	/* A directory is exchanged with the whiteout rather than replaced. */
	if (d_is_dir(upper))
		flags = RENAME_EXCHANGE;

	err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
	if (err)
		goto kill_whiteout;
	/* After an exchange the old directory sits in the workdir. */
	if (flags)
		ovl_cleanup(wdir, upper);

	ovl_dentry_version_inc(dentry->d_parent, true);
out_d_drop:
	d_drop(dentry);
	dput(whiteout);
out_dput_upper:
	dput(upper);
out_unlock:
	unlock_rename(workdir, upperdir);
out_dput:
	dput(opaquedir);
out:
	return err;

kill_whiteout:
	ovl_cleanup(wdir, whiteout);
	goto out_d_drop;
}
/*
 * Remove the upper object of @dentry with vfs_rmdir()/vfs_unlink(), under
 * the lock of the parent's upper directory inode.
 *
 * A directory carrying a redirect is first checked for emptiness and may
 * have its upper replaced by ovl_check_empty_and_clear().  Returns 0 or a
 * negative errno; -ESTALE when the entry found under lock is not the
 * expected upper.  @dentry is unhashed on success.
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *dir = upperdir->d_inode;
	struct dentry *opaquedir = NULL;
	struct dentry *upper;
	bool stale;
	int ret;

	/* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */
	if (is_dir && ovl_dentry_get_redirect(dentry)) {
		opaquedir = ovl_check_empty_and_clear(dentry);
		if (IS_ERR(opaquedir))
			return PTR_ERR(opaquedir);
	}

	inode_lock_nested(dir, I_MUTEX_PARENT);

	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	if (IS_ERR(upper)) {
		ret = PTR_ERR(upper);
		goto unlock;
	}

	if (opaquedir)
		stale = upper != opaquedir;
	else
		stale = !ovl_matches_upper(dentry, upper);
	if (stale) {
		ret = -ESTALE;
		goto put_upper;
	}

	ret = is_dir ? vfs_rmdir(dir, upper) : vfs_unlink(dir, upper, NULL);
	ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry));

	/*
	 * Keeping this dentry hashed would mean having to release
	 * upperpath/lowerpath, which could only be done if we are the
	 * sole user of this dentry.  Too tricky...  Just unhash for
	 * now.
	 */
	if (!ret)
		d_drop(dentry);

put_upper:
	dput(upper);
unlock:
	inode_unlock(dir);
	dput(opaquedir);
	return ret;
}
/*
 * Shared back end of unlink and rmdir.
 *
 * Copies up the parent, then either removes the upper object or replaces
 * it with a whiteout, depending on ovl_lower_positive().  The removal runs
 * between ovl_override_creds() and ovl_revert_creds().  On success the
 * overlay inode's link count is cleared (directory) or decremented.
 * Returns 0 or a negative errno.
 */
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
	const struct cred *old_cred;
	bool locked = false;
	int ret;

	ret = ovl_want_write(dentry);
	if (ret)
		return ret;

	ret = ovl_copy_up(dentry->d_parent);
	if (!ret)
		ret = ovl_nlink_start(dentry, &locked);
	if (ret)
		goto drop_write;

	old_cred = ovl_override_creds(dentry->d_sb);
	if (ovl_lower_positive(dentry))
		ret = ovl_remove_and_whiteout(dentry, is_dir);
	else
		ret = ovl_remove_upper(dentry, is_dir);
	ovl_revert_creds(old_cred);

	if (!ret) {
		if (is_dir)
			clear_nlink(dentry->d_inode);
		else
			drop_nlink(dentry->d_inode);
	}
	ovl_nlink_end(dentry, locked);

drop_write:
	ovl_drop_write(dentry);
	return ret;
}
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
{
return ovl_do_remove(dentry, false);
}
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
{
return ovl_do_remove(dentry, true);
}
static bool ovl_type_merge_or_lower(struct dentry *dentry)
{
enum ovl_path_type type = ovl_path_type(dentry);
return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
}
static bool ovl_can_move(struct dentry *dentry)
{
return ovl_redirect_dir(dentry->d_sb) ||
!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
}
/*
 * Build the redirect string for @dentry.
 *
 * With @samedir set the result is just a copy of the dentry's own name.
 * Otherwise a path is assembled right-to-left in a scratch buffer of
 * ovl_redirect_max + 1 bytes by walking the d_parent chain, using each
 * component's recorded redirect when it has one and its name otherwise.
 * The walk ends at the root or as soon as the assembled path starts with
 * '/' (a component carried an absolute redirect).
 *
 * Returns a newly allocated string, ERR_PTR(-EXDEV) if the path does not
 * fit into the buffer, or ERR_PTR(-ENOMEM) if an allocation fails.
 */
static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
{
char *buf, *ret;
struct dentry *d, *tmp;
/* One byte more than the limit, for the terminating NUL. */
int buflen = ovl_redirect_max + 1;
if (samedir) {
/* No scratch buffer is allocated on this path; "out" skips kfree(buf). */
ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
GFP_KERNEL);
goto out;
}
buf = ret = kmalloc(buflen, GFP_KERNEL);
if (!buf)
goto out;
/* Terminate at the end; buflen now indexes the start of the path so far. */
buflen--;
buf[buflen] = '\0';
for (d = dget(dentry); !IS_ROOT(d);) {
const char *name;
int thislen;
/* d_lock is held while the name/redirect and d_parent are read. */
spin_lock(&d->d_lock);
name = ovl_dentry_get_redirect(d);
if (name) {
thislen = strlen(name);
} else {
name = d->d_name.name;
thislen = d->d_name.len;
}
/* If path is too long, fall back to userspace move */
/* (the extra byte is for the '/' separator a relative component needs) */
if (thislen + (name[0] != '/') > buflen) {
ret = ERR_PTR(-EXDEV);
spin_unlock(&d->d_lock);
goto out_put;
}
/* Prepend this component in front of what has been collected. */
buflen -= thislen;
memcpy(&buf[buflen], name, thislen);
/* Take a reference on the parent before letting go of the child. */
tmp = dget_dlock(d->d_parent);
spin_unlock(&d->d_lock);
dput(d);
d = tmp;
/* Absolute redirect: finished */
if (buf[buflen] == '/')
break;
buflen--;
buf[buflen] = '/';
}
/* Hand back a right-sized copy; the scratch buffer is freed below. */
ret = kstrdup(&buf[buflen], GFP_KERNEL);
out_put:
/* Drop the reference on whichever dentry the walk stopped at. */
dput(d);
kfree(buf);
out:
/* A NULL ret here means one of the allocations above failed. */
return ret ? ret : ERR_PTR(-ENOMEM);
}
/*
 * Make sure the upper of directory @dentry carries a redirect xattr.
 *
 * Nothing is done if a redirect is already recorded and either @samedir is
 * set or that redirect is absolute.  Otherwise a new redirect string is
 * built, written as OVL_XATTR_REDIRECT and recorded on the dentry, which
 * takes over the string.  Any failure to write the xattr is logged
 * (rate limited) and reported as -EXDEV so that the caller falls back to a
 * userspace move.  Returns 0, -EXDEV, or the error from ovl_get_redirect().
 */
static int ovl_set_redirect(struct dentry *dentry, bool samedir)
{
	const char *redirect = ovl_dentry_get_redirect(dentry);
	int ret;

	if (redirect && (samedir || redirect[0] == '/'))
		return 0;

	redirect = ovl_get_redirect(dentry, samedir);
	if (IS_ERR(redirect))
		return PTR_ERR(redirect);

	ret = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
				 OVL_XATTR_REDIRECT,
				 redirect, strlen(redirect), -EXDEV);
	if (ret) {
		kfree(redirect);
		pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", ret);
		/* Fall back to userspace copy-up */
		return -EXDEV;
	}

	spin_lock(&dentry->d_lock);
	ovl_dentry_set_redirect(dentry, redirect);
	spin_unlock(&dentry->d_lock);
	return 0;
}
/*
 * ->rename: rename @old to @new, or exchange them with RENAME_EXCHANGE.
 *
 * Outline:
 *  - reject flags other than RENAME_EXCHANGE/RENAME_NOREPLACE (-EINVAL) and
 *    objects that ovl_can_move() refuses (-EXDEV);
 *  - copy up @old, the parent of @new and, for an exchange, @new itself;
 *  - between ovl_override_creds() and ovl_revert_creds(): derive the flags
 *    for the real rename, lock both upper directories, re-look-up both
 *    names and verify them against the cached upper dentries (-ESTALE on
 *    mismatch), set redirect/opaque xattrs on directories as needed, and
 *    perform the rename on the upper layer;
 *  - fix up the link count of an overwritten target and bump the versions
 *    of both parent directories.
 *
 * Returns 0 on success or a negative errno.
 */
static int ovl_rename(struct inode *olddir, struct dentry *old,
struct inode *newdir, struct dentry *new,
unsigned int flags)
{
int err;
bool locked = false;
struct dentry *old_upperdir;
struct dentry *new_upperdir;
struct dentry *olddentry;
struct dentry *newdentry;
struct dentry *trap;
bool old_opaque;
bool new_opaque;
bool cleanup_whiteout = false;
/* Anything that is not an exchange may replace the target. */
bool overwrite = !(flags & RENAME_EXCHANGE);
bool is_dir = d_is_dir(old);
bool new_is_dir = d_is_dir(new);
bool samedir = olddir == newdir;
struct dentry *opaquedir = NULL;
const struct cred *old_cred = NULL;
err = -EINVAL;
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
goto out;
/* RENAME_NOREPLACE is not passed on to the rename on the upper layer. */
flags &= ~RENAME_NOREPLACE;
/* Don't copy up directory trees */
err = -EXDEV;
if (!ovl_can_move(old))
goto out;
if (!overwrite && !ovl_can_move(new))
goto out;
err = ovl_want_write(old);
if (err)
goto out;
err = ovl_copy_up(old);
if (err)
goto out_drop_write;
err = ovl_copy_up(new->d_parent);
if (err)
goto out_drop_write;
if (!overwrite) {
/* An exchange moves @new as well, so it needs an upper too. */
err = ovl_copy_up(new);
if (err)
goto out_drop_write;
} else {
/* Paired with the ovl_nlink_end(new, locked) on the way out. */
err = ovl_nlink_start(new, &locked);
if (err)
goto out_drop_write;
}
old_cred = ovl_override_creds(old->d_sb);
if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) {
/* A directory being replaced must be empty. */
opaquedir = ovl_check_empty_and_clear(new);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir)) {
/* Reset so that the dput(opaquedir) at "out" is a no-op. */
opaquedir = NULL;
goto out_revert_creds;
}
}
if (overwrite) {
if (ovl_lower_positive(old)) {
if (!ovl_dentry_is_whiteout(new)) {
/* Whiteout source */
flags |= RENAME_WHITEOUT;
} else {
/* Switch whiteouts */
flags |= RENAME_EXCHANGE;
}
} else if (is_dir && ovl_dentry_is_whiteout(new)) {
/* Exchange with the whiteout, which is removed after the rename. */
flags |= RENAME_EXCHANGE;
cleanup_whiteout = true;
}
}
old_upperdir = ovl_dentry_upper(old->d_parent);
new_upperdir = ovl_dentry_upper(new->d_parent);
if (!samedir) {
/*
* When moving a merge dir or non-dir with copy up origin into
* a new parent, we are marking the new parent dir "impure".
* When ovl_iterate() iterates an "impure" upper dir, it will
* lookup the origin inodes of the entries to fill d_ino.
*/
if (ovl_type_origin(old)) {
err = ovl_set_impure(new->d_parent, new_upperdir);
if (err)
goto out_revert_creds;
}
if (!overwrite && ovl_type_origin(new)) {
err = ovl_set_impure(old->d_parent, old_upperdir);
if (err)
goto out_revert_creds;
}
}
/* "trap" is compared against both looked-up dentries further down. */
trap = lock_rename(new_upperdir, old_upperdir);
olddentry = lookup_one_len(old->d_name.name, old_upperdir,
old->d_name.len);
err = PTR_ERR(olddentry);
if (IS_ERR(olddentry))
goto out_unlock;
/* The source found under lock must be the upper cached for @old. */
err = -ESTALE;
if (!ovl_matches_upper(old, olddentry))
goto out_dput_old;
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
new->d_name.len);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
goto out_dput_old;
old_opaque = ovl_dentry_is_opaque(old);
new_opaque = ovl_dentry_is_opaque(new);
err = -ESTALE;
if (d_inode(new) && ovl_dentry_upper(new)) {
/* Positive target with an upper: it must be the one we expect. */
if (opaquedir) {
if (newdentry != opaquedir)
goto out_dput;
} else {
if (!ovl_matches_upper(new, newdentry))
goto out_dput;
}
} else {
/* Otherwise the upper name must be free, or a whiteout of an opaque @new. */
if (!d_is_negative(newdentry) &&
(!new_opaque || !ovl_is_whiteout(newdentry)))
goto out_dput;
}
/* Neither side may be the dentry returned by lock_rename(). */
if (olddentry == trap)
goto out_dput;
if (newdentry == trap)
goto out_dput;
if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
goto out_dput;
err = 0;
if (is_dir) {
if (ovl_type_merge_or_lower(old))
err = ovl_set_redirect(old, samedir);
else if (!old_opaque && ovl_type_merge(new->d_parent))
err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
if (err)
goto out_dput;
}
if (!overwrite && new_is_dir) {
/* Exchange: @new moves too and gets the same treatment. */
if (ovl_type_merge_or_lower(new))
err = ovl_set_redirect(new, samedir);
else if (!new_opaque && ovl_type_merge(old->d_parent))
err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
if (err)
goto out_dput;
}
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
new_upperdir->d_inode, newdentry, flags);
if (err)
goto out_dput;
/* After the exchange the whiteout sits in the old upper directory. */
if (cleanup_whiteout)
ovl_cleanup(old_upperdir->d_inode, newdentry);
if (overwrite && d_inode(new)) {
/* The overwritten target lost this name. */
if (new_is_dir)
clear_nlink(d_inode(new));
else
drop_nlink(d_inode(new));
}
ovl_dentry_version_inc(old->d_parent,
!overwrite && ovl_type_origin(new));
ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old));
out_dput:
dput(newdentry);
out_dput_old:
dput(olddentry);
out_unlock:
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
ovl_revert_creds(old_cred);
/* "locked" is only ever set by ovl_nlink_start() on the overwrite path. */
ovl_nlink_end(new, locked);
out_drop_write:
ovl_drop_write(old);
out:
/* opaquedir is NULL unless ovl_check_empty_and_clear() returned a dentry. */
dput(opaquedir);
return err;
}
/* Inode operations installed on overlay directory inodes. */
const struct inode_operations ovl_dir_inode_operations = {
	/* name lookup */
	.lookup		= ovl_lookup,

	/* object creation (defined in this file) */
	.create		= ovl_create,
	.mkdir		= ovl_mkdir,
	.mknod		= ovl_mknod,
	.symlink	= ovl_symlink,
	.link		= ovl_link,

	/* removal and rename (defined in this file) */
	.unlink		= ovl_unlink,
	.rmdir		= ovl_rmdir,
	.rename		= ovl_rename,

	/* attributes, permissions, xattrs and ACLs */
	.setattr	= ovl_setattr,
	.getattr	= ovl_getattr,
	.permission	= ovl_permission,
	.listxattr	= ovl_listxattr,
	.get_acl	= ovl_get_acl,
	.update_time	= ovl_update_time,
};