vfs-6.12.fallocate

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZuQEwAAKCRCRxhvAZXjc
 omD7AQCZuWPXkEGYFD37MJZuRXNEoq7Tuj6yd0O2b5khUpzvyAD+MPuthGiCMPsu
 voPpUP83x7T0D3JsEsCAXtNeVRcIBQI=
 =xTs6
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.12.fallocate' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fallocate updates from Christian Brauner:
 "This contains work to try and cleanup some the fallocate mode
  handling. Currently, it confusingly mixes operation modes and an
  optional flag.

  The work here tries to better define operation modes and optional
  flags allowing the core and filesystem code to use switch statements
  to switch on the operation mode"

* tag 'vfs-6.12.fallocate' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  xfs: refactor xfs_file_fallocate
  xfs: move the xfs_is_always_cow_inode check into xfs_alloc_file_space
  xfs: call xfs_flush_unmap_range from xfs_free_file_space
  fs: sort out the fallocate mode vs flag mess
  ext4: remove tracing for FALLOC_FL_NO_HIDE_STALE
  block: remove checks for FALLOC_FL_NO_HIDE_STALE
This commit is contained in:
Linus Torvalds 2024-09-16 09:34:08 +02:00
commit ee25861f26
7 changed files with 258 additions and 187 deletions

View file

@ -771,7 +771,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
#define BLKDEV_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
FALLOC_FL_ZERO_RANGE)
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len)
@ -830,14 +830,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
len >> SECTOR_SHIFT, GFP_KERNEL,
BLKDEV_ZERO_NOFALLBACK);
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
if (error)
goto fail;
error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
len >> SECTOR_SHIFT, GFP_KERNEL);
break;
default:
error = -EOPNOTSUPP;
}

View file

@ -252,40 +252,39 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (offset < 0 || len <= 0)
return -EINVAL;
/* Return error if mode is not supported */
if (mode & ~FALLOC_FL_SUPPORTED_MASK)
if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
/* Punch hole and zero range are mutually exclusive */
if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
/*
* Modes are exclusive, even if that is not obvious from the encoding
* as bit masks and the mix with the flag in the same namespace.
*
* To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is
* encoded as no bit set.
*/
switch (mode & FALLOC_FL_MODE_MASK) {
case FALLOC_FL_ALLOCATE_RANGE:
case FALLOC_FL_UNSHARE_RANGE:
case FALLOC_FL_ZERO_RANGE:
break;
case FALLOC_FL_PUNCH_HOLE:
if (!(mode & FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
break;
case FALLOC_FL_COLLAPSE_RANGE:
case FALLOC_FL_INSERT_RANGE:
if (mode & FALLOC_FL_KEEP_SIZE)
return -EOPNOTSUPP;
break;
default:
return -EOPNOTSUPP;
/* Punch hole must have keep size set */
if ((mode & FALLOC_FL_PUNCH_HOLE) &&
!(mode & FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
/* Collapse range should only be used exclusively. */
if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
(mode & ~FALLOC_FL_COLLAPSE_RANGE))
return -EINVAL;
/* Insert range should only be used exclusively. */
if ((mode & FALLOC_FL_INSERT_RANGE) &&
(mode & ~FALLOC_FL_INSERT_RANGE))
return -EINVAL;
/* Unshare range should only be used with allocate mode. */
if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
(mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
return -EINVAL;
}
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
/*
* We can only allow pure fallocate on append only files
* On append-only files only space preallocation is supported.
*/
if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
return -EPERM;

View file

@ -653,6 +653,9 @@ xfs_alloc_file_space(
xfs_bmbt_irec_t imaps[1], *imapp;
int error;
if (xfs_is_always_cow_inode(ip))
return 0;
trace_xfs_alloc_file_space(ip);
if (xfs_is_shutdown(mp))
@ -848,6 +851,14 @@ xfs_free_file_space(
if (len <= 0) /* if nothing being freed */
return 0;
/*
* Now AIO and DIO has drained we flush and (if necessary) invalidate
* the cached range over the first operation we are about to run.
*/
error = xfs_flush_unmap_range(ip, offset, len);
if (error)
return error;
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

View file

@ -852,6 +852,192 @@ static inline bool xfs_file_sync_writes(struct file *filp)
return false;
}
static int
xfs_falloc_newsize(
struct file *file,
int mode,
loff_t offset,
loff_t len,
loff_t *new_size)
{
struct inode *inode = file_inode(file);
if ((mode & FALLOC_FL_KEEP_SIZE) || offset + len <= i_size_read(inode))
return 0;
*new_size = offset + len;
return inode_newsize_ok(inode, *new_size);
}
static int
xfs_falloc_setsize(
struct file *file,
loff_t new_size)
{
struct iattr iattr = {
.ia_valid = ATTR_SIZE,
.ia_size = new_size,
};
if (!new_size)
return 0;
return xfs_vn_setattr_size(file_mnt_idmap(file), file_dentry(file),
&iattr);
}
static int
xfs_falloc_collapse_range(
struct file *file,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = i_size_read(inode) - len;
int error;
if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
return -EINVAL;
/*
* There is no need to overlap collapse range with EOF, in which case it
* is effectively a truncate operation
*/
if (offset + len >= i_size_read(inode))
return -EINVAL;
error = xfs_collapse_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_insert_range(
struct file *file,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t isize = i_size_read(inode);
int error;
if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
return -EINVAL;
/*
* New inode size must not exceed ->s_maxbytes, accounting for
* possible signed overflow.
*/
if (inode->i_sb->s_maxbytes - isize < len)
return -EFBIG;
/* Offset should be less than i_size */
if (offset >= isize)
return -EINVAL;
error = xfs_falloc_setsize(file, isize + len);
if (error)
return error;
/*
* Perform hole insertion now that the file size has been updated so
* that if we crash during the operation we don't leave shifted extents
* past EOF and hence losing access to the data that is contained within
* them.
*/
return xfs_insert_file_space(XFS_I(inode), offset, len);
}
/*
* Punch a hole and prealloc the range. We use a hole punch rather than
* unwritten extent conversion for two reasons:
*
* 1.) Hole punch handles partial block zeroing for us.
* 2.) If prealloc returns ENOSPC, the file range is still zero-valued by
* virtue of the hole punch.
*/
static int
xfs_falloc_zero_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
unsigned int blksize = i_blocksize(inode);
loff_t new_size = 0;
int error;
trace_xfs_zero_file_space(XFS_I(inode));
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_free_file_space(XFS_I(inode), offset, len);
if (error)
return error;
len = round_up(offset + len, blksize) - round_down(offset, blksize);
offset = round_down(offset, blksize);
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_unshare_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = 0;
int error;
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_reflink_unshare(XFS_I(inode), offset, len);
if (error)
return error;
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_allocate_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = 0;
int error;
/*
* If always_cow mode we can't use preallocations and thus should not
* create them.
*/
if (xfs_is_always_cow_inode(XFS_I(inode)))
return -EOPNOTSUPP;
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
#define XFS_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
@ -868,8 +1054,6 @@ xfs_file_fallocate(
struct xfs_inode *ip = XFS_I(inode);
long error;
uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
loff_t new_size = 0;
bool do_file_insert = false;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@ -890,156 +1074,35 @@ xfs_file_fallocate(
*/
inode_dio_wait(inode);
/*
* Now AIO and DIO has drained we flush and (if necessary) invalidate
* the cached range over the first operation we are about to run.
*
* We care about zero and collapse here because they both run a hole
* punch over the range first. Because that can zero data, and the range
* of invalidation for the shift operations is much larger, we still do
* the required flush for collapse in xfs_prepare_shift().
*
* Insert has the same range requirements as collapse, and we extend the
* file first which can zero data. Hence insert has the same
* flush/invalidate requirements as collapse and so they are both
* handled at the right time by xfs_prepare_shift().
*/
if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_COLLAPSE_RANGE)) {
error = xfs_flush_unmap_range(ip, offset, len);
if (error)
goto out_unlock;
}
error = file_modified(file);
if (error)
goto out_unlock;
if (mode & FALLOC_FL_PUNCH_HOLE) {
switch (mode & FALLOC_FL_MODE_MASK) {
case FALLOC_FL_PUNCH_HOLE:
error = xfs_free_file_space(ip, offset, len);
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
/*
* There is no need to overlap collapse range with EOF,
* in which case it is effectively a truncate operation
*/
if (offset + len >= i_size_read(inode)) {
error = -EINVAL;
goto out_unlock;
}
new_size = i_size_read(inode) - len;
error = xfs_collapse_file_space(ip, offset, len);
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
loff_t isize = i_size_read(inode);
if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
/*
* New inode size must not exceed ->s_maxbytes, accounting for
* possible signed overflow.
*/
if (inode->i_sb->s_maxbytes - isize < len) {
error = -EFBIG;
goto out_unlock;
}
new_size = isize + len;
/* Offset should be less than i_size */
if (offset >= isize) {
error = -EINVAL;
goto out_unlock;
}
do_file_insert = true;
} else {
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
new_size = offset + len;
error = inode_newsize_ok(inode, new_size);
if (error)
goto out_unlock;
}
if (mode & FALLOC_FL_ZERO_RANGE) {
/*
* Punch a hole and prealloc the range. We use a hole
* punch rather than unwritten extent conversion for two
* reasons:
*
* 1.) Hole punch handles partial block zeroing for us.
* 2.) If prealloc returns ENOSPC, the file range is
* still zero-valued by virtue of the hole punch.
*/
unsigned int blksize = i_blocksize(inode);
trace_xfs_zero_file_space(ip);
error = xfs_free_file_space(ip, offset, len);
if (error)
goto out_unlock;
len = round_up(offset + len, blksize) -
round_down(offset, blksize);
offset = round_down(offset, blksize);
} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
error = xfs_reflink_unshare(ip, offset, len);
if (error)
goto out_unlock;
} else {
/*
* If always_cow mode we can't use preallocations and
* thus should not create them.
*/
if (xfs_is_always_cow_inode(ip)) {
error = -EOPNOTSUPP;
goto out_unlock;
}
}
if (!xfs_is_always_cow_inode(ip)) {
error = xfs_alloc_file_space(ip, offset, len);
if (error)
goto out_unlock;
}
break;
case FALLOC_FL_COLLAPSE_RANGE:
error = xfs_falloc_collapse_range(file, offset, len);
break;
case FALLOC_FL_INSERT_RANGE:
error = xfs_falloc_insert_range(file, offset, len);
break;
case FALLOC_FL_ZERO_RANGE:
error = xfs_falloc_zero_range(file, mode, offset, len);
break;
case FALLOC_FL_UNSHARE_RANGE:
error = xfs_falloc_unshare_range(file, mode, offset, len);
break;
case FALLOC_FL_ALLOCATE_RANGE:
error = xfs_falloc_allocate_range(file, mode, offset, len);
break;
default:
error = -EOPNOTSUPP;
break;
}
/* Change file size if needed */
if (new_size) {
struct iattr iattr;
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
error = xfs_vn_setattr_size(file_mnt_idmap(file),
file_dentry(file), &iattr);
if (error)
goto out_unlock;
}
/*
* Perform hole insertion now that the file size has been
* updated so that if we crash during the operation we don't
* leave shifted extents past EOF and hence losing access to
* the data that is contained within them.
*/
if (do_file_insert) {
error = xfs_insert_file_space(ip, offset, len);
if (error)
goto out_unlock;
}
if (xfs_file_sync_writes(file))
if (!error && xfs_file_sync_writes(file))
error = xfs_log_force_inode(ip);
out_unlock:

View file

@ -25,12 +25,18 @@ struct space_resv {
#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv)
#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv)
#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \
FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | \
FALLOC_FL_UNSHARE_RANGE)
/*
* Mask of all supported fallocate modes. Only one can be set at a time.
*
* In addition to the mode bit, the mode argument can also encode flags.
* FALLOC_FL_KEEP_SIZE is the only supported flag so far.
*/
#define FALLOC_FL_MODE_MASK (FALLOC_FL_ALLOCATE_RANGE | \
FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | \
FALLOC_FL_UNSHARE_RANGE)
/* on ia32 l_start is on a 32-bit boundary */
#if defined(CONFIG_X86_64)

View file

@ -91,7 +91,6 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
#define show_falloc_mode(mode) __print_flags(mode, "|", \
{ FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
{ FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
{ FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})

View file

@ -2,6 +2,7 @@
#ifndef _UAPI_FALLOC_H_
#define _UAPI_FALLOC_H_
#define FALLOC_FL_ALLOCATE_RANGE 0x00 /* allocate range */
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
#define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */