kernel-hacking-2024-linux-s.../fs/nfsd/nfsxdr.c
Chuck Lever 8154ef2776 NFSD: Clean up legacy NFS WRITE argument XDR decoders
Move common code in NFSD's legacy NFS WRITE decoders into a helper.
The immediate benefit is reduction of code duplication and some nice
micro-optimizations (see below).

In the long term, this helper can perform a per-transport call-out
to fill the rq_vec (say, using RDMA Reads).

The legacy WRITE decoders and procs are changed to work like NFSv4,
which constructs the rq_vec just before it is about to call
vfs_writev.

Why? Calling a transport call-out from the proc instead of the XDR
decoder means that the incoming FH can be resolved to a particular
filesystem and file. This would allow pages from the backing file to
be presented to the transport to be filled, rather than presenting
anonymous pages and copying or flipping them into the file's page
cache later.

I also prefer using the pages in rq_arg.pages, instead of pulling
the data pages directly out of the rqstp::rq_pages array. This is
currently the way the NFSv3 write decoder works, but the other two
do not seem to take this approach. Fixing this removes the only
reference to rq_pages found in NFSD, eliminating an NFSD assumption
about how transports use the pages in rq_pages.

Lastly, avoid setting up the first element of rq_vec as a zero-
length buffer. This happens with an RDMA transport when a normal
Read chunk is present because the data payload is in rq_arg's
page list (none of it is in the head buffer).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2018-04-03 15:08:16 -04:00

560 lines
13 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* XDR support for nfsd
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
#include "vfs.h"
#include "xdr.h"
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR
/*
 * Mapping of S_IF* types to NFS file types.
 *
 * The table is indexed by the file-type bits of the mode shifted down
 * by twelve, i.e. (mode & S_IFMT) >> 12, and holds sixteen entries.
 * It is only ever read, so it is declared const.
 */
static const u32 nfs_ftypes[] = {
	NFNON,  NFCHR, NFCHR, NFBAD,	/* index  0 ..  3 */
	NFDIR,  NFBAD, NFBLK, NFBAD,	/* index  4 ..  7 */
	NFREG,  NFBAD, NFLNK, NFBAD,	/* index  8 .. 11 */
	NFSOCK, NFBAD, NFLNK, NFBAD,	/* index 12 .. 15 */
};
/*
* XDR functions for basic NFS types
*/
/*
 * Decode a fixed-size file handle (NFS_FHSIZE bytes) found at @p
 * into @fhp.
 *
 * @fhp is (re)initialised with fh_init() before the handle bytes are
 * copied in. Returns the stream position just past the handle.
 */
static __be32 *
decode_fh(__be32 *p, struct svc_fh *fhp)
{
	__be32 *next = p + (NFS_FHSIZE >> 2);

	fh_init(fhp, NFS_FHSIZE);
	memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE);
	fhp->fh_handle.fh_size = NFS_FHSIZE;

	/* FIXME: Look up export pointer here and verify
	 * Sun Secure RPC if requested */
	return next;
}
/*
 * Helper function for NFSv2 ACL code: non-static entry point that
 * simply forwards to decode_fh().
 */
__be32 *
nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
{
	__be32 *next = decode_fh(p, fhp);

	return next;
}
/*
 * Copy the NFS_FHSIZE bytes of the file handle held in @fhp to the
 * reply stream at @p. Returns the stream position just past the handle.
 */
static __be32 *
encode_fh(__be32 *p, struct svc_fh *fhp)
{
	__be32 *next = p + (NFS_FHSIZE >> 2);

	memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
	return next;
}
/*
 * Decode a file name (at most NFS_MAXNAMLEN bytes) in place and make
 * sure that it contains no slashes or null bytes.
 *
 * On success *namp points at the name, *lenp holds its length and the
 * updated stream position is returned. Returns NULL if the string
 * could not be decoded or contains a forbidden character.
 */
static __be32 *
decode_filename(__be32 *p, char **namp, unsigned int *lenp)
{
	const char *cur, *end;

	p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN);
	if (p == NULL)
		return NULL;

	end = *namp + *lenp;
	for (cur = *namp; cur < end; cur++) {
		if (*cur == '/' || *cur == '\0')
			return NULL;
	}
	return p;
}
/*
 * Decode a path name (at most NFS_MAXPATHLEN bytes) in place and make
 * sure that it contains no null bytes. Unlike decode_filename(),
 * slashes are accepted.
 *
 * Returns the updated stream position, or NULL if the string could not
 * be decoded or contains a null byte.
 */
static __be32 *
decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
{
	const char *cur;
	unsigned int remaining;

	p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN);
	if (p == NULL)
		return NULL;

	cur = *namp;
	for (remaining = *lenp; remaining != 0; remaining--, cur++) {
		if (*cur == '\0')
			return NULL;
	}
	return p;
}
/*
 * Decode a sattr structure at @p into @iap.
 *
 * Eight 32-bit words are consumed: mode, uid, gid, size, then
 * seconds/microseconds pairs for atime and mtime. A word of all ones
 * means "do not set this attribute"; iap->ia_valid only gets the bits
 * of the attributes that were actually supplied.
 *
 * Returns the stream position just past the structure.
 */
static __be32 *
decode_sattr(__be32 *p, struct iattr *iap)
{
	u32 val, usec;

	iap->ia_valid = 0;

	/*
	 * Mode. Sun client bug compatibility check: some sun clients
	 * seem to put 0xffff in the mode field when they mean
	 * 0xffffffff, so both are treated as "not set".
	 * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah.
	 */
	val = ntohl(*p++);
	if (val != (u32)-1 && val != 0xffff) {
		iap->ia_valid |= ATTR_MODE;
		iap->ia_mode = val;
	}

	/* Owner: only flagged when it maps to a valid kuid. */
	val = ntohl(*p++);
	if (val != (u32)-1) {
		iap->ia_uid = make_kuid(&init_user_ns, val);
		if (uid_valid(iap->ia_uid))
			iap->ia_valid |= ATTR_UID;
	}

	/* Group: only flagged when it maps to a valid kgid. */
	val = ntohl(*p++);
	if (val != (u32)-1) {
		iap->ia_gid = make_kgid(&init_user_ns, val);
		if (gid_valid(iap->ia_gid))
			iap->ia_valid |= ATTR_GID;
	}

	/* Size. */
	val = ntohl(*p++);
	if (val != (u32)-1) {
		iap->ia_valid |= ATTR_SIZE;
		iap->ia_size = val;
	}

	/* Access time: both words are always consumed. */
	val = ntohl(*p++);
	usec = ntohl(*p++);
	if (val != (u32)-1 && usec != (u32)-1) {
		iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
		iap->ia_atime.tv_sec = val;
		iap->ia_atime.tv_nsec = usec * 1000;
	}

	/* Modification time: both words are always consumed. */
	val = ntohl(*p++);
	usec = ntohl(*p++);
	if (val != (u32)-1 && usec != (u32)-1) {
		iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
		iap->ia_mtime.tv_sec = val;
		iap->ia_mtime.tv_nsec = usec * 1000;
		/*
		 * Passing the invalid value useconds=1000000 for mtime
		 * is a Sun convention for "set both mtime and atime to
		 * current server time". It's needed to make permissions
		 * checks for the "touch" program across v2 mounts to
		 * Solaris and Irix boxes work correctly. See description
		 * of sattr in section 6.1 of "NFS Illustrated" by
		 * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
		 */
		if (usec == 1000000)
			iap->ia_valid &= ~(ATTR_ATIME_SET|ATTR_MTIME_SET);
	}

	return p;
}
/*
 * Encode the file attributes in @stat for the file behind @fhp into
 * the reply stream at @p.
 *
 * Seventeen 32-bit words are written: type, mode, nlink, uid, gid,
 * size, blocksize, rdev, blocks, fsid, fileid and seconds/microseconds
 * pairs for atime, mtime and ctime. Every 64-bit quantity is truncated
 * to 32 bits. @rqstp is not used here.
 *
 * Returns the stream position just past the attributes.
 */
static __be32 *
encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
	     struct kstat *stat)
{
	struct dentry *dentry = fhp->fh_dentry;
	int fmt = stat->mode & S_IFMT;
	struct timespec mtime;
	u32 fsid;

	*p++ = htonl(nfs_ftypes[fmt >> 12]);
	*p++ = htonl((u32) stat->mode);
	*p++ = htonl((u32) stat->nlink);
	*p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
	*p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));

	/* The reported size of a symlink is capped at NFS_MAXPATHLEN. */
	if (S_ISLNK(fmt) && stat->size > NFS_MAXPATHLEN)
		*p++ = htonl(NFS_MAXPATHLEN);
	else
		*p++ = htonl((u32) stat->size);

	*p++ = htonl((u32) stat->blksize);

	/* Only character and block devices carry a device number. */
	*p++ = (S_ISCHR(fmt) || S_ISBLK(fmt)) ?
		htonl(new_encode_dev(stat->rdev)) : htonl(0xffffffff);

	*p++ = htonl((u32) stat->blocks);

	/* The fsid comes from wherever fsid_source() says it should. */
	switch (fsid_source(fhp)) {
	case FSIDSOURCE_FSID:
		fsid = (u32) fhp->fh_export->ex_fsid;
		break;
	case FSIDSOURCE_UUID: {
		/* Fold the first four words of the uuid into one. */
		const u32 *uuid = (u32 *) fhp->fh_export->ex_uuid;

		fsid = uuid[0] ^ uuid[1] ^ uuid[2] ^ uuid[3];
		break;
	}
	case FSIDSOURCE_DEV:
	default:
		fsid = new_encode_dev(stat->dev);
		break;
	}
	*p++ = htonl(fsid);

	*p++ = htonl((u32) stat->ino);

	/* Times go out as seconds plus microseconds. */
	*p++ = htonl((u32) stat->atime.tv_sec);
	*p++ = htonl(stat->atime.tv_nsec / 1000);

	/* lease_get_mtime() gets the chance to adjust the mtime copy. */
	mtime = stat->mtime;
	lease_get_mtime(d_inode(dentry), &mtime);
	*p++ = htonl((u32) mtime.tv_sec);
	*p++ = htonl(mtime.tv_nsec / 1000);

	*p++ = htonl((u32) stat->ctime.tv_sec);
	*p++ = htonl(stat->ctime.tv_nsec / 1000);

	return p;
}
/*
 * Helper function for NFSv2 ACL code: non-static entry point that
 * simply forwards to encode_fattr().
 */
__be32 *
nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
		     struct kstat *stat)
{
	__be32 *next = encode_fattr(rqstp, p, fhp, stat);

	return next;
}
/*
* XDR decode functions
*/
/*
 * Decode an empty argument list: nothing is consumed, the stream
 * position is only handed to xdr_argsize_check(), whose result is
 * returned.
 */
int
nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
{
	int ok = xdr_argsize_check(rqstp, p);

	return ok;
}
/*
 * Decode arguments consisting of a single file handle into the
 * struct nfsd_fhandle at rqstp->rq_argp.
 *
 * Returns 0 if the handle could not be decoded, otherwise the result
 * of xdr_argsize_check().
 */
int
nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_fhandle *args = rqstp->rq_argp;
	__be32 *next = decode_fh(p, &args->fh);

	if (next == NULL)
		return 0;
	return xdr_argsize_check(rqstp, next);
}
/*
 * Decode a file handle followed by a sattr structure into the
 * struct nfsd_sattrargs at rqstp->rq_argp.
 *
 * Returns 0 if the handle could not be decoded, otherwise the result
 * of xdr_argsize_check().
 */
int
nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_sattrargs *args = rqstp->rq_argp;
	__be32 *next;

	next = decode_fh(p, &args->fh);
	if (next == NULL)
		return 0;

	next = decode_sattr(next, &args->attrs);
	return xdr_argsize_check(rqstp, next);
}
/*
 * Decode a directory file handle followed by a file name into the
 * struct nfsd_diropargs at rqstp->rq_argp.
 *
 * Returns 0 if either item fails to decode, otherwise the result of
 * xdr_argsize_check().
 */
int
nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_diropargs *args = rqstp->rq_argp;

	p = decode_fh(p, &args->fh);
	if (p == NULL)
		return 0;

	p = decode_filename(p, &args->name, &args->len);
	if (p == NULL)
		return 0;

	return xdr_argsize_check(rqstp, p);
}
/*
 * Decode READ arguments (file handle, offset, count, totalcount) into
 * the struct nfsd_readargs at rqstp->rq_argp and set up rqstp->rq_vec
 * with the pages the data will be read into.
 *
 * args->count keeps the value sent by the client; only the length used
 * to size rq_vec is clamped to NFSSVC_MAXBLKSIZE_V2. args->vlen is set
 * to the number of rq_vec entries that were filled in.
 *
 * Returns 0 if the handle could not be decoded, otherwise the result
 * of xdr_argsize_check().
 */
int
nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_readargs *args = rqstp->rq_argp;
	unsigned int len;
	int v;

	p = decode_fh(p, &args->fh);
	if (!p)
		return 0;

	args->offset = ntohl(*p++);
	len = args->count = ntohl(*p++);
	p++; /* totalcount - unused */

	len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);

	/* set up somewhere to store response.
	 * We take pages, put them on reslist and include in iovec
	 */
	v = 0;
	while (len > 0) {
		/*
		 * Deliberately not called "p": the XDR cursor of that
		 * name is still needed after the loop.
		 */
		struct page *page = *(rqstp->rq_next_page++);

		rqstp->rq_vec[v].iov_base = page_address(page);
		rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
		len -= rqstp->rq_vec[v].iov_len;
		v++;
	}
	args->vlen = v;

	return xdr_argsize_check(rqstp, p);
}
/*
 * Decode WRITE arguments (file handle, beginoffset, offset,
 * totalcount, data length) into the struct nfsd_writeargs at
 * rqstp->rq_argp and verify that enough payload was received.
 *
 * On success args->first describes the part of the head buffer that
 * follows the decoded header.
 *
 * Returns 1 on success, 0 if the arguments are malformed.
 */
int
nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_writeargs *args = rqstp->rq_argp;
	struct kvec *head = rqstp->rq_arg.head;
	unsigned int count, consumed, avail;

	p = decode_fh(p, &args->fh);
	if (p == NULL)
		return 0;

	p++;				/* beginoffset */
	args->offset = ntohl(*p++);	/* offset */
	p++;				/* totalcount */
	count = args->len = ntohl(*p++);

	/*
	 * The protocol specifies a maximum of 8192 bytes.
	 */
	if (count > NFSSVC_MAXBLKSIZE_V2)
		return 0;

	/*
	 * Check to make sure that we got the right number of
	 * bytes: the header just decoded has to fit in the head buffer.
	 */
	consumed = (void *)p - head->iov_base;
	if (consumed > head->iov_len)
		return 0;

	/* Everything received after the header, head and pages combined. */
	avail = head->iov_len + rqstp->rq_arg.page_len - consumed;

	/*
	 * Round the length of the data which was specified up to
	 * the next multiple of XDR units and then compare that
	 * against the length which was actually received.
	 * Note that when RPCSEC/GSS (for example) is used, the
	 * data buffer can be padded so avail might be larger
	 * than required. It must never be smaller.
	 */
	if (avail < XDR_QUADLEN(count) * 4)
		return 0;

	args->first.iov_base = (void *)p;
	args->first.iov_len = head->iov_len - consumed;
	return 1;
}
/*
 * Decode a directory file handle, a file name and a sattr structure
 * into the struct nfsd_createargs at rqstp->rq_argp.
 *
 * Returns 0 if the handle or the name fails to decode, otherwise the
 * result of xdr_argsize_check().
 */
int
nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_createargs *args = rqstp->rq_argp;

	p = decode_fh(p, &args->fh);
	if (p == NULL)
		return 0;

	p = decode_filename(p, &args->name, &args->len);
	if (p == NULL)
		return 0;

	p = decode_sattr(p, &args->attrs);
	return xdr_argsize_check(rqstp, p);
}
/*
 * Decode RENAME arguments into the struct nfsd_renameargs at
 * rqstp->rq_argp: "from" file handle and name, then "to" file handle
 * and name.
 *
 * Returns 0 as soon as one item fails to decode, otherwise the result
 * of xdr_argsize_check().
 */
int
nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_renameargs *args = rqstp->rq_argp;

	p = decode_fh(p, &args->ffh);
	if (p == NULL)
		return 0;

	p = decode_filename(p, &args->fname, &args->flen);
	if (p == NULL)
		return 0;

	p = decode_fh(p, &args->tfh);
	if (p == NULL)
		return 0;

	p = decode_filename(p, &args->tname, &args->tlen);
	if (p == NULL)
		return 0;

	return xdr_argsize_check(rqstp, p);
}
/*
 * Decode READLINK arguments (a single file handle) into the
 * struct nfsd_readlinkargs at rqstp->rq_argp.
 *
 * args->buffer is pointed at the next page taken from
 * rqstp->rq_next_page (presumably where the procedure will place its
 * result - confirm against the caller).
 *
 * Returns 0 if the handle could not be decoded, otherwise the result
 * of xdr_argsize_check().
 */
int
nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_readlinkargs *args = rqstp->rq_argp;
	struct page *page;

	p = decode_fh(p, &args->fh);
	if (p == NULL)
		return 0;

	page = *(rqstp->rq_next_page++);
	args->buffer = page_address(page);

	return xdr_argsize_check(rqstp, p);
}
/*
 * Decode LINK arguments into the struct nfsd_linkargs at
 * rqstp->rq_argp: "from" file handle, "to" file handle and the name
 * that goes with the latter.
 *
 * Returns 0 as soon as one item fails to decode, otherwise the result
 * of xdr_argsize_check().
 */
int
nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_linkargs *args = rqstp->rq_argp;

	p = decode_fh(p, &args->ffh);
	if (p == NULL)
		return 0;

	p = decode_fh(p, &args->tfh);
	if (p == NULL)
		return 0;

	p = decode_filename(p, &args->tname, &args->tlen);
	if (p == NULL)
		return 0;

	return xdr_argsize_check(rqstp, p);
}
/*
 * Decode SYMLINK arguments into the struct nfsd_symlinkargs at
 * rqstp->rq_argp: file handle, file name, target path name and a
 * sattr structure.
 *
 * Returns 0 if the handle, the name or the path fails to decode,
 * otherwise the result of xdr_argsize_check().
 */
int
nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_symlinkargs *args = rqstp->rq_argp;

	p = decode_fh(p, &args->ffh);
	if (p == NULL)
		return 0;

	p = decode_filename(p, &args->fname, &args->flen);
	if (p == NULL)
		return 0;

	p = decode_pathname(p, &args->tname, &args->tlen);
	if (p == NULL)
		return 0;

	p = decode_sattr(p, &args->attrs);
	return xdr_argsize_check(rqstp, p);
}
/*
 * Decode READDIR arguments (file handle, cookie, count) into the
 * struct nfsd_readdirargs at rqstp->rq_argp.
 *
 * The requested count is clamped to PAGE_SIZE and args->buffer is
 * pointed at the next page taken from rqstp->rq_next_page.
 *
 * Returns 0 if the handle could not be decoded, otherwise the result
 * of xdr_argsize_check().
 */
int
nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_readdirargs *args = rqstp->rq_argp;
	struct page *page;

	p = decode_fh(p, &args->fh);
	if (p == NULL)
		return 0;

	args->cookie = ntohl(*p++);
	args->count = ntohl(*p++);
	args->count = min_t(u32, args->count, PAGE_SIZE);

	page = *(rqstp->rq_next_page++);
	args->buffer = page_address(page);

	return xdr_argsize_check(rqstp, p);
}
/*
* XDR encode functions
*/
/*
 * Encode an empty result: nothing is written, the stream position is
 * only handed to xdr_ressize_check(), whose result is returned.
 */
int
nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
{
	int ok = xdr_ressize_check(rqstp, p);

	return ok;
}
/*
 * Encode the file attributes held in the struct nfsd_attrstat at
 * rqstp->rq_resp. Returns the result of xdr_ressize_check().
 */
int
nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_attrstat *resp = rqstp->rq_resp;
	__be32 *end = encode_fattr(rqstp, p, &resp->fh, &resp->stat);

	return xdr_ressize_check(rqstp, end);
}
/*
 * Encode the file handle followed by the file attributes held in the
 * struct nfsd_diropres at rqstp->rq_resp. Returns the result of
 * xdr_ressize_check().
 */
int
nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_diropres *resp = rqstp->rq_resp;
	__be32 *end;

	end = encode_fh(p, &resp->fh);
	end = encode_fattr(rqstp, end, &resp->fh, &resp->stat);

	return xdr_ressize_check(rqstp, end);
}
/*
 * Encode a READLINK result: the length word goes into the stream at
 * @p, and rq_res.page_len is set to resp->len to account for the link
 * text itself.
 *
 * When resp->len is not a multiple of four, the tail buffer is set up
 * to supply the missing zero padding. Always returns 1.
 */
int
nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_readlinkres *resp = rqstp->rq_resp;

	*p++ = htonl(resp->len);
	xdr_ressize_check(rqstp, p);
	rqstp->rq_res.page_len = resp->len;

	/* Already a whole number of XDR units: no tail needed. */
	if (!(resp->len & 3))
		return 1;

	/* need to pad the tail */
	rqstp->rq_res.tail[0].iov_base = p;
	*p = 0;
	rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
	return 1;
}
/*
 * Encode a READ result: the file attributes and the byte count go
 * into the stream at @p, and rq_res.page_len is set to resp->count to
 * account for the data itself.
 *
 * When resp->count is not a multiple of four, the tail buffer is set
 * up to supply the missing zero padding. Always returns 1.
 */
int
nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_readres *resp = rqstp->rq_resp;

	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
	*p++ = htonl(resp->count);
	xdr_ressize_check(rqstp, p);

	/* now update rqstp->rq_res to reflect data as well */
	rqstp->rq_res.page_len = resp->count;

	/* Already a whole number of XDR units: no tail needed. */
	if (!(resp->count & 3))
		return 1;

	/* need to pad the tail */
	rqstp->rq_res.tail[0].iov_base = p;
	*p = 0;
	rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
	return 1;
}
/*
 * Finish a READDIR result.
 *
 * After the size check on the stream position @p, two words are
 * appended at resp->buffer: a zero ("no more entries") and a flag
 * that is 1 when resp->common.err equals nfserr_eof. rq_res.page_len
 * is then derived from where those words end within their page.
 * Always returns 1.
 */
int
nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_readdirres *resp = rqstp->rq_resp;
	__be32 *end;

	xdr_ressize_check(rqstp, p);

	end = resp->buffer;
	*end++ = 0;	/* no more entries */
	*end++ = htonl((resp->common.err == nfserr_eof));

	/* In-page offset of the end; an exact page end counts as a full page. */
	rqstp->rq_res.page_len = (((unsigned long)end-1) & ~PAGE_MASK)+1;

	return 1;
}
/*
 * Encode a STATFS result from the struct nfsd_statfsres at
 * rqstp->rq_resp: five words holding the maximum transfer size, the
 * block size, and the total, free and available block counts.
 * Returns the result of xdr_ressize_check().
 */
int
nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
{
	struct nfsd_statfsres *resp = rqstp->rq_resp;
	struct kstatfs *fs = &resp->stats;

	p[0] = htonl(NFSSVC_MAXBLKSIZE_V2);	/* max transfer size */
	p[1] = htonl(fs->f_bsize);
	p[2] = htonl(fs->f_blocks);
	p[3] = htonl(fs->f_bfree);
	p[4] = htonl(fs->f_bavail);

	return xdr_ressize_check(rqstp, p + 5);
}
/*
 * Append one directory entry to the READDIR reply being built in the
 * struct nfsd_readdirres that contains @ccdv.
 *
 * @offset is stored into the "next entry" slot left by the previous
 * call (if any). The name is truncated to NFS2_MAXNAMLEN. @d_type is
 * not used.
 *
 * Returns 0 on success. On failure -EINVAL is returned and
 * cd->common.err is set to nfserr_fbig (offset or inode number does
 * not fit in 32 bits) or nfserr_toosmall (no room left in the buffer).
 */
int
nfssvc_encode_entry(void *ccdv, const char *name,
		    int namlen, loff_t offset, u64 ino, unsigned int d_type)
{
	struct readdir_cd *ccd = ccdv;
	struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common);
	__be32 *p = cd->buffer;
	int buflen, slen;

	/*
	dprintk("nfsd: entry(%.*s off %ld ino %ld)\n",
			namlen, name, offset, ino);
	 */

	if (offset > ~((u32) 0)) {
		cd->common.err = nfserr_fbig;
		return -EINVAL;
	}

	/* Fill in the slot the previous entry reserved for this offset. */
	if (cd->offset)
		*cd->offset = htonl(offset);

	/* truncate filename */
	namlen = min(namlen, NFS2_MAXNAMLEN);
	slen = XDR_QUADLEN(namlen);

	/*
	 * Besides the name itself the entry needs four more words
	 * (marker, file id, name length, next offset).
	 */
	buflen = cd->buflen - slen - 4;
	if (buflen < 0) {
		cd->common.err = nfserr_toosmall;
		return -EINVAL;
	}

	if (ino > ~((u32) 0)) {
		cd->common.err = nfserr_fbig;
		return -EINVAL;
	}

	*p++ = xdr_one;				/* mark entry present */
	*p++ = htonl((u32) ino);		/* file id */
	p = xdr_encode_array(p, name, namlen);	/* name length & name */

	cd->offset = p;				/* remember pointer */
	*p++ = htonl(~0U);			/* offset of next entry */

	cd->buflen = buflen;
	cd->buffer = p;
	cd->common.err = nfs_ok;
	return 0;
}
/*
* XDR release functions
*/
/*
 * Release function for results that embed a file handle: calls
 * fh_put() on the handle inside the struct nfsd_fhandle at
 * rqstp->rq_resp.
 */
void
nfssvc_release_fhandle(struct svc_rqst *rqstp)
{
	struct nfsd_fhandle *res = rqstp->rq_resp;

	fh_put(&res->fh);
}