From 8dd69d9f64e92529037550c97a07b1b78296e92c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 21 Oct 2022 13:21:03 -0400 Subject: [PATCH] bcachefs: KEY_TYPE_inode_v3, metadata_version_inode_v3 Move bi_size and bi_sectors into the non-varint portion of the inode, so that the write path can update them without going through the relatively expensive unpack/pack operations. Other changes: - Add a field for the offset of the varint section, so we can add new non-varint fields without needing a new inode type, like alloc_v3 - Move bi_mode into the flags field, so that the varint section can be u64 aligned Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 55 +++++++++++- fs/bcachefs/bkey_methods.c | 1 + fs/bcachefs/buckets.c | 4 +- fs/bcachefs/inode.c | 163 +++++++++++++++++++++++++++++----- fs/bcachefs/inode.h | 24 +++-- fs/bcachefs/io.c | 2 +- fs/bcachefs/recovery.c | 7 +- 7 files changed, 219 insertions(+), 37 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 66c885186160..e0e2219fb1cc 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -370,7 +370,8 @@ static inline void bkey_init(struct bkey *k) x(set, 25) \ x(lru, 26) \ x(alloc_v4, 27) \ - x(backpointer, 28) + x(backpointer, 28) \ + x(inode_v3, 29) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, @@ -721,6 +722,21 @@ struct bch_inode_v2 { __u8 fields[0]; } __packed __aligned(8); +struct bch_inode_v3 { + struct bch_val v; + + __le64 bi_journal_seq; + __le64 bi_hash_seed; + __le64 bi_flags; + __le64 bi_sectors; + __le64 bi_size; + __le64 bi_version; + __u8 fields[0]; +} __packed __aligned(8); + +#define INODEv3_FIELDS_START_INITIAL 6 +#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(u64)) + struct bch_inode_generation { struct bch_val v; @@ -732,7 +748,7 @@ struct bch_inode_generation { * bi_subvol and bi_parent_subvol are only set for subvolume roots: */ -#define 
BCH_INODE_FIELDS() \ +#define BCH_INODE_FIELDS_v2() \ x(bi_atime, 96) \ x(bi_ctime, 96) \ x(bi_mtime, 96) \ @@ -759,6 +775,31 @@ struct bch_inode_generation { x(bi_subvol, 32) \ x(bi_parent_subvol, 32) +#define BCH_INODE_FIELDS_v3() \ + x(bi_atime, 96) \ + x(bi_ctime, 96) \ + x(bi_mtime, 96) \ + x(bi_otime, 96) \ + x(bi_uid, 32) \ + x(bi_gid, 32) \ + x(bi_nlink, 32) \ + x(bi_generation, 32) \ + x(bi_dev, 32) \ + x(bi_data_checksum, 8) \ + x(bi_compression, 8) \ + x(bi_project, 32) \ + x(bi_background_compression, 8) \ + x(bi_data_replicas, 8) \ + x(bi_promote_target, 16) \ + x(bi_foreground_target, 16) \ + x(bi_background_target, 16) \ + x(bi_erasure_code, 16) \ + x(bi_fields_set, 16) \ + x(bi_dir, 64) \ + x(bi_dir_offset, 64) \ + x(bi_subvol, 32) \ + x(bi_parent_subvol, 32) + /* subset of BCH_INODE_FIELDS */ #define BCH_INODE_OPTS() \ x(data_checksum, 8) \ @@ -815,6 +856,13 @@ LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32); LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31); +LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24); +LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31); + +LE64_BITMASK(INODEv3_FIELDS_START, + struct bch_inode_v3, bi_flags, 31, 36); +LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); + /* Dirents */ /* @@ -1499,7 +1547,8 @@ struct bch_sb_field_journal_seq_blacklist { x(freespace, 19) \ x(alloc_v4, 20) \ x(new_data_types, 21) \ - x(backpointers, 22) + x(backpointers, 22) \ + x(inode_v3, 23) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 45c8b2c61c5b..c7c0a9781a35 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -149,6 +149,7 @@ static unsigned bch2_key_types_allowed[] = { (1U << KEY_TYPE_whiteout)| (1U << KEY_TYPE_inode)| (1U << KEY_TYPE_inode_v2)| + (1U << 
KEY_TYPE_inode_v3)| (1U << KEY_TYPE_inode_generation), [BKEY_TYPE_dirents] = (1U << KEY_TYPE_deleted)| diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index b657f8545a3b..9dcdfca19d52 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1123,10 +1123,10 @@ int bch2_mark_inode(struct btree_trans *trans, u64 journal_seq = trans->journal_res.seq; if (flags & BTREE_TRIGGER_INSERT) { - struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v; + struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v; BUG_ON(!journal_seq); - BUG_ON(new.k->type != KEY_TYPE_inode_v2); + BUG_ON(new.k->type != KEY_TYPE_inode_v3); v->bi_journal_seq = cpu_to_le64(journal_seq); } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 9eeabe70aec1..f338cf6fd8b7 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -60,11 +60,10 @@ static int inode_decode_field(const u8 *in, const u8 *end, return bytes; } -static inline void bch2_inode_pack_inlined(struct bch_fs *c, - struct bkey_inode_buf *packed, +static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed, const struct bch_inode_unpacked *inode) { - struct bkey_i_inode_v2 *k = &packed->inode; + struct bkey_i_inode_v3 *k = &packed->inode; u8 *out = k->v.fields; u8 *end = (void *) &packed[1]; u8 *last_nonzero_field = out; @@ -72,13 +71,17 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c, unsigned bytes; int ret; - bkey_inode_v2_init(&packed->inode.k_i); + bkey_inode_v3_init(&packed->inode.k_i); packed->inode.k.p.offset = inode->bi_inum; packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq); packed->inode.v.bi_hash_seed = inode->bi_hash_seed; packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); - packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); - packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); + packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors); + packed->inode.v.bi_size = cpu_to_le64(inode->bi_size); + packed->inode.v.bi_version = 
cpu_to_le64(inode->bi_version); + SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode); + SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR); + #define x(_name, _bits) \ nr_fields++; \ @@ -99,7 +102,7 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c, *out++ = 0; \ } - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v3() #undef x BUG_ON(out > end); @@ -110,7 +113,7 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c, set_bkey_val_bytes(&packed->inode.k, bytes); memset_u64s_tail(&packed->inode.v, 0, bytes); - SET_INODEv2_NR_FIELDS(&k->v, nr_fields); + SET_INODEv3_NR_FIELDS(&k->v, nr_fields); if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { struct bch_inode_unpacked unpacked; @@ -120,21 +123,23 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c, BUG_ON(ret); BUG_ON(unpacked.bi_inum != inode->bi_inum); BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); + BUG_ON(unpacked.bi_sectors != inode->bi_sectors); + BUG_ON(unpacked.bi_size != inode->bi_size); + BUG_ON(unpacked.bi_version != inode->bi_version); BUG_ON(unpacked.bi_mode != inode->bi_mode); #define x(_name, _bits) if (unpacked._name != inode->_name) \ panic("unpacked %llu should be %llu", \ (u64) unpacked._name, (u64) inode->_name); - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v3() #undef x } } -void bch2_inode_pack(struct bch_fs *c, - struct bkey_inode_buf *packed, +void bch2_inode_pack(struct bkey_inode_buf *packed, const struct bch_inode_unpacked *inode) { - bch2_inode_pack_inlined(c, packed, inode); + bch2_inode_pack_inlined(packed, inode); } static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, @@ -164,7 +169,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, unpacked->_name = field[1]; \ in += ret; - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v2() #undef x /* XXX: signal if there were more fields than expected? 
*/
@@ -203,15 +208,82 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
 		return -1;						\
 	fieldnr++;
 
-	BCH_INODE_FIELDS()
+	BCH_INODE_FIELDS_v2()
 #undef x
 
 	/* XXX: signal if there were more fields than expected? */
 	return 0;
 }
 
-int bch2_inode_unpack(struct bkey_s_c k,
-		      struct bch_inode_unpacked *unpacked)
+/*
+ * Unpack a KEY_TYPE_inode_v3 key into @unpacked.
+ *
+ * The fixed-width members are copied straight out of the value and bi_mode is
+ * taken from the INODEv3_MODE bits of bi_flags.  The BCH_INODE_FIELDS_v3()
+ * members are varint encoded: the first INODEv3_NR_FIELDS of them are decoded
+ * and any beyond that are set to zero.
+ *
+ * The varint section is found via INODEv3_FIELDS_START (an offset in u64s from
+ * the start of the value) instead of being assumed to sit at ->fields, so a
+ * key written with more fixed-width fields than this struct has still decodes.
+ * bch2_inode_v3_invalid() rejects offsets that lie outside the value.
+ *
+ * Returns 0 on success, or a negative value if a varint cannot be decoded or
+ * a decoded value does not fit its destination field.
+ */
+static int bch2_inode_unpack_v3(struct bkey_s_c k,
+				struct bch_inode_unpacked *unpacked)
+{
+	struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+	const u8 *in = (const u8 *) inode.v + INODEv3_FIELDS_START(inode.v) * sizeof(u64);
+	const u8 *end = bkey_val_end(inode);
+	unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
+	unsigned fieldnr = 0;
+	int ret;
+	u64 v[2];
+
+	unpacked->bi_inum	= inode.k->p.offset;
+	unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
+	unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
+	unpacked->bi_flags	= le64_to_cpu(inode.v->bi_flags);
+	unpacked->bi_sectors	= le64_to_cpu(inode.v->bi_sectors);
+	unpacked->bi_size	= le64_to_cpu(inode.v->bi_size);
+	unpacked->bi_version	= le64_to_cpu(inode.v->bi_version);
+	unpacked->bi_mode	= INODEv3_MODE(inode.v);
+
+#define x(_name, _bits)							\
+	if (fieldnr < nr_fields) {					\
+		ret = bch2_varint_decode_fast(in, end, &v[0]);		\
+		if (ret < 0)						\
+			return ret;					\
+		in += ret;						\
+									\
+		if (_bits > 64) {					\
+			ret = bch2_varint_decode_fast(in, end, &v[1]);	\
+			if (ret < 0)					\
+				return ret;				\
+			in += ret;					\
+		} else {						\
+			v[1] = 0;					\
+		}							\
+	} else {							\
+		v[0] = v[1] = 0;					\
+	}								\
+									\
+	unpacked->_name = v[0];						\
+	if (v[1] || v[0] != unpacked->_name)				\
+		return -1;						\
+	fieldnr++;
+
+	BCH_INODE_FIELDS_v3()
+#undef x
+
+	/* XXX: signal if there were more fields than expected?
*/ + return 0; +} + +static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k, + struct bch_inode_unpacked *unpacked) { memset(unpacked, 0, sizeof(*unpacked)); @@ -252,6 +308,14 @@ int bch2_inode_unpack(struct bkey_s_c k, } } +int bch2_inode_unpack(struct bkey_s_c k, + struct bch_inode_unpacked *unpacked) +{ + if (likely(k.k->type == KEY_TYPE_inode_v3)) + return bch2_inode_unpack_v3(k, unpacked); + return bch2_inode_unpack_slowpath(k, unpacked); +} + int bch2_inode_peek(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode, @@ -297,11 +361,32 @@ int bch2_inode_write(struct btree_trans *trans, if (IS_ERR(inode_p)) return PTR_ERR(inode_p); - bch2_inode_pack_inlined(trans->c, inode_p, inode); + bch2_inode_pack_inlined(inode_p, inode); inode_p->inode.k.p.snapshot = iter->snapshot; return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); } +struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) +{ + struct bch_inode_unpacked u; + struct bkey_inode_buf *inode_p; + int ret; + + if (!bkey_is_inode(&k->k)) + return ERR_PTR(-ENOENT); + + inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); + if (IS_ERR(inode_p)) + return ERR_CAST(inode_p); + + ret = bch2_inode_unpack(bkey_i_to_s_c(k), &u); + if (ret) + return ERR_PTR(ret); + + bch2_inode_pack(inode_p, &u); + return &inode_p->inode.k_i; +} + static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err) { struct bch_inode_unpacked unpacked; @@ -387,15 +472,48 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, return __bch2_inode_invalid(k, err); } -static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) +int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, + int rw, struct printbuf *err) { - prt_printf(out, "mode %o flags %x journal_seq %llu", + struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); + + if (bkey_val_bytes(k.k) < sizeof(*inode.v)) { + prt_printf(err, 
"incorrect value size (%zu < %zu)", + bkey_val_bytes(k.k), sizeof(*inode.v)); + return -BCH_ERR_invalid_bkey; + } + + if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) { + prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)", + INODEv3_FIELDS_START(inode.v), + INODEv3_FIELDS_START_INITIAL, + bkey_val_u64s(inode.k)); + return -BCH_ERR_invalid_bkey; + } + + if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { + prt_printf(err, "invalid str hash type (%llu >= %u)", + INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); + return -BCH_ERR_invalid_bkey; + } + + return __bch2_inode_invalid(k, err); +} + +static void __bch2_inode_unpacked_to_text(struct printbuf *out, + struct bch_inode_unpacked *inode) +{ + prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu", inode->bi_mode, inode->bi_flags, - inode->bi_journal_seq); + inode->bi_journal_seq, + inode->bi_size, + inode->bi_sectors, + inode->bi_version); #define x(_name, _bits) \ prt_printf(out, " "#_name " %llu", (u64) inode->_name); - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v3() #undef x } @@ -405,8 +523,7 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked __bch2_inode_unpacked_to_text(out, inode); } -void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) +void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bch_inode_unpacked inode; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index da78ed023a30..b753e1b254e4 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,6 +9,7 @@ extern const char * const bch2_inode_opts[]; int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); +int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf 
*); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ @@ -25,10 +26,18 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .atomic_trigger = bch2_mark_inode, \ }) +#define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ + .key_invalid = bch2_inode_v3_invalid, \ + .val_to_text = bch2_inode_to_text, \ + .trans_trigger = bch2_trans_mark_inode, \ + .atomic_trigger = bch2_mark_inode, \ +}) + static inline bool bkey_is_inode(const struct bkey *k) { return k->type == KEY_TYPE_inode || - k->type == KEY_TYPE_inode_v2; + k->type == KEY_TYPE_inode_v2 || + k->type == KEY_TYPE_inode_v3; } int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c, @@ -52,25 +61,28 @@ struct bch_inode_unpacked { u64 bi_inum; u64 bi_journal_seq; __le64 bi_hash_seed; + u64 bi_size; + u64 bi_sectors; + u64 bi_version; u32 bi_flags; u16 bi_mode; #define x(_name, _bits) u##_bits _name; - BCH_INODE_FIELDS() + BCH_INODE_FIELDS_v3() #undef x }; struct bkey_inode_buf { - struct bkey_i_inode_v2 inode; + struct bkey_i_inode_v3 inode; #define x(_name, _bits) + 8 + _bits / 8 - u8 _pad[0 + BCH_INODE_FIELDS()]; + u8 _pad[0 + BCH_INODE_FIELDS_v3()]; #undef x } __packed __aligned(8); -void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *, - const struct bch_inode_unpacked *); +void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *); +struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index a937940f5096..fb85c2bfd569 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -356,7 +356,7 @@ int bch2_extent_update(struct btree_trans *trans, } if (i_sectors_delta || new_i_size) { - bch2_inode_pack(trans->c, &inode_p, &inode_u); + 
bch2_inode_pack(&inode_p, &inode_u); inode_p.inode.k.p.snapshot = iter->snapshot; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 2df1a541cb40..b35590226037 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1098,6 +1098,9 @@ int bch2_fs_recovery(struct bch_fs *c) c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; + } else if (c->sb.version < bcachefs_metadata_version_inode_v3) { + bch_info(c, "version prior to inode_v3, upgrade required"); + c->opts.version_upgrade = true; } } @@ -1482,7 +1485,7 @@ int bch2_fs_initialize(struct bch_fs *c) c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - if (c->sb.version < bcachefs_metadata_version_backpointers) + if (c->sb.version < bcachefs_metadata_version_inode_v3) c->opts.version_upgrade = true; if (c->opts.version_upgrade) { @@ -1563,7 +1566,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); root_inode.bi_inum = BCACHEFS_ROOT_INO; root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; - bch2_inode_pack(c, &packed_inode, &root_inode); + bch2_inode_pack(&packed_inode, &root_inode); packed_inode.inode.k.p.snapshot = U32_MAX; err = "error creating root directory";