bcachefs: Journal seq now incremented at entry open, not close

This patch moves initialization of the new journal entry out of
__journal_entry_close() and into journal_entry_open().

This also means that when there is no open journal entry,
journal_cur_seq() now refers to the sequence number of the last journal
entry, rather than that of the next one.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2022-02-28 16:35:42 -05:00 committed by Kent Overstreet
parent b66fbf3342
commit f0a3a2ccab
5 changed files with 44 additions and 98 deletions

View file

@ -781,7 +781,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (initial) { if (initial) {
BUG_ON(bch2_journal_seq_verify && BUG_ON(bch2_journal_seq_verify &&
k->k->version.lo > journal_cur_seq(&c->journal)); k->k->version.lo > atomic64_read(&c->journal.seq));
ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k); ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
if (ret) if (ret)

View file

@ -21,16 +21,12 @@
static u64 last_unwritten_seq(struct journal *j) static u64 last_unwritten_seq(struct journal *j)
{ {
union journal_res_state s = READ_ONCE(j->reservations); return j->seq_ondisk + 1;
lockdep_assert_held(&j->lock);
return journal_cur_seq(j) - ((s.idx - s.unwritten_idx) & JOURNAL_BUF_MASK);
} }
static inline bool journal_seq_unwritten(struct journal *j, u64 seq) static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{ {
return seq >= last_unwritten_seq(j); return seq > j->seq_ondisk;
} }
static bool __journal_entry_is_open(union journal_res_state state) static bool __journal_entry_is_open(union journal_res_state state)
@ -49,8 +45,6 @@ journal_seq_to_buf(struct journal *j, u64 seq)
struct journal_buf *buf = NULL; struct journal_buf *buf = NULL;
EBUG_ON(seq > journal_cur_seq(j)); EBUG_ON(seq > journal_cur_seq(j));
EBUG_ON(seq == journal_cur_seq(j) &&
j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);
if (journal_seq_unwritten(j, seq)) { if (journal_seq_unwritten(j, seq)) {
buf = j->buf + (seq & JOURNAL_BUF_MASK); buf = j->buf + (seq & JOURNAL_BUF_MASK);
@ -68,31 +62,6 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
p->devs.nr = 0; p->devs.nr = 0;
} }
static void journal_pin_new_entry(struct journal *j)
{
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for journal_last_seq() to be calculated correctly
*/
atomic64_inc(&j->seq);
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
}
static void bch2_journal_buf_init(struct journal *j)
{
struct journal_buf *buf = journal_cur_buf(j);
bkey_extent_init(&buf->key);
buf->noflush = false;
buf->must_flush = false;
buf->separate_flush = false;
buf->flush_time = 0;
memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
buf->data->u64s = 0;
}
void bch2_journal_halt(struct journal *j) void bch2_journal_halt(struct journal *j)
{ {
union journal_res_state old, new; union journal_res_state old, new;
@ -200,11 +169,6 @@ static bool __journal_entry_close(struct journal *j)
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq)); __bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
/* Initialize new buffer: */
journal_pin_new_entry(j);
bch2_journal_buf_init(j);
cancel_delayed_work(&j->write_work); cancel_delayed_work(&j->write_work);
bch2_journal_space_available(j); bch2_journal_space_available(j);
@ -274,27 +238,47 @@ static int journal_entry_open(struct journal *j)
if (bch2_journal_error(j)) if (bch2_journal_error(j))
return cur_entry_insufficient_devices; /* -EROFS */ return cur_entry_insufficient_devices; /* -EROFS */
if (!fifo_free(&j->pin))
return cur_entry_journal_pin_full;
BUG_ON(!j->cur_entry_sectors); BUG_ON(!j->cur_entry_sectors);
/* We used to add things to the first journal entry before opening it, buf->expires =
* as a way to deal with a chicken-and-the-egg problem, but we shouldn't (journal_cur_seq(j) == j->flushed_seq_ondisk
* be anymore: ? jiffies
*/ : j->last_flush_write) +
BUG_ON(buf->data->u64s);
buf->expires = jiffies +
msecs_to_jiffies(c->opts.journal_flush_delay); msecs_to_jiffies(c->opts.journal_flush_delay);
buf->u64s_reserved = j->entry_u64s_reserved; buf->u64s_reserved = j->entry_u64s_reserved;
buf->disk_sectors = j->cur_entry_sectors; buf->disk_sectors = j->cur_entry_sectors;
buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9); buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9);
u64s = (int) (buf->sectors << 9) / sizeof(u64) - u64s = (int) (buf->sectors << 9) / sizeof(u64) -
journal_entry_overhead(j); journal_entry_overhead(j);
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
if (u64s <= 0) if (u64s <= 0)
return cur_entry_journal_full; return cur_entry_journal_full;
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for journal_last_seq() to be calculated correctly
*/
atomic64_inc(&j->seq);
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
bkey_extent_init(&buf->key);
buf->noflush = false;
buf->must_flush = false;
buf->separate_flush = false;
buf->flush_time = 0;
memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
buf->data->u64s = 0;
/* /*
* Must be set before marking the journal entry as open: * Must be set before marking the journal entry as open:
*/ */
@ -305,8 +289,8 @@ static int journal_entry_open(struct journal *j)
old.v = new.v = v; old.v = new.v = v;
BUG_ON(old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL); BUG_ON(old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL);
BUG_ON(journal_state_count(new, new.idx));
EBUG_ON(journal_state_count(new, new.idx));
journal_state_inc(&new); journal_state_inc(&new);
new.cur_entry_offset = 0; new.cur_entry_offset = 0;
} while ((v = atomic64_cmpxchg(&j->reservations.counter, } while ((v = atomic64_cmpxchg(&j->reservations.counter,
@ -595,9 +579,12 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
seq = max(seq, last_unwritten_seq(j)); seq = max(seq, last_unwritten_seq(j));
recheck_need_open: recheck_need_open:
if (seq == journal_cur_seq(j) && !journal_entry_is_open(j)) { if (seq > journal_cur_seq(j)) {
struct journal_res res = { 0 }; struct journal_res res = { 0 };
if (journal_entry_is_open(j))
__journal_entry_close(j);
spin_unlock(&j->lock); spin_unlock(&j->lock);
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
@ -694,42 +681,12 @@ int bch2_journal_meta(struct journal *j)
*/ */
void bch2_journal_flush_async(struct journal *j, struct closure *parent) void bch2_journal_flush_async(struct journal *j, struct closure *parent)
{ {
u64 seq, journal_seq; bch2_journal_flush_seq_async(j, atomic64_read(&j->seq), parent);
spin_lock(&j->lock);
journal_seq = journal_cur_seq(j);
if (journal_entry_is_open(j)) {
seq = journal_seq;
} else if (journal_seq) {
seq = journal_seq - 1;
} else {
spin_unlock(&j->lock);
return;
}
spin_unlock(&j->lock);
bch2_journal_flush_seq_async(j, seq, parent);
} }
int bch2_journal_flush(struct journal *j) int bch2_journal_flush(struct journal *j)
{ {
u64 seq, journal_seq; return bch2_journal_flush_seq(j, atomic64_read(&j->seq));
spin_lock(&j->lock);
journal_seq = journal_cur_seq(j);
if (journal_entry_is_open(j)) {
seq = journal_seq;
} else if (journal_seq) {
seq = journal_seq - 1;
} else {
spin_unlock(&j->lock);
return 0;
}
spin_unlock(&j->lock);
return bch2_journal_flush_seq(j, seq);
} }
/* /*
@ -1022,8 +979,7 @@ void bch2_fs_journal_stop(struct journal *j)
BUG_ON(!bch2_journal_error(j) && BUG_ON(!bch2_journal_error(j) &&
test_bit(JOURNAL_REPLAY_DONE, &j->flags) && test_bit(JOURNAL_REPLAY_DONE, &j->flags) &&
(journal_entry_is_open(j) || j->last_empty_seq != journal_cur_seq(j));
j->last_empty_seq + 1 != journal_cur_seq(j)));
cancel_delayed_work_sync(&j->write_work); cancel_delayed_work_sync(&j->write_work);
bch2_journal_reclaim_stop(j); bch2_journal_reclaim_stop(j);
@ -1093,11 +1049,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
set_bit(JOURNAL_STARTED, &j->flags); set_bit(JOURNAL_STARTED, &j->flags);
j->last_flush_write = jiffies; j->last_flush_write = jiffies;
journal_pin_new_entry(j); j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j) + 1;
j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
bch2_journal_buf_init(j);
c->last_bucket_seq_cleanup = journal_cur_seq(j); c->last_bucket_seq_cleanup = journal_cur_seq(j);

View file

@ -1368,8 +1368,6 @@ static void journal_write_done(struct closure *cl)
journal_seq_pin(j, seq)->devs = w->devs_written; journal_seq_pin(j, seq)->devs = w->devs_written;
if (!err) { if (!err) {
j->seq_ondisk = seq;
if (!JSET_NO_FLUSH(w->data)) { if (!JSET_NO_FLUSH(w->data)) {
j->flushed_seq_ondisk = seq; j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = w->last_seq; j->last_seq_ondisk = w->last_seq;
@ -1377,6 +1375,8 @@ static void journal_write_done(struct closure *cl)
} else if (!j->err_seq || seq < j->err_seq) } else if (!j->err_seq || seq < j->err_seq)
j->err_seq = seq; j->err_seq = seq;
j->seq_ondisk = seq;
/* /*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets: * more buckets:

View file

@ -226,8 +226,6 @@ void bch2_journal_space_available(struct journal *j)
ret = cur_entry_journal_stuck; ret = cur_entry_journal_stuck;
} else if (!j->space[journal_space_discarded].next_entry) } else if (!j->space[journal_space_discarded].next_entry)
ret = cur_entry_journal_full; ret = cur_entry_journal_full;
else if (!fifo_free(&j->pin))
ret = cur_entry_journal_pin_full;
if ((j->space[journal_space_clean_ondisk].next_entry < if ((j->space[journal_space_clean_ondisk].next_entry <
j->space[journal_space_clean_ondisk].total) && j->space[journal_space_clean_ondisk].total) &&
@ -369,9 +367,6 @@ static inline void __journal_pin_drop(struct journal *j,
if (atomic_dec_and_test(&pin_list->count) && if (atomic_dec_and_test(&pin_list->count) &&
pin_list == &fifo_peek_front(&j->pin)) pin_list == &fifo_peek_front(&j->pin))
bch2_journal_reclaim_fast(j); bch2_journal_reclaim_fast(j);
else if (fifo_used(&j->pin) == 1 &&
atomic_read(&pin_list->count) == 1)
journal_wake(j);
} }
void bch2_journal_pin_drop(struct journal *j, void bch2_journal_pin_drop(struct journal *j,
@ -769,8 +764,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
*/ */
ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) || ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) ||
journal_last_seq(j) > seq_to_flush || journal_last_seq(j) > seq_to_flush ||
(fifo_used(&j->pin) == 1 && !fifo_used(&j->pin);
atomic_read(&fifo_peek_front(&j->pin).count) == 1);
spin_unlock(&j->lock); spin_unlock(&j->lock);
mutex_unlock(&j->reclaim_lock); mutex_unlock(&j->reclaim_lock);

View file

@ -1344,7 +1344,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
} }
sb_clean->flags = 0; sb_clean->flags = 0;
sb_clean->journal_seq = cpu_to_le64(journal_cur_seq(&c->journal) - 1); sb_clean->journal_seq = cpu_to_le64(atomic64_read(&c->journal.seq));
/* Trying to catch outstanding bug: */ /* Trying to catch outstanding bug: */
BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX); BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);