3324249e6e
commit e6fff81e487089e47358a028526a9f63cdbcd503 upstream.
When we replay unfinished intent items that have been recovered from the
log, it's possible that the replay will cause the creation of more
deferred work items. As outlined in commit 509955823c ("xfs: log
recovery should replay deferred ops in order"), later work items have an
implicit ordering dependency on earlier work items. Therefore, recovery
must replay the items (both recovered and created) in the same order
that they would have been during normal operation.
For log recovery, we enforce this ordering by using an empty transaction
to collect deferred ops that get created in the process of recovering a
log intent item to prevent them from being committed before the rest of
the recovered intent items. After we finish committing all the
recovered log items, we allocate a transaction with an enormous block
reservation, splice our huge list of created deferred ops into that
transaction, and commit it, thereby finishing all those ops.
This is /really/ hokey -- it's the one place in XFS where we allow
nested transactions; the splicing of the defer ops list is inelegant
and has to be done twice per recovery function; and the broken way we
handle inode pointers and block reservations causes subtle use-after-free
and allocator problems that will be fixed by this patch and the two
patches after it.
Therefore, replace the hokey empty transaction with a structure designed
to capture each chain of deferred ops that are created as part of
recovering a single unfinished log intent. Finally, refactor the loop
that replays those chains to do so using one transaction per chain.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
87 lines
2.6 KiB
C
87 lines
2.6 KiB
C
// SPDX-License-Identifier: GPL-2.0+
|
|
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
|
|
#ifndef __XFS_REFCOUNT_ITEM_H__
|
|
#define __XFS_REFCOUNT_ITEM_H__
|
|
|
|
/*
 * There are (currently) two pairs of refcount btree redo item types:
 * increase and decrease.  The log items for these are CUI (refcount
 * update intent) and CUD (refcount update done).  The redo item type
 * is encoded in the flags field of each xfs_map_extent.
 *
 * *I items should be recorded in the *first* of a series of rolled
 * transactions, and the *D items should be recorded in the same
 * transaction that records the associated refcountbt updates.
 *
 * Should the system crash after the commit of the first transaction
 * but before the commit of the final transaction in a series, log
 * recovery will use the redo information recorded by the intent items
 * to replay the refcountbt metadata updates.
 */
|
|
|
|
/* kernel only CUI/CUD definitions */

/*
 * Forward declarations: this header only refers to these structures
 * through pointers, so their full definitions are not needed here.
 */
struct xfs_mount;
struct kmem_zone;
|
|
|
|
/*
 * Max number of extents in fast allocation path.
 */
#define	XFS_CUI_MAX_FAST_EXTENTS	16
|
|
|
|
/*
 * Define CUI flag bits.  Manipulated by set/clear/test_bit operators.
 *
 * Those operators take a bit index, so the value below is a bit number,
 * not a mask.
 * NOTE(review): presumably applied to xfs_cui_log_item.cui_flags --
 * confirm against the callers.
 */
#define	XFS_CUI_RECOVERED		1
|
|
|
|
/*
|
|
* This is the "refcount update intent" log item. It is used to log
|
|
* the fact that some reverse mappings need to change. It is used in
|
|
* conjunction with the "refcount update done" log item described
|
|
* below.
|
|
*
|
|
* These log items follow the same rules as struct xfs_efi_log_item;
|
|
* see the comments about that structure (in xfs_extfree_item.h) for
|
|
* more details.
|
|
*/
|
|
struct xfs_cui_log_item {
|
|
struct xfs_log_item cui_item;
|
|
atomic_t cui_refcount;
|
|
atomic_t cui_next_extent;
|
|
unsigned long cui_flags; /* misc flags */
|
|
struct xfs_cui_log_format cui_format;
|
|
};
|
|
|
|
static inline size_t
|
|
xfs_cui_log_item_sizeof(
|
|
unsigned int nr)
|
|
{
|
|
return offsetof(struct xfs_cui_log_item, cui_format) +
|
|
xfs_cui_log_format_sizeof(nr);
|
|
}
|
|
|
|
/*
|
|
* This is the "refcount update done" log item. It is used to log the
|
|
* fact that some refcountbt updates mentioned in an earlier cui item
|
|
* have been performed.
|
|
*/
|
|
struct xfs_cud_log_item {
|
|
struct xfs_log_item cud_item;
|
|
struct xfs_cui_log_item *cud_cuip;
|
|
struct xfs_cud_log_format cud_format;
|
|
};
|
|
|
|
extern struct kmem_zone *xfs_cui_zone;
|
|
extern struct kmem_zone *xfs_cud_zone;
|
|
|
|
struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
|
|
void xfs_cui_item_free(struct xfs_cui_log_item *);
|
|
void xfs_cui_release(struct xfs_cui_log_item *);
|
|
int xfs_cui_recover(struct xfs_cui_log_item *cuip,
|
|
struct list_head *capture_list);
|
|
|
|
#endif /* __XFS_REFCOUNT_ITEM_H__ */
|