From 1be052b20ba04b635cf87bcff80de5314697953e Mon Sep 17 00:00:00 2001 From: Eugene Zemtsov Date: Mon, 18 Nov 2019 20:21:06 -0800 Subject: [PATCH] ANDROID: Initial commit of Incremental FS Fully working incremental fs filesystem Signed-off-by: Eugene Zemtsov Signed-off-by: Paul Lawrence Bug: 133435829 Change-Id: I14741a61ce7891a0f9054e70f026917712cbef78 --- fs/Kconfig | 1 + fs/Makefile | 1 + fs/incfs/Kconfig | 18 + fs/incfs/Makefile | 9 + fs/incfs/compat.h | 33 + fs/incfs/data_mgmt.c | 1142 ++++++++ fs/incfs/data_mgmt.h | 339 +++ fs/incfs/format.c | 696 +++++ fs/incfs/format.h | 349 +++ fs/incfs/integrity.c | 217 ++ fs/incfs/integrity.h | 72 + fs/incfs/internal.h | 21 + fs/incfs/main.c | 103 + fs/incfs/vfs.c | 2202 +++++++++++++++ fs/incfs/vfs.h | 13 + include/uapi/linux/incrementalfs.h | 244 ++ .../selftests/filesystems/incfs/Makefile | 16 + .../selftests/filesystems/incfs/config | 1 + .../selftests/filesystems/incfs/incfs_test.c | 2421 +++++++++++++++++ .../selftests/filesystems/incfs/utils.c | 377 +++ .../selftests/filesystems/incfs/utils.h | 59 + 21 files changed, 8334 insertions(+) create mode 100644 fs/incfs/Kconfig create mode 100644 fs/incfs/Makefile create mode 100644 fs/incfs/compat.h create mode 100644 fs/incfs/data_mgmt.c create mode 100644 fs/incfs/data_mgmt.h create mode 100644 fs/incfs/format.c create mode 100644 fs/incfs/format.h create mode 100644 fs/incfs/integrity.c create mode 100644 fs/incfs/integrity.h create mode 100644 fs/incfs/internal.h create mode 100644 fs/incfs/main.c create mode 100644 fs/incfs/vfs.c create mode 100644 fs/incfs/vfs.h create mode 100644 include/uapi/linux/incrementalfs.h create mode 100644 tools/testing/selftests/filesystems/incfs/Makefile create mode 100644 tools/testing/selftests/filesystems/incfs/config create mode 100644 tools/testing/selftests/filesystems/incfs/incfs_test.c create mode 100644 tools/testing/selftests/filesystems/incfs/utils.c create mode 100644 tools/testing/selftests/filesystems/incfs/utils.h diff 
--git a/fs/Kconfig b/fs/Kconfig index 42fe99e24071..26454d4f815f 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -121,6 +121,7 @@ source "fs/quota/Kconfig" source "fs/autofs/Kconfig" source "fs/fuse/Kconfig" source "fs/overlayfs/Kconfig" +source "fs/incfs/Kconfig" menu "Caches" diff --git a/fs/Makefile b/fs/Makefile index cc5524e8acda..1edfae4d6a0b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -112,6 +112,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_ORANGEFS_FS) += orangefs/ +obj-$(CONFIG_INCREMENTAL_FS) += incfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ diff --git a/fs/incfs/Kconfig b/fs/incfs/Kconfig new file mode 100644 index 000000000000..d860c07664c3 --- /dev/null +++ b/fs/incfs/Kconfig @@ -0,0 +1,18 @@ +config INCREMENTAL_FS + tristate "Incremental file system support" + depends on BLOCK + select DECOMPRESS_LZ4 + select CRC32 + select CRYPTO + select CRYPTO_SHA256 + select X509_CERTIFICATE_PARSER + select ASYMMETRIC_KEY_TYPE + select ASYMMETRIC_PUBLIC_KEY_SUBTYPE + select PKCS7_MESSAGE_PARSER + help + Incremental FS is a read-only virtual file system that facilitates execution + of programs while their binaries are still being lazily downloaded over the + network, USB or pigeon post. + + To compile this file system support as a module, choose M here: the + module will be called incrementalfs. 
diff --git a/fs/incfs/Makefile b/fs/incfs/Makefile new file mode 100644 index 000000000000..8d734bf91ecd --- /dev/null +++ b/fs/incfs/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_INCREMENTAL_FS) += incrementalfs.o + +incrementalfs-y := \ + data_mgmt.o \ + format.o \ + integrity.o \ + main.o \ + vfs.o diff --git a/fs/incfs/compat.h b/fs/incfs/compat.h new file mode 100644 index 000000000000..f6fd9b2b3cb2 --- /dev/null +++ b/fs/incfs/compat.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef _INCFS_COMPAT_H +#define _INCFS_COMPAT_H + +#include +#include + +typedef unsigned int __poll_t; + +#ifndef u64_to_user_ptr +#define u64_to_user_ptr(x) ( \ +{ \ + typecheck(u64, x); \ + (void __user *)(uintptr_t)x; \ +} \ +) +#endif + +#ifndef lru_to_page +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +#endif + +#define readahead_gfp_mask(x) \ + (mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN) + +#ifndef SB_ACTIVE +#define SB_ACTIVE MS_ACTIVE +#endif + +#endif /* _INCFS_COMPAT_H */ diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c new file mode 100644 index 000000000000..25ea1099946d --- /dev/null +++ b/fs/incfs/data_mgmt.c @@ -0,0 +1,1142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "data_mgmt.h" +#include "format.h" +#include "integrity.h" + +struct mount_info *incfs_alloc_mount_info(struct super_block *sb, + struct mount_options *options, + struct path *backing_dir_path) +{ + struct mount_info *mi = NULL; + int error = 0; + + mi = kzalloc(sizeof(*mi), GFP_NOFS); + if (!mi) + return ERR_PTR(-ENOMEM); + + mi->mi_sb = sb; + mi->mi_options = *options; + mi->mi_backing_dir_path = *backing_dir_path; + mi->mi_owner = get_current_cred(); + path_get(&mi->mi_backing_dir_path); + mutex_init(&mi->mi_dir_struct_mutex); + 
mutex_init(&mi->mi_pending_reads_mutex); + init_waitqueue_head(&mi->mi_pending_reads_notif_wq); + INIT_LIST_HEAD(&mi->mi_reads_list_head); + + if (options->read_log_pages != 0) { + size_t buf_size = PAGE_SIZE * options->read_log_pages; + + spin_lock_init(&mi->mi_log.rl_writer_lock); + init_waitqueue_head(&mi->mi_log.ml_notif_wq); + + mi->mi_log.rl_size = buf_size / sizeof(*mi->mi_log.rl_ring_buf); + mi->mi_log.rl_ring_buf = kzalloc(buf_size, GFP_NOFS); + if (!mi->mi_log.rl_ring_buf) { + error = -ENOMEM; + goto err; + } + } + + return mi; + +err: + incfs_free_mount_info(mi); + return ERR_PTR(error); +} + +void incfs_free_mount_info(struct mount_info *mi) +{ + if (!mi) + return; + + dput(mi->mi_index_dir); + path_put(&mi->mi_backing_dir_path); + mutex_destroy(&mi->mi_dir_struct_mutex); + mutex_destroy(&mi->mi_pending_reads_mutex); + put_cred(mi->mi_owner); + kfree(mi->mi_log.rl_ring_buf); + kfree(mi); +} + +static void data_file_segment_init(struct data_file_segment *segment) +{ + init_waitqueue_head(&segment->new_data_arrival_wq); + mutex_init(&segment->blockmap_mutex); + INIT_LIST_HEAD(&segment->reads_list_head); +} + +static void data_file_segment_destroy(struct data_file_segment *segment) +{ + mutex_destroy(&segment->blockmap_mutex); +} + +struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) +{ + struct data_file *df = NULL; + struct backing_file_context *bfc = NULL; + int md_records; + u64 size; + int error = 0; + int i; + + if (!bf || !mi) + return ERR_PTR(-EFAULT); + + if (!S_ISREG(bf->f_inode->i_mode)) + return ERR_PTR(-EBADF); + + bfc = incfs_alloc_bfc(bf); + if (IS_ERR(bfc)) + return ERR_CAST(bfc); + + df = kzalloc(sizeof(*df), GFP_NOFS); + if (!df) { + error = -ENOMEM; + goto out; + } + + df->df_backing_file_context = bfc; + df->df_mount_info = mi; + for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) + data_file_segment_init(&df->df_segments[i]); + + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (error) + goto out; + error 
= incfs_read_file_header(bfc, &df->df_metadata_off, + &df->df_id, &size); + mutex_unlock(&bfc->bc_mutex); + + if (error) + goto out; + + df->df_size = size; + if (size > 0) + df->df_block_count = get_blocks_count_for_size(size); + + md_records = incfs_scan_metadata_chain(df); + if (md_records < 0) + error = md_records; + +out: + if (error) { + incfs_free_bfc(bfc); + df->df_backing_file_context = NULL; + incfs_free_data_file(df); + return ERR_PTR(error); + } + return df; +} + +void incfs_free_data_file(struct data_file *df) +{ + int i; + + if (!df) + return; + + incfs_free_mtree(df->df_hash_tree); + for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) + data_file_segment_destroy(&df->df_segments[i]); + incfs_free_bfc(df->df_backing_file_context); + kfree(df); +} + +int make_inode_ready_for_data_ops(struct mount_info *mi, + struct inode *inode, + struct file *backing_file) +{ + struct inode_info *node = get_incfs_node(inode); + struct data_file *df = NULL; + int err = 0; + + inode_lock(inode); + if (S_ISREG(inode->i_mode)) { + if (!node->n_file) { + df = incfs_open_data_file(mi, backing_file); + + if (IS_ERR(df)) + err = PTR_ERR(df); + else + node->n_file = df; + } + } else + err = -EBADF; + inode_unlock(inode); + return err; +} + +struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf) +{ + struct dir_file *dir = NULL; + + if (!S_ISDIR(bf->f_inode->i_mode)) + return ERR_PTR(-EBADF); + + dir = kzalloc(sizeof(*dir), GFP_NOFS); + if (!dir) + return ERR_PTR(-ENOMEM); + + dir->backing_dir = get_file(bf); + dir->mount_info = mi; + return dir; +} + +void incfs_free_dir_file(struct dir_file *dir) +{ + if (!dir) + return; + if (dir->backing_dir) + fput(dir->backing_dir); + kfree(dir); +} + +static ssize_t decompress(struct mem_range src, struct mem_range dst) +{ + int result = LZ4_decompress_safe(src.data, dst.data, src.len, dst.len); + + if (result < 0) + return -EBADMSG; + + return result; +} + +static void log_block_read(struct mount_info *mi, 
incfs_uuid_t *id, + int block_index, bool timed_out) +{ + struct read_log *log = &mi->mi_log; + struct read_log_state state; + s64 now_us = ktime_to_us(ktime_get()); + struct read_log_record record = { + .file_id = *id, + .block_index = block_index, + .timed_out = timed_out, + .timestamp_us = now_us + }; + + if (log->rl_size == 0) + return; + + spin_lock(&log->rl_writer_lock); + state = READ_ONCE(log->rl_state); + log->rl_ring_buf[state.next_index] = record; + if (++state.next_index == log->rl_size) { + state.next_index = 0; + ++state.current_pass_no; + } + WRITE_ONCE(log->rl_state, state); + spin_unlock(&log->rl_writer_lock); + + wake_up_all(&log->ml_notif_wq); +} + +static int validate_hash_tree(struct file *bf, struct data_file *df, + int block_index, struct mem_range data, u8 *buf) +{ + u8 digest[INCFS_MAX_HASH_SIZE] = {}; + struct mtree *tree = NULL; + struct ondisk_signature *sig = NULL; + struct mem_range calc_digest_rng; + struct mem_range saved_digest_rng; + struct mem_range root_hash_rng; + int digest_size; + int hash_block_index = block_index; + int hash_per_block; + int lvl = 0; + int res; + + tree = df->df_hash_tree; + sig = df->df_signature; + if (!tree || !sig) + return 0; + + digest_size = tree->alg->digest_size; + hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + calc_digest_rng = range(digest, digest_size); + res = incfs_calc_digest(tree->alg, data, calc_digest_rng); + if (res) + return res; + + for (lvl = 0; lvl < tree->depth; lvl++) { + loff_t lvl_off = tree->hash_level_suboffset[lvl] + + sig->mtree_offset; + loff_t hash_block_off = lvl_off + + round_down(hash_block_index * digest_size, + INCFS_DATA_FILE_BLOCK_SIZE); + size_t hash_off_in_block = hash_block_index * digest_size + % INCFS_DATA_FILE_BLOCK_SIZE; + struct mem_range buf_range = range(buf, + INCFS_DATA_FILE_BLOCK_SIZE); + ssize_t read_res = incfs_kread(bf, buf, + INCFS_DATA_FILE_BLOCK_SIZE, hash_block_off); + + if (read_res < 0) + return read_res; + if (read_res != 
INCFS_DATA_FILE_BLOCK_SIZE) + return -EIO; + + saved_digest_rng = range(buf + hash_off_in_block, digest_size); + if (!incfs_equal_ranges(calc_digest_rng, saved_digest_rng)) { + int i; + bool zero = true; + + pr_debug("incfs: Hash mismatch lvl:%d blk:%d\n", + lvl, block_index); + for (i = 0; i < saved_digest_rng.len; ++i) + if (saved_digest_rng.data[i]) { + zero = false; + break; + } + + if (zero) + pr_debug("incfs: Note saved_digest all zero - did you forget to load the hashes?\n"); + return -EBADMSG; + } + + res = incfs_calc_digest(tree->alg, buf_range, calc_digest_rng); + if (res) + return res; + hash_block_index /= hash_per_block; + } + + root_hash_rng = range(tree->root_hash, digest_size); + if (!incfs_equal_ranges(calc_digest_rng, root_hash_rng)) { + pr_debug("incfs: Root hash mismatch blk:%d\n", block_index); + return -EBADMSG; + } + return 0; +} + +static int revalidate_signature(struct file *bf, struct data_file *df) +{ + struct ondisk_signature *sig = df->df_signature; + struct mem_range root_hash = {}; + int result = 0; + u8 *sig_buf = NULL; + u8 *add_data_buf = NULL; + ssize_t read_res; + + /* File has no signature. */ + if (!sig || !df->df_hash_tree || sig->sig_size == 0) + return 0; + + /* Signature has already been validated. 
*/ + if (df->df_signature_validated) + return 0; + + add_data_buf = kzalloc(sig->add_data_size, GFP_NOFS); + if (!add_data_buf) { + result = -ENOMEM; + goto out; + } + + read_res = incfs_kread(bf, add_data_buf, sig->add_data_size, + sig->add_data_offset); + if (read_res < 0) { + result = read_res; + goto out; + } + if (read_res != sig->add_data_size) { + result = -EIO; + goto out; + } + + sig_buf = kzalloc(sig->sig_size, GFP_NOFS); + if (!sig_buf) { + result = -ENOMEM; + goto out; + } + + read_res = incfs_kread(bf, sig_buf, sig->sig_size, sig->sig_offset); + if (read_res < 0) { + result = read_res; + goto out; + } + if (read_res != sig->sig_size) { + result = -EIO; + goto out; + } + + root_hash = range(df->df_hash_tree->root_hash, + df->df_hash_tree->alg->digest_size); + + result = incfs_validate_pkcs7_signature( + range(sig_buf, sig->sig_size), + root_hash, + range(add_data_buf, sig->add_data_size)); + + if (result == 0) + df->df_signature_validated = true; +out: + kfree(sig_buf); + kfree(add_data_buf); + return result; +} + +static struct data_file_segment *get_file_segment(struct data_file *df, + int block_index) +{ + int seg_idx = block_index % ARRAY_SIZE(df->df_segments); + + return &df->df_segments[seg_idx]; +} + +static bool is_data_block_present(struct data_file_block *block) +{ + return (block->db_backing_file_data_offset != 0) && + (block->db_stored_size != 0); +} + +static int get_data_file_block(struct data_file *df, int index, + struct data_file_block *res_block) +{ + struct incfs_blockmap_entry bme = {}; + struct backing_file_context *bfc = NULL; + loff_t blockmap_off = 0; + u16 flags = 0; + int error = 0; + + if (!df || !res_block) + return -EFAULT; + + blockmap_off = df->df_blockmap_off; + bfc = df->df_backing_file_context; + + if (index < 0 || index >= df->df_block_count || blockmap_off == 0) + return -EINVAL; + + error = incfs_read_blockmap_entry(bfc, index, blockmap_off, &bme); + if (error) + return error; + + flags = le16_to_cpu(bme.me_flags); + 
res_block->db_backing_file_data_offset = + le16_to_cpu(bme.me_data_offset_hi); + res_block->db_backing_file_data_offset <<= 32; + res_block->db_backing_file_data_offset |= + le32_to_cpu(bme.me_data_offset_lo); + res_block->db_stored_size = le16_to_cpu(bme.me_data_size); + res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? + COMPRESSION_LZ4 : + COMPRESSION_NONE; + return 0; +} + +static bool is_read_done(struct pending_read *read) +{ + return atomic_read_acquire(&read->done) != 0; +} + +static void set_read_done(struct pending_read *read) +{ + atomic_set_release(&read->done, 1); +} + +/* + * Notifies a given data file about pending read from a given block. + * Returns a new pending read entry. + */ +static struct pending_read *add_pending_read(struct data_file *df, + int block_index) +{ + struct pending_read *result = NULL; + struct data_file_segment *segment = NULL; + struct mount_info *mi = NULL; + + WARN_ON(!df); + segment = get_file_segment(df, block_index); + mi = df->df_mount_info; + + WARN_ON(!segment); + WARN_ON(!mi); + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return NULL; + + result->file_id = df->df_id; + result->block_index = block_index; + result->timestamp_us = ktime_to_us(ktime_get()); + + mutex_lock(&mi->mi_pending_reads_mutex); + + result->serial_number = ++mi->mi_last_pending_read_number; + mi->mi_pending_reads_count++; + + list_add(&result->mi_reads_list, &mi->mi_reads_list_head); + list_add(&result->segment_reads_list, &segment->reads_list_head); + mutex_unlock(&mi->mi_pending_reads_mutex); + + wake_up_all(&mi->mi_pending_reads_notif_wq); + return result; +} + +/* Notifies a given data file that pending read is completed. 
*/ +static void remove_pending_read(struct data_file *df, struct pending_read *read) +{ + struct mount_info *mi = NULL; + + if (!df || !read) { + WARN_ON(!df); + WARN_ON(!read); + return; + } + + mi = df->df_mount_info; + + mutex_lock(&mi->mi_pending_reads_mutex); + list_del(&read->mi_reads_list); + list_del(&read->segment_reads_list); + + mi->mi_pending_reads_count--; + mutex_unlock(&mi->mi_pending_reads_mutex); + + kfree(read); +} + +static void notify_pending_reads(struct mount_info *mi, + struct data_file_segment *segment, + int index) +{ + struct pending_read *entry = NULL; + + /* Notify pending reads waiting for this block. */ + mutex_lock(&mi->mi_pending_reads_mutex); + list_for_each_entry(entry, &segment->reads_list_head, + segment_reads_list) { + if (entry->block_index == index) + set_read_done(entry); + } + mutex_unlock(&mi->mi_pending_reads_mutex); + wake_up_all(&segment->new_data_arrival_wq); +} + +static int wait_for_data_block(struct data_file *df, int block_index, + int timeout_ms, + struct data_file_block *res_block) +{ + struct data_file_block block = {}; + struct data_file_segment *segment = NULL; + struct pending_read *read = NULL; + struct mount_info *mi = NULL; + int error = 0; + int wait_res = 0; + + if (!df || !res_block) + return -EFAULT; + + if (block_index < 0 || block_index >= df->df_block_count) + return -EINVAL; + + if (df->df_blockmap_off <= 0) + return -ENODATA; + + segment = get_file_segment(df, block_index); + WARN_ON(!segment); + + error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (error) + return error; + + /* Look up the given block */ + error = get_data_file_block(df, block_index, &block); + + /* If it's not found, create a pending read */ + if (!error && !is_data_block_present(&block) && timeout_ms != 0) + read = add_pending_read(df, block_index); + + mutex_unlock(&segment->blockmap_mutex); + if (error) + return error; + + /* If the block was found, just return it. No need to wait. 
*/
+	if (is_data_block_present(&block)) {
+		*res_block = block;
+		return 0;
+	}
+
+	mi = df->df_mount_info;
+
+	if (timeout_ms == 0) {
+		log_block_read(mi, &df->df_id, block_index,
+			       true /*timed out*/);
+		return -ETIME;
+	}
+
+	if (!read)
+		return -ENOMEM;
+
+	/* Wait for notifications about block's arrival */
+	wait_res =
+		wait_event_interruptible_timeout(segment->new_data_arrival_wq,
+						 (is_read_done(read)),
+						 msecs_to_jiffies(timeout_ms));
+
+	/* Woke up, the pending read is no longer needed. */
+	remove_pending_read(df, read);
+	read = NULL;
+
+	if (wait_res == 0) {
+		/* Wait has timed out */
+		log_block_read(mi, &df->df_id, block_index,
+			       true /*timed out*/);
+		return -ETIME;
+	}
+	if (wait_res < 0) {
+		/*
+		 * Only ERESTARTSYS is really expected here when a signal
+		 * comes while we wait.
+		 */
+		return wait_res;
+	}
+
+	error = mutex_lock_interruptible(&segment->blockmap_mutex);
+	if (error)
+		return error;
+
+	/*
+	 * Re-read block's info now, it has just arrived and
+	 * should be available.
+	 */
+	error = get_data_file_block(df, block_index, &block);
+	if (!error) {
+		if (is_data_block_present(&block))
+			*res_block = block;
+		else {
+			/*
+			 * Somehow wait finished successfully but block still
+			 * can't be found. It's not normal.
+ */ + pr_warn("incfs:Wait succeeded, but block not found.\n"); + error = -ENODATA; + } + } + + mutex_unlock(&segment->blockmap_mutex); + return error; +} + +ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, + int index, int timeout_ms, + struct mem_range tmp) +{ + loff_t pos; + ssize_t result; + size_t bytes_to_read; + struct mount_info *mi = NULL; + struct file *bf = NULL; + struct data_file_block block = {}; + + if (!dst.data || !df) + return -EFAULT; + + if (tmp.len < 2 * INCFS_DATA_FILE_BLOCK_SIZE) + return -ERANGE; + + mi = df->df_mount_info; + bf = df->df_backing_file_context->bc_file; + + result = wait_for_data_block(df, index, timeout_ms, &block); + if (result < 0) + goto out; + + pos = block.db_backing_file_data_offset; + if (block.db_comp_alg == COMPRESSION_NONE) { + bytes_to_read = min(dst.len, block.db_stored_size); + result = incfs_kread(bf, dst.data, bytes_to_read, pos); + + /* Some data was read, but not enough */ + if (result >= 0 && result != bytes_to_read) + result = -EIO; + } else { + bytes_to_read = min(tmp.len, block.db_stored_size); + result = incfs_kread(bf, tmp.data, bytes_to_read, pos); + if (result == bytes_to_read) { + result = + decompress(range(tmp.data, bytes_to_read), dst); + if (result < 0) { + const char *name = + bf->f_path.dentry->d_name.name; + + pr_warn_once("incfs: Decompression error. 
%s", + name); + } + } else if (result >= 0) { + /* Some data was read, but not enough */ + result = -EIO; + } + } + + if (result > 0) { + int err = validate_hash_tree(bf, df, index, dst, tmp.data); + + if (err < 0) + result = err; + } + + if (result > 0) { + int err = revalidate_signature(bf, df); + + if (err < 0) + result = err; + } + + if (result >= 0) + log_block_read(mi, &df->df_id, index, false /*timed out*/); + +out: + return result; +} + +int incfs_process_new_data_block(struct data_file *df, + struct incfs_new_data_block *block, u8 *data) +{ + struct mount_info *mi = NULL; + struct backing_file_context *bfc = NULL; + struct data_file_segment *segment = NULL; + struct data_file_block existing_block = {}; + u16 flags = 0; + int error = 0; + + if (!df || !block) + return -EFAULT; + + bfc = df->df_backing_file_context; + mi = df->df_mount_info; + + if (block->block_index >= df->df_block_count) + return -ERANGE; + + segment = get_file_segment(df, block->block_index); + if (!segment) + return -EFAULT; + if (block->compression == COMPRESSION_LZ4) + flags |= INCFS_BLOCK_COMPRESSED_LZ4; + + error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (error) + return error; + + error = get_data_file_block(df, block->block_index, &existing_block); + if (error) + goto unlock; + if (is_data_block_present(&existing_block)) { + /* Block is already present, nothing to do here */ + goto unlock; + } + + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (!error) { + error = incfs_write_data_block_to_backing_file( + bfc, range(data, block->data_len), block->block_index, + df->df_blockmap_off, flags); + mutex_unlock(&bfc->bc_mutex); + } + if (!error) + notify_pending_reads(mi, segment, block->block_index); + +unlock: + mutex_unlock(&segment->blockmap_mutex); + if (error) + pr_debug("incfs: %s %d error: %d\n", __func__, + block->block_index, error); + return error; +} + +int incfs_read_file_signature(struct data_file *df, struct mem_range dst) +{ + struct file *bf = 
df->df_backing_file_context->bc_file;
+	struct ondisk_signature *sig;
+	int read_res = 0;
+
+	if (!dst.data)
+		return -EFAULT;
+
+	sig = df->df_signature;
+	if (!sig)
+		return 0;
+
+	if (dst.len < sig->sig_size)
+		return -E2BIG;
+
+	read_res = incfs_kread(bf, dst.data, sig->sig_size, sig->sig_offset);
+
+	if (read_res < 0)
+		return read_res;
+
+	if (read_res != sig->sig_size)
+		return -EIO;
+
+	return read_res;
+}
+
+/*
+ * Writes one hash tree block, described by @block with payload @data, into
+ * the hash area of the backing file of @df.
+ *
+ * Returns 0 on success, or:
+ *   -EFAULT   @df or @block is NULL
+ *   -EINVAL   @block is not flagged INCFS_BLOCK_FLAGS_HASH
+ *   -ENOTSUPP the file has no hash tree, no signature or no hash area
+ *   -ERANGE   the block would end beyond the hash area
+ *   or the error from mutex_lock_interruptible() or
+ *   incfs_write_hash_block_to_backing_file().
+ */
+int incfs_process_new_hash_block(struct data_file *df,
+				 struct incfs_new_data_block *block, u8 *data)
+{
+	struct backing_file_context *bfc = NULL;
+	struct mtree *hash_tree = NULL;
+	struct ondisk_signature *sig = NULL;
+	loff_t hash_area_base = 0;
+	loff_t hash_area_size = 0;
+	loff_t block_end = 0;
+	int error = 0;
+
+	if (!df || !block)
+		return -EFAULT;
+
+	if (!(block->flags & INCFS_BLOCK_FLAGS_HASH))
+		return -EINVAL;
+
+	bfc = df->df_backing_file_context;
+	hash_tree = df->df_hash_tree;
+	sig = df->df_signature;
+	if (!hash_tree || !sig || sig->mtree_offset == 0)
+		return -ENOTSUPP;
+
+	hash_area_base = sig->mtree_offset;
+	hash_area_size = sig->mtree_size;
+
+	/*
+	 * Widen to loff_t before multiplying so that a large block index
+	 * cannot wrap around and slip past the range check.
+	 */
+	block_end = (loff_t)block->block_index * INCFS_DATA_FILE_BLOCK_SIZE +
+		    block->data_len;
+	if (hash_area_size < block_end) {
+		/* Hash block goes beyond dedicated hash area of this file. */
+		return -ERANGE;
+	}
+
+	/*
+	 * The lock attempt can fail (it is interruptible); the mutex may only
+	 * be released when it was actually taken.
+	 */
+	error = mutex_lock_interruptible(&bfc->bc_mutex);
+	if (!error) {
+		error = incfs_write_hash_block_to_backing_file(
+			bfc, range(data, block->data_len), block->block_index,
+			hash_area_base);
+		mutex_unlock(&bfc->bc_mutex);
+	}
+	return error;
+}
+
+/*
+ * Metadata handler for blockmap records: rejects a record whose block count
+ * differs from df_block_count (-EBADMSG), otherwise remembers the blockmap
+ * base offset in the data file.
+ */
+static int process_blockmap_md(struct incfs_blockmap *bm,
+			       struct metadata_handler *handler)
+{
+	struct data_file *df = handler->context;
+	int error = 0;
+	loff_t base_off = le64_to_cpu(bm->m_base_offset);
+	u32 block_count = le32_to_cpu(bm->m_block_count);
+
+	if (!df)
+		return -EFAULT;
+
+	if (df->df_block_count != block_count)
+		return -EBADMSG;
+
+	df->df_blockmap_off = base_off;
+	return error;
+}
+
+/*
+ * Metadata handler for file attribute records: stores offset, size and CRC
+ * of the attribute value in the data file. Returns -E2BIG when the size
+ * exceeds INCFS_MAX_FILE_ATTR_SIZE.
+ */
+static int process_file_attr_md(struct incfs_file_attr *fa,
+				struct metadata_handler *handler)
+{
+	struct data_file *df = handler->context;
+	u16 attr_size = le16_to_cpu(fa->fa_size);
+
+	if (!df)
+		return -EFAULT;
+
+	if (attr_size > INCFS_MAX_FILE_ATTR_SIZE)
+		return -E2BIG;
+
+	df->n_attr.fa_value_offset = le64_to_cpu(fa->fa_offset);
+	df->n_attr.fa_value_size = attr_size;
+	df->n_attr.fa_crc = le32_to_cpu(fa->fa_crc);
+
+	return 0;
+}
+
+static int process_file_signature_md(struct incfs_file_signature *sg,
+				struct metadata_handler *handler)
+{
+	struct data_file *df = handler->context;
+	struct mtree *hash_tree = NULL;
+	struct ondisk_signature *signature = NULL;
+	int error = 0;
+	loff_t base_tree_off = le64_to_cpu(sg->sg_hash_tree_offset);
+	u32 tree_size = le32_to_cpu(sg->sg_hash_tree_size);
+	loff_t sig_off = le64_to_cpu(sg->sg_sig_offset);
+	u32 sig_size = le32_to_cpu(sg->sg_sig_size);
+	loff_t add_data_off = le64_to_cpu(sg->sg_add_data_offset);
+	u32 add_data_size = le32_to_cpu(sg->sg_add_data_size);
+
+	if (!df)
+		return -ENOENT;
+
+	signature = kzalloc(sizeof(*signature), GFP_NOFS);
+	if (!signature) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	signature->add_data_offset = add_data_off;
+	signature->add_data_size = add_data_size;
+	signature->sig_offset = sig_off;
+
signature->sig_size = sig_size;
+	signature->mtree_offset = base_tree_off;
+	signature->mtree_size = tree_size;
+
+	hash_tree = incfs_alloc_mtree(sg->sg_hash_alg, df->df_block_count,
+		range(sg->sg_root_hash, sizeof(sg->sg_root_hash)));
+	if (IS_ERR(hash_tree)) {
+		error = PTR_ERR(hash_tree);
+		hash_tree = NULL;
+		goto out;
+	}
+	if (hash_tree->hash_tree_area_size != tree_size) {
+		error = -EINVAL;
+		goto out;
+	}
+	if (tree_size > 0 && handler->md_record_offset <= base_tree_off) {
+		error = -EINVAL;
+		goto out;
+	}
+	if (handler->md_record_offset <= signature->add_data_offset ||
+	    handler->md_record_offset <= signature->sig_offset) {
+		error = -EINVAL;
+		goto out;
+	}
+	df->df_hash_tree = hash_tree;
+	df->df_signature = signature;
+out:
+	if (error) {
+		incfs_free_mtree(hash_tree);
+		kfree(signature);
+	}
+
+	return error;
+}
+
+/*
+ * Walks the metadata record chain of @df, starting at df_metadata_off, and
+ * feeds every record to the blockmap / file attribute / signature handlers.
+ * The backing file mutex is held for the whole scan.
+ *
+ * Returns the number of records read, or a negative error:
+ *   -EFAULT  @df or its backing file context is NULL
+ *   -ENOMEM  the handler could not be allocated
+ *   or the error from mutex_lock_interruptible() or
+ *   incfs_read_next_metadata_record().
+ */
+int incfs_scan_metadata_chain(struct data_file *df)
+{
+	struct metadata_handler *handler = NULL;
+	int result = 0;
+	int records_count = 0;
+	int error = 0;
+	struct backing_file_context *bfc = NULL;
+
+	if (!df || !df->df_backing_file_context)
+		return -EFAULT;
+
+	bfc = df->df_backing_file_context;
+
+	handler = kzalloc(sizeof(*handler), GFP_NOFS);
+	if (!handler)
+		return -ENOMEM;
+
+	/* No writing to the backing file while it's being scanned. */
+	error = mutex_lock_interruptible(&bfc->bc_mutex);
+	if (error) {
+		/*
+		 * Report the interruption; returning 0 here would look like
+		 * a successful scan that found no records.
+		 */
+		result = error;
+		goto out;
+	}
+
+	/* Reading superblock */
+	handler->md_record_offset = df->df_metadata_off;
+	handler->context = df;
+	handler->handle_blockmap = process_blockmap_md;
+	handler->handle_file_attr = process_file_attr_md;
+	handler->handle_signature = process_file_signature_md;
+
+	pr_debug("incfs: Starting reading incfs-metadata records at offset %lld\n",
+		 handler->md_record_offset);
+	while (handler->md_record_offset > 0) {
+		error = incfs_read_next_metadata_record(bfc, handler);
+		if (error) {
+			pr_warn("incfs: Error during reading incfs-metadata record. Offset: %lld Record #%d Error code: %d\n",
+				handler->md_record_offset, records_count + 1,
+				-error);
+			break;
+		}
+		records_count++;
+	}
+	if (error) {
+		pr_debug("incfs: Error %d after reading %d incfs-metadata records.\n",
+			 -error, records_count);
+		result = error;
+	} else {
+		pr_debug("incfs: Finished reading %d incfs-metadata records.\n",
+			 records_count);
+		result = records_count;
+	}
+	mutex_unlock(&bfc->bc_mutex);
+out:
+	kfree(handler);
+	return result;
+}
+
+/*
+ * Quickly checks if there are pending reads with a serial number larger
+ * than a given one.
+ */
+bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number)
+{
+	bool result = false;
+
+	mutex_lock(&mi->mi_pending_reads_mutex);
+	result = (mi->mi_last_pending_read_number > last_number) &&
+		 (mi->mi_pending_reads_count > 0);
+	mutex_unlock(&mi->mi_pending_reads_mutex);
+	return result;
+}
+
+/*
+ * Copies up to @reads_size pending reads whose serial number is greater than
+ * @sn_lowerbound into @reads. Returns the number of entries written, 0 when
+ * @reads_size is not positive, or -EFAULT when @mi is NULL.
+ */
+int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound,
+				struct incfs_pending_read_info *reads,
+				int reads_size)
+{
+	int reported_reads = 0;
+	struct pending_read *entry = NULL;
+
+	if (!mi)
+		return -EFAULT;
+
+	if (reads_size <= 0)
+		return 0;
+
+	mutex_lock(&mi->mi_pending_reads_mutex);
+
+	if (mi->mi_last_pending_read_number <= sn_lowerbound
+	    || mi->mi_pending_reads_count == 0)
+		goto unlock;
+
+	list_for_each_entry(entry, &mi->mi_reads_list_head, mi_reads_list) {
+		if (entry->serial_number <= sn_lowerbound)
+			continue;
+
+		reads[reported_reads].file_id = entry->file_id;
+		reads[reported_reads].block_index = entry->block_index;
+		reads[reported_reads].serial_number = entry->serial_number;
+		reads[reported_reads].timestamp_us = entry->timestamp_us;
+		/* reads[reported_reads].kind = INCFS_READ_KIND_PENDING; */
+
+		reported_reads++;
+		if (reported_reads >= reads_size)
+			break;
+	}
+
+unlock:
+	mutex_unlock(&mi->mi_pending_reads_mutex);
+
+	return reported_reads;
+}
+
+struct read_log_state incfs_get_log_state(struct mount_info *mi)
+{
+	struct read_log *log =
&mi->mi_log; + struct read_log_state result; + + spin_lock(&log->rl_writer_lock); + result = READ_ONCE(log->rl_state); + spin_unlock(&log->rl_writer_lock); + return result; +} + +static u64 calc_record_count(const struct read_log_state *state, int rl_size) +{ + return state->current_pass_no * (u64)rl_size + state->next_index; +} + +int incfs_get_uncollected_logs_count(struct mount_info *mi, + struct read_log_state state) +{ + struct read_log *log = &mi->mi_log; + + u64 count = calc_record_count(&log->rl_state, log->rl_size) - + calc_record_count(&state, log->rl_size); + return min_t(int, count, log->rl_size); +} + +static void fill_pending_read_from_log_record( + struct incfs_pending_read_info *dest, const struct read_log_record *src, + struct read_log_state *state, u64 log_size) +{ + dest->file_id = src->file_id; + dest->block_index = src->block_index; + dest->serial_number = + state->current_pass_no * log_size + state->next_index; + dest->timestamp_us = src->timestamp_us; +} + +int incfs_collect_logged_reads(struct mount_info *mi, + struct read_log_state *reader_state, + struct incfs_pending_read_info *reads, + int reads_size) +{ + struct read_log *log = &mi->mi_log; + struct read_log_state live_state = incfs_get_log_state(mi); + u64 read_count = calc_record_count(reader_state, log->rl_size); + u64 written_count = calc_record_count(&live_state, log->rl_size); + int dst_idx; + + if (reader_state->next_index >= log->rl_size || + read_count > written_count) + return -ERANGE; + + if (read_count == written_count) + return 0; + + if (read_count > written_count) { + /* This reader is somehow ahead of the writer. */ + pr_debug("incfs: Log reader is ahead of writer\n"); + *reader_state = live_state; + } + + if (written_count - read_count > log->rl_size) { + /* + * Reading pointer is too far behind, + * start from the record following the write pointer. 
+ */ + pr_debug("incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n", + (u32)reader_state->next_index, + (u32)reader_state->current_pass_no, + (u32)live_state.next_index, + (u32)live_state.current_pass_no - 1, (u32)log->rl_size); + + *reader_state = (struct read_log_state){ + .next_index = live_state.next_index, + .current_pass_no = live_state.current_pass_no - 1, + }; + } + + for (dst_idx = 0; dst_idx < reads_size; dst_idx++) { + if (reader_state->next_index == live_state.next_index && + reader_state->current_pass_no == live_state.current_pass_no) + break; + + fill_pending_read_from_log_record( + &reads[dst_idx], + &log->rl_ring_buf[reader_state->next_index], + reader_state, log->rl_size); + + reader_state->next_index++; + if (reader_state->next_index == log->rl_size) { + reader_state->next_index = 0; + reader_state->current_pass_no++; + } + } + return dst_idx; +} + +bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) +{ + if (lhs.len != rhs.len) + return false; + return memcmp(lhs.data, rhs.data, lhs.len) == 0; +} diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h new file mode 100644 index 000000000000..82ccab3be4bb --- /dev/null +++ b/fs/incfs/data_mgmt.h @@ -0,0 +1,339 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef _INCFS_DATA_MGMT_H +#define _INCFS_DATA_MGMT_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "internal.h" + +#define SEGMENTS_PER_FILE 3 + +struct read_log_record { + u32 block_index : 31; + + u32 timed_out : 1; + + u64 timestamp_us; + + incfs_uuid_t file_id; +} __packed; + +struct read_log_state { + /* Next slot in rl_ring_buf to write to. */ + u32 next_index; + + /* Current number of writer pass over rl_ring_buf */ + u32 current_pass_no; +}; + +/* A ring buffer to save records about data blocks which were recently read. 
+ */
+struct read_log {
+	/* Record storage, indexed by read_log_state.next_index. */
+	struct read_log_record *rl_ring_buf;
+
+	/* Current writer position; read under rl_writer_lock. */
+	struct read_log_state rl_state;
+
+	spinlock_t rl_writer_lock;
+
+	/* Number of slots in rl_ring_buf; indexes wrap to 0 at this value. */
+	int rl_size;
+
+	/*
+	 * A queue of waiters who want to be notified about reads.
+	 */
+	wait_queue_head_t ml_notif_wq;
+};
+
+/* Mount options; a copy is kept per mount in mount_info.mi_options. */
+struct mount_options {
+	unsigned int read_timeout_ms;
+	unsigned int readahead_pages;
+	unsigned int read_log_pages;
+	unsigned int read_log_wakeup_count;
+	bool no_backing_file_cache;
+	bool no_backing_file_readahead;
+};
+
+/*
+ * Per-mount state. See incfs_alloc_mount_info() and
+ * incfs_free_mount_info() declared below.
+ */
+struct mount_info {
+	struct super_block *mi_sb;
+
+	struct path mi_backing_dir_path;
+
+	struct dentry *mi_index_dir;
+
+	const struct cred *mi_owner;
+
+	struct mount_options mi_options;
+
+	/* This mutex is to be taken before create, rename, delete */
+	struct mutex mi_dir_struct_mutex;
+
+	/*
+	 * A queue of waiters who want to be notified about new pending reads.
+	 */
+	wait_queue_head_t mi_pending_reads_notif_wq;
+
+	/*
+	 * Protects:
+	 *  - mi_reads_list_head
+	 *  - mi_pending_reads_count
+	 *  - mi_last_pending_read_number
+	 *  - data_file_segment.reads_list_head
+	 */
+	struct mutex mi_pending_reads_mutex;
+
+	/* List of active pending_read objects */
+	struct list_head mi_reads_list_head;
+
+	/* Total number of items in mi_reads_list_head */
+	int mi_pending_reads_count;
+
+	/*
+	 * Last serial number that was assigned to a pending read.
+	 * 0 means no pending reads have been seen yet.
+	 */
+	int mi_last_pending_read_number;
+
+	/* Temporary buffer for read logger. */
+	struct read_log mi_log;
+};
+
+/*
+ * Describes one stored data block.
+ * NOTE(review): presumably an in-memory decoding of incfs_blockmap_entry;
+ * confirm against the code that fills it.
+ */
+struct data_file_block {
+	loff_t db_backing_file_data_offset;
+
+	size_t db_stored_size;
+
+	enum incfs_compression_alg db_comp_alg;
+};
+
+/*
+ * A read that has not been satisfied yet. file_id, block_index,
+ * serial_number and timestamp_us are what incfs_collect_pending_reads()
+ * reports for each entry.
+ */
+struct pending_read {
+	incfs_uuid_t file_id;
+
+	s64 timestamp_us;
+
+	atomic_t done;
+
+	int block_index;
+
+	int serial_number;
+
+	/* Link in mount_info.mi_reads_list_head. */
+	struct list_head mi_reads_list;
+
+	/* NOTE(review): presumably the link in data_file_segment.reads_list_head. */
+	struct list_head segment_reads_list;
+};
+
+struct data_file_segment {
+	wait_queue_head_t new_data_arrival_wq;
+
+	/* Protects reads and writes from the blockmap */
+	/* Good candidate for read/write mutex */
+	struct mutex blockmap_mutex;
+
+	/* List of active pending_read objects belonging to this segment */
+	/* Protected by mount_info.mi_pending_reads_mutex */
+	struct list_head reads_list_head;
+};
+
+/*
+ * Extra info associated with a file. Just a few bytes set by a user.
+ */
+struct file_attr {
+	loff_t fa_value_offset;
+
+	size_t fa_value_size;
+
+	u32 fa_crc;
+};
+
+
+/* In-memory state of one data file stored in a backing file. */
+struct data_file {
+	struct backing_file_context *df_backing_file_context;
+
+	struct mount_info *df_mount_info;
+
+	incfs_uuid_t df_id;
+
+	/*
+	 * Array of segments used to reduce lock contention for the file.
+	 * The segment used for a block depends on the block's index.
+	 */
+	struct data_file_segment df_segments[SEGMENTS_PER_FILE];
+
+	/* Base offset of the first metadata record. */
+	loff_t df_metadata_off;
+
+	/* Base offset of the block map. */
+	loff_t df_blockmap_off;
+
+	/* File size in bytes */
+	loff_t df_size;
+
+	int df_block_count; /* File size in DATA_FILE_BLOCK_SIZE blocks */
+
+	struct file_attr n_attr;
+
+	struct mtree *df_hash_tree;
+
+	struct ondisk_signature *df_signature;
+
+	/* True, if file signature has already been validated. */
+	bool df_signature_validated;
+};
+
+/* State of an open directory; returned by get_incfs_dir_file(). */
+struct dir_file {
+	struct mount_info *mount_info;
+
+	struct file *backing_dir;
+};
+
+/*
+ * incfs inode. The VFS inode is embedded as n_vfs_inode, which lets
+ * get_incfs_node() recover this struct with container_of().
+ */
+struct inode_info {
+	struct mount_info *n_mount_info; /* A mount, this file belongs to */
+
+	struct inode *n_backing_inode;
+
+	struct data_file *n_file;
+
+	struct inode n_vfs_inode;
+};
+
+/* Per-dentry data stored in dentry->d_fsdata (see get_incfs_dentry()). */
+struct dentry_info {
+	struct path backing_path;
+};
+
+struct mount_info *incfs_alloc_mount_info(struct super_block *sb,
+					  struct mount_options *options,
+					  struct path *backing_dir_path);
+
+void incfs_free_mount_info(struct mount_info *mi);
+
+struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf);
+void incfs_free_data_file(struct data_file *df);
+
+int incfs_scan_metadata_chain(struct data_file *df);
+
+struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf);
+void incfs_free_dir_file(struct dir_file *dir);
+
+ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df,
+				   int index, int timeout_ms,
+				   struct mem_range tmp);
+
+int incfs_read_file_signature(struct data_file *df, struct mem_range dst);
+
+int incfs_process_new_data_block(struct data_file *df,
+				 struct incfs_new_data_block *block, u8 *data);
+
+int incfs_process_new_hash_block(struct data_file *df,
+				 struct incfs_new_data_block *block, u8 *data);
+
+
+bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number);
+
+/*
+ * Collects pending reads and saves them into the array (reads/reads_size).
+ * Only reads with serial_number > sn_lowerbound are reported.
+ * Returns how many reads were saved into the array. 
+ */
+int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound,
+				struct incfs_pending_read_info *reads,
+				int reads_size);
+
+int incfs_collect_logged_reads(struct mount_info *mi,
+			       struct read_log_state *start_state,
+			       struct incfs_pending_read_info *reads,
+			       int reads_size);
+struct read_log_state incfs_get_log_state(struct mount_info *mi);
+int incfs_get_uncollected_logs_count(struct mount_info *mi,
+				     struct read_log_state state);
+
+/*
+ * Maps a VFS inode to the inode_info that embeds it.
+ * Returns NULL for a NULL inode and, after a one-time warning, for an inode
+ * whose superblock magic is not INCFS_MAGIC_NUMBER.
+ */
+static inline struct inode_info *get_incfs_node(struct inode *inode)
+{
+	if (!inode)
+		return NULL;
+
+	if (inode->i_sb->s_magic == INCFS_MAGIC_NUMBER)
+		return container_of(inode, struct inode_info, n_vfs_inode);
+
+	/* The inode lives on some other filesystem. */
+	pr_warn_once("incfs: %s on an alien inode.", __func__);
+	return NULL;
+}
+
+/*
+ * Returns the data_file attached to the inode of a regular file, or NULL
+ * if f is NULL, is not a regular file, or its inode is not an incfs inode.
+ */
+static inline struct data_file *get_incfs_data_file(struct file *f)
+{
+	struct inode_info *info;
+
+	if (!f || !S_ISREG(f->f_inode->i_mode))
+		return NULL;
+
+	info = get_incfs_node(f->f_inode);
+	return info ? info->n_file : NULL;
+}
+
+/*
+ * Returns the dir_file kept in f->private_data, or NULL if f is NULL or
+ * is not a directory.
+ */
+static inline struct dir_file *get_incfs_dir_file(struct file *f)
+{
+	if (f && S_ISDIR(f->f_inode->i_mode))
+		return (struct dir_file *)f->private_data;
+
+	return NULL;
+}
+
+/*
+ * Make sure that inode_info.n_file is initialized and inode can be used
+ * for reading and writing data from/to the backing file. 
+ */
+int make_inode_ready_for_data_ops(struct mount_info *mi,
+				  struct inode *inode,
+				  struct file *backing_file);
+
+/* Returns the dentry_info stored in d->d_fsdata, or NULL if d is NULL. */
+static inline struct dentry_info *get_incfs_dentry(const struct dentry *d)
+{
+	return d ? (struct dentry_info *)d->d_fsdata : NULL;
+}
+
+/*
+ * Copies the backing path of d into *path and takes a reference on it with
+ * path_get(). If d has no dentry_info, *path is set to an empty path and
+ * no reference is taken.
+ */
+static inline void get_incfs_backing_path(const struct dentry *d,
+					  struct path *path)
+{
+	struct dentry_info *info = get_incfs_dentry(d);
+
+	if (info) {
+		*path = info->backing_path;
+		path_get(path);
+	} else {
+		*path = (struct path) {};
+	}
+}
+
+/*
+ * Number of INCFS_DATA_FILE_BLOCK_SIZE blocks needed to hold size bytes,
+ * rounding up; 0 for an empty file.
+ */
+static inline int get_blocks_count_for_size(u64 size)
+{
+	return size ? 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE : 0;
+}
+
+bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs);
+
+#endif /* _INCFS_DATA_MGMT_H */
diff --git a/fs/incfs/format.c b/fs/incfs/format.c
new file mode 100644
index 000000000000..27498b9c3d34
--- /dev/null
+++ b/fs/incfs/format.c
@@ -0,0 +1,696 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018 Google LLC
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "compat.h"
+#include "format.h"
+
+/*
+ * Allocates a backing file context that holds its own reference to
+ * backing_file. Returns ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+struct backing_file_context *incfs_alloc_bfc(struct file *backing_file)
+{
+	struct backing_file_context *bfc = kzalloc(sizeof(*bfc), GFP_NOFS);
+
+	if (!bfc)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&bfc->bc_mutex);
+	bfc->bc_file = get_file(backing_file);
+	return bfc;
+}
+
+/*
+ * Drops the backing file reference (if any) and frees the context.
+ * A NULL bfc is ignored.
+ */
+void incfs_free_bfc(struct backing_file_context *bfc)
+{
+	struct file *backing;
+
+	if (!bfc)
+		return;
+
+	backing = bfc->bc_file;
+	if (backing)
+		fput(backing);
+
+	mutex_destroy(&bfc->bc_mutex);
+	kfree(bfc);
+}
+
+/* Returns the size of the backing file, used as its end offset. */
+loff_t incfs_get_end_offset(struct file *f)
+{
+	struct inode *inode = file_inode(f);
+
+	/*
+	 * This function assumes that file size and the end-offset
+	 * are the same. This is not always true.
+	 */
+	return i_size_read(inode);
+}
+
+/*
+ * Truncate the tail of the file to the given length.
+ * Used to rollback partially successful multistep writes. 
+ */
+static int truncate_backing_file(struct backing_file_context *bfc,
+				loff_t new_end)
+{
+	struct file *backing;
+	struct inode *inode;
+	struct dentry *dentry;
+	struct iattr attr;
+	loff_t cur_end;
+	int err;
+
+	if (!bfc)
+		return -EFAULT;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	backing = bfc->bc_file;
+	if (!backing)
+		return -EFAULT;
+
+	cur_end = incfs_get_end_offset(backing);
+	if (cur_end == new_end)
+		return 0;
+	/* This helper only shrinks the file; it never extends it. */
+	if (cur_end < new_end)
+		return -EINVAL;
+
+	inode = backing->f_inode;
+	dentry = backing->f_path.dentry;
+
+	attr.ia_valid = ATTR_SIZE;
+	attr.ia_size = new_end;
+
+	inode_lock(inode);
+	err = notify_change(dentry, &attr, NULL);
+	inode_unlock(inode);
+
+	return err;
+}
+
+/* Append a given number of zero bytes to the end of the backing file. */
+static int append_zeros(struct backing_file_context *bfc, size_t len)
+{
+	loff_t old_size;
+	int err;
+
+	if (!bfc)
+		return -EFAULT;
+
+	if (len == 0)
+		return 0;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	/*
+	 * Allocating a single byte at the new last offset is enough: it
+	 * grows the file to the desired size, and the area in between reads
+	 * back as zeros.
+	 */
+	old_size = incfs_get_end_offset(bfc->bc_file);
+	err = vfs_fallocate(bfc->bc_file, 0, old_size + len - 1, 1);
+	if (err)
+		return err;
+
+	return vfs_fsync_range(bfc->bc_file, old_size, old_size + len, 1);
+}
+
+/*
+ * Writes count bytes from buf at offset pos of the backing file. A short
+ * write is reported as -EIO. With sync set, the written range is flushed
+ * before returning.
+ */
+static int write_to_bf(struct backing_file_context *bfc, const void *buf,
+			size_t count, loff_t pos, bool sync)
+{
+	ssize_t written = incfs_kwrite(bfc->bc_file, buf, count, pos);
+
+	if (written < 0)
+		return written;
+	if (written != count)
+		return -EIO;
+
+	return sync ? vfs_fsync_range(bfc->bc_file, pos, pos + count, 1) : 0;
+}
+
+/*
+ * Computes the CRC32 of a metadata record. h_record_crc and
+ * h_next_md_offset are treated as zero for the calculation and are restored
+ * before returning. At most INCFS_MAX_METADATA_RECORD_SIZE bytes are hashed.
+ */
+static u32 calc_md_crc(struct incfs_md_header *record)
+{
+	const __le32 crc_backup = record->h_record_crc;
+	const __le64 next_backup = record->h_next_md_offset;
+	size_t len = min_t(size_t, le16_to_cpu(record->h_record_size),
+			   INCFS_MAX_METADATA_RECORD_SIZE);
+	u32 crc;
+
+	/* Blank out the fields that are excluded from the checksum. */
+	record->h_record_crc = 0;
+	record->h_next_md_offset = 0;
+	crc = crc32(0, record, len);
+
+	/* Put the original values back. */
+	record->h_record_crc = crc_backup;
+	record->h_next_md_offset = next_backup;
+
+	return crc;
+}
+
+/*
+ * Append a given metadata record to the backing file and update a previous
+ * record to add the new record to the metadata list. 
+ */
+static int append_md_to_backing_file(struct backing_file_context *bfc,
+				     struct incfs_md_header *record)
+{
+	int result = 0;
+	loff_t record_offset;
+	loff_t file_pos;
+	__le64 new_md_offset;
+	size_t record_size;
+
+	if (!bfc || !record)
+		return -EFAULT;
+
+	if (bfc->bc_last_md_record_offset < 0)
+		return -EINVAL;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	record_size = le16_to_cpu(record->h_record_size);
+	file_pos = incfs_get_end_offset(bfc->bc_file);
+	/*
+	 * On-disk fields are little-endian; the reader decodes this one with
+	 * le64_to_cpu(), so it must be encoded here.
+	 */
+	record->h_prev_md_offset = cpu_to_le64(bfc->bc_last_md_record_offset);
+	record->h_next_md_offset = cpu_to_le64(0);
+	/* The CRC must be computed last, after all other fields are final. */
+	record->h_record_crc = cpu_to_le32(calc_md_crc(record));
+
+	/* Write the metadata record to the end of the backing file */
+	record_offset = file_pos;
+	new_md_offset = cpu_to_le64(record_offset);
+	result = write_to_bf(bfc, record, record_size, file_pos, true);
+	if (result)
+		return result;
+
+	/* Update next metadata offset in a previous record or a superblock. */
+	if (bfc->bc_last_md_record_offset) {
+		/*
+		 * Find a place in the previous md record where new record's
+		 * offset needs to be saved.
+		 */
+		file_pos = bfc->bc_last_md_record_offset +
+			offsetof(struct incfs_md_header, h_next_md_offset);
+	} else {
+		/*
+		 * No metadata yet, find a place to update in the
+		 * file_header.
+		 */
+		file_pos = offsetof(struct incfs_file_header,
+				fh_first_md_offset);
+	}
+	result = write_to_bf(bfc, &new_md_offset, sizeof(new_md_offset),
+				file_pos, true);
+	if (result)
+		return result;
+
+	/* Only a fully linked record becomes the new tail of the list. */
+	bfc->bc_last_md_record_offset = record_offset;
+	return result;
+}
+
+/*
+ * Reserve 0-filled space for the blockmap body, and append
+ * incfs_blockmap metadata record pointing to it. 
+ */
+int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc,
+				u32 block_count, loff_t *map_base_off)
+{
+	struct incfs_blockmap bm_record = {};
+	size_t body_size = block_count * sizeof(struct incfs_blockmap_entry);
+	loff_t body_off = 0;
+	int err = 0;
+
+	if (!bfc)
+		return -EFAULT;
+
+	bm_record.m_header.h_md_entry_type = INCFS_MD_BLOCK_MAP;
+	bm_record.m_header.h_record_size = cpu_to_le16(sizeof(bm_record));
+	bm_record.m_header.h_next_md_offset = cpu_to_le64(0);
+	bm_record.m_block_count = cpu_to_le32(block_count);
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	/* The zero-filled blockmap body goes at the current end of file. */
+	body_off = incfs_get_end_offset(bfc->bc_file);
+	err = append_zeros(bfc, body_size);
+	if (err)
+		return err;
+
+	/* Then comes the metadata record that points back at that body. */
+	bm_record.m_base_offset = cpu_to_le64(body_off);
+	err = append_md_to_backing_file(bfc, &bm_record.m_header);
+	if (err) {
+		/* Undo the body allocation; the original error is returned. */
+		truncate_backing_file(bfc, body_off);
+		return err;
+	}
+
+	if (map_base_off)
+		*map_base_off = body_off;
+
+	return err;
+}
+
+/*
+ * Write file attribute data and metadata record to the backing file. 
+ */
+int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc,
+		struct mem_range value, struct incfs_file_attr *attr)
+{
+	struct incfs_file_attr file_attr = {};
+	int result = 0;
+	u32 crc = 0;
+	loff_t value_offset = 0;
+
+	if (!bfc)
+		return -EFAULT;
+
+	if (value.len > INCFS_MAX_FILE_ATTR_SIZE)
+		return -ENOSPC;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	crc = crc32(0, value.data, value.len);
+	value_offset = incfs_get_end_offset(bfc->bc_file);
+	file_attr.fa_header.h_md_entry_type = INCFS_MD_FILE_ATTR;
+	file_attr.fa_header.h_record_size = cpu_to_le16(sizeof(file_attr));
+	file_attr.fa_header.h_next_md_offset = cpu_to_le64(0);
+	file_attr.fa_size = cpu_to_le16((u16)value.len);
+	file_attr.fa_offset = cpu_to_le64(value_offset);
+	/* fa_crc is a __le32 field; a 64-bit conversion would truncate. */
+	file_attr.fa_crc = cpu_to_le32(crc);
+
+	/* The value blob is written first, the record describing it second. */
+	result = write_to_bf(bfc, value.data, value.len, value_offset, true);
+	if (result)
+		return result;
+
+	result = append_md_to_backing_file(bfc, &file_attr.fa_header);
+	if (result) {
+		/* Error, rollback file changes */
+		truncate_backing_file(bfc, value_offset);
+	} else if (attr) {
+		*attr = file_attr;
+	}
+
+	return result;
+}
+
+/*
+ * Appends the signature blob (sig), the additional data (add_data) and a
+ * zero-filled hash tree area of tree_size bytes to the backing file,
+ * followed by an INCFS_MD_SIGNATURE record that points to them and carries
+ * root_hash.
+ *
+ * Returns 0 on success, -EFAULT if bfc is NULL, -E2BIG if root_hash does
+ * not fit into the record, or the error of a failed write. After a failed
+ * write the file is truncated back to its initial size.
+ */
+int incfs_write_signature_to_backing_file(struct backing_file_context *bfc,
+		u8 hash_alg, u32 tree_size,
+		struct mem_range root_hash, struct mem_range add_data,
+		struct mem_range sig)
+{
+	struct incfs_file_signature sg = {};
+	int result = 0;
+	loff_t rollback_pos = 0;
+	loff_t tree_area_pos = 0;
+	size_t alignment = 0;
+
+	if (!bfc)
+		return -EFAULT;
+	if (root_hash.len > sizeof(sg.sg_root_hash))
+		return -E2BIG;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	rollback_pos = incfs_get_end_offset(bfc->bc_file);
+
+	sg.sg_header.h_md_entry_type = INCFS_MD_SIGNATURE;
+	sg.sg_header.h_record_size = cpu_to_le16(sizeof(sg));
+	sg.sg_header.h_next_md_offset = cpu_to_le64(0);
+	sg.sg_hash_alg = hash_alg;
+	if (sig.data != NULL && sig.len > 0) {
+		loff_t pos = incfs_get_end_offset(bfc->bc_file);
+
+		sg.sg_sig_size = cpu_to_le32(sig.len);
+		sg.sg_sig_offset = cpu_to_le64(pos);
+
+		result = write_to_bf(bfc, sig.data, sig.len, pos, false);
+		if (result)
+			goto err;
+	}
+
+	if (add_data.len > 0) {
+		loff_t pos = incfs_get_end_offset(bfc->bc_file);
+
+		sg.sg_add_data_size = cpu_to_le32(add_data.len);
+		sg.sg_add_data_offset = cpu_to_le64(pos);
+
+		result = write_to_bf(bfc, add_data.data,
+			add_data.len, pos, false);
+		if (result)
+			goto err;
+	}
+
+	tree_area_pos = incfs_get_end_offset(bfc->bc_file);
+	if (hash_alg && tree_size > 0) {
+		if (tree_size > 5 * INCFS_DATA_FILE_BLOCK_SIZE) {
+			/*
+			 * If hash tree is big enough, it makes sense to
+			 * align in the backing file for faster access.
+			 */
+			loff_t offset = round_up(tree_area_pos, PAGE_SIZE);
+
+			alignment = offset - tree_area_pos;
+			tree_area_pos = offset;
+		}
+
+		/*
+		 * If root hash is not the only hash in the tree,
+		 * reserve 0-filled space for the tree.
+		 */
+		result = append_zeros(bfc, tree_size + alignment);
+		if (result)
+			goto err;
+
+		sg.sg_hash_tree_size = cpu_to_le32(tree_size);
+		sg.sg_hash_tree_offset = cpu_to_le64(tree_area_pos);
+	}
+	memcpy(sg.sg_root_hash, root_hash.data, root_hash.len);
+
+	/* Write a hash tree metadata record pointing to the hash tree above. */
+	result = append_md_to_backing_file(bfc, &sg.sg_header);
+err:
+	if (result) {
+		/* Error, rollback file changes */
+		truncate_backing_file(bfc, rollback_pos);
+	}
+	return result;
+}
+
+/*
+ * Write a backing file header
+ * It should always be called only on empty file.
+ * incfs_file_header.fh_first_md_offset is 0 for now, but will be updated
+ * once the first metadata record is added. 
+ */
+int incfs_write_fh_to_backing_file(struct backing_file_context *bfc,
+				   incfs_uuid_t *uuid, u64 file_size)
+{
+	struct incfs_file_header fh = {};
+	loff_t file_pos = 0;
+
+	/* uuid is dereferenced below, so it must be valid as well. */
+	if (!bfc || !uuid)
+		return -EFAULT;
+
+	fh.fh_magic = cpu_to_le64(INCFS_MAGIC_NUMBER);
+	fh.fh_version = cpu_to_le64(INCFS_FORMAT_CURRENT_VER);
+	fh.fh_header_size = cpu_to_le16(sizeof(fh));
+	fh.fh_first_md_offset = cpu_to_le64(0);
+	fh.fh_data_block_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE);
+
+	fh.fh_file_size = cpu_to_le64(file_size);
+	fh.fh_uuid = *uuid;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	/* The header may only be written into an empty backing file. */
+	file_pos = incfs_get_end_offset(bfc->bc_file);
+	if (file_pos != 0)
+		return -EEXIST;
+
+	return write_to_bf(bfc, &fh, sizeof(fh), file_pos, true);
+}
+
+/*
+ * Write a given data block and update file's blockmap to point to it.
+ *
+ * The block is appended to the end of the backing file, then the blockmap
+ * entry for block_index (located relative to bm_base_off) is rewritten with
+ * the block's offset, size and flags.
+ *
+ * Returns 0 on success, -EFAULT if bfc is NULL, -EINVAL if the block is
+ * 64KB or larger, block_index is negative or the blockmap entry would lie
+ * at or beyond the end of the file, or the error of a failed write.
+ */
+int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc,
+				     struct mem_range block, int block_index,
+				     loff_t bm_base_off, u16 flags)
+{
+	struct incfs_blockmap_entry bm_entry = {};
+	int result = 0;
+	loff_t data_offset = 0;
+	loff_t bm_entry_off =
+		bm_base_off + sizeof(struct incfs_blockmap_entry) * block_index;
+
+	if (!bfc)
+		return -EFAULT;
+
+	/* me_data_size is 16 bits wide, so the block must fit into it. */
+	if (block.len >= (1 << 16) || block_index < 0)
+		return -EINVAL;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	data_offset = incfs_get_end_offset(bfc->bc_file);
+	if (data_offset <= bm_entry_off) {
+		/* Blockmap entry is beyond the file's end. It is not normal. */
+		return -EINVAL;
+	}
+
+	/* Write the block data at the end of the backing file. */
+	result = write_to_bf(bfc, block.data, block.len, data_offset, false);
+	if (result)
+		return result;
+
+	/* Update the blockmap to point to the newly written data. */
+	bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset);
+	bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32));
+	bm_entry.me_data_size = cpu_to_le16((u16)block.len);
+	bm_entry.me_flags = cpu_to_le16(flags);
+
+	result = write_to_bf(bfc, &bm_entry, sizeof(bm_entry),
+				bm_entry_off, false);
+	return result;
+}
+
+/*
+ * Write one hash block into the hash area that starts at hash_area_off.
+ * The block is stored at block_index * INCFS_DATA_FILE_BLOCK_SIZE within
+ * that area, which must already exist in the backing file.
+ *
+ * Returns 0 on success, -EFAULT if bfc is NULL, -EINVAL if block_index is
+ * negative or the block would extend past the end of the file, or the
+ * error of a failed write.
+ */
+int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc,
+					struct mem_range block,
+					int block_index, loff_t hash_area_off)
+{
+	loff_t data_offset = 0;
+	loff_t file_end = 0;
+
+
+	if (!bfc)
+		return -EFAULT;
+
+	/* A negative index would place the write before the hash area. */
+	if (block_index < 0)
+		return -EINVAL;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	/* Widen before multiplying, int arithmetic can overflow here. */
+	data_offset = hash_area_off +
+		(loff_t)block_index * INCFS_DATA_FILE_BLOCK_SIZE;
+	file_end = incfs_get_end_offset(bfc->bc_file);
+	if (data_offset + block.len > file_end) {
+		/* Block is located beyond the file's end. It is not normal. */
+		return -EINVAL;
+	}
+
+	return write_to_bf(bfc, block.data, block.len, data_offset, false);
+}
+
+/*
+ * Initialize a new image in a given backing file: truncate it to zero
+ * length and write a fresh file header, all under bc_mutex.
+ *
+ * Returns 0 on success, -EFAULT if bfc or its file is NULL, the error of
+ * mutex_lock_interruptible() if the wait for the mutex was interrupted, or
+ * the error of the truncate/header write.
+ */
+int incfs_make_empty_backing_file(struct backing_file_context *bfc,
+				  incfs_uuid_t *uuid, u64 file_size)
+{
+	int result = 0;
+
+	if (!bfc || !bfc->bc_file)
+		return -EFAULT;
+
+	result = mutex_lock_interruptible(&bfc->bc_mutex);
+	if (result)
+		return result;	/* The mutex is not held, do not unlock it. */
+
+	result = truncate_backing_file(bfc, 0);
+	if (result)
+		goto out;
+
+	result = incfs_write_fh_to_backing_file(bfc, uuid, file_size);
+out:
+	mutex_unlock(&bfc->bc_mutex);
+	return result;
+}
+
+/* Reads the single blockmap entry of block_index into *bm_entry. */
+int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index,
+			loff_t bm_base_off,
+			struct incfs_blockmap_entry *bm_entry)
+{
+	return incfs_read_blockmap_entries(bfc, bm_entry, block_index, 1,
+			bm_base_off);
+}
+
+/*
+ * Reads blocks_number consecutive blockmap entries, starting with the entry
+ * of start_index, from the blockmap located at bm_base_off.
+ *
+ * Returns 0 on success, -EFAULT if bfc or entries is NULL, -ENODATA if
+ * start_index is negative or bm_base_off is not positive, -EINVAL if
+ * blocks_number is negative, -EIO on a short read, or the read error.
+ */
+int incfs_read_blockmap_entries(struct backing_file_context *bfc,
+		struct incfs_blockmap_entry *entries,
+		int start_index, int blocks_number,
+		loff_t bm_base_off)
+{
+	loff_t bm_entry_off =
+		bm_base_off + sizeof(struct incfs_blockmap_entry) * start_index;
+	const size_t bytes_to_read = sizeof(struct incfs_blockmap_entry)
+					* blocks_number;
+	/* incfs_kread() returns ssize_t; keep its full width. */
+	ssize_t result = 0;
+
+	if (!bfc || !entries)
+		return -EFAULT;
+
+	if (start_index < 0 || bm_base_off <= 0)
+		return -ENODATA;
+
+	/* A negative count would turn into a huge unsigned read size. */
+	if (blocks_number < 0)
+		return -EINVAL;
+
+	result = incfs_kread(bfc->bc_file, entries, bytes_to_read,
+			     bm_entry_off);
+	if (result < 0)
+		return result;
+	if ((size_t)result < bytes_to_read)
+		return -EIO;
+	return 0;
+}
+
+
+/*
+ * Reads and validates the backing file header.
+ *
+ * On success stores the offset of the first metadata record in
+ * *first_md_off and, if the pointers are not NULL, the file uuid in *uuid
+ * and the file size in *file_size.
+ *
+ * Returns 0 on success, -EFAULT if bfc or first_md_off is NULL, -EBADMSG
+ * on a short read, -EILSEQ if the magic, version, block size or header size
+ * does not match, or the read error.
+ */
+int incfs_read_file_header(struct backing_file_context *bfc,
+			   loff_t *first_md_off, incfs_uuid_t *uuid,
+			   u64 *file_size)
+{
+	ssize_t bytes_read = 0;
+	struct incfs_file_header fh = {};
+
+	if (!bfc || !first_md_off)
+		return -EFAULT;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+	bytes_read = incfs_kread(bfc->bc_file, &fh, sizeof(fh), 0);
+	if (bytes_read < 0)
+		return bytes_read;
+
+	if (bytes_read < sizeof(fh))
+		return -EBADMSG;
+
+	if (le64_to_cpu(fh.fh_magic) != INCFS_MAGIC_NUMBER)
+		return -EILSEQ;
+
+	if (le64_to_cpu(fh.fh_version) > INCFS_FORMAT_CURRENT_VER)
+		return -EILSEQ;
+
+	if (le16_to_cpu(fh.fh_data_block_size) != INCFS_DATA_FILE_BLOCK_SIZE)
+		return -EILSEQ;
+
+	if (le16_to_cpu(fh.fh_header_size) != sizeof(fh))
+		return -EILSEQ;
+
+	/* first_md_off was checked for NULL at the top. */
+	*first_md_off = le64_to_cpu(fh.fh_first_md_offset);
+	if (uuid)
+		*uuid = fh.fh_uuid;
+	if (file_size)
+		*file_size = le64_to_cpu(fh.fh_file_size);
+	return 0;
+}
+
+/*
+ * Read through metadata records from the backing file one by one
+ * and call provided metadata handlers. 
+ */
+int incfs_read_next_metadata_record(struct backing_file_context *bfc,
+				    struct metadata_handler *handler)
+{
+	const ssize_t max_md_size = INCFS_MAX_METADATA_RECORD_SIZE;
+	ssize_t bytes_read = 0;
+	size_t md_record_size = 0;
+	loff_t next_record = 0;
+	loff_t prev_record = 0;
+	int res = 0;
+	struct incfs_md_header *md_hdr = NULL;
+
+	if (!bfc || !handler)
+		return -EFAULT;
+
+	LOCK_REQUIRED(bfc->bc_mutex);
+
+	/* Offset 0 holds the file header, it is never a metadata record. */
+	if (handler->md_record_offset == 0)
+		return -EPERM;
+
+	memset(&handler->md_buffer, 0, max_md_size);
+	bytes_read = incfs_kread(bfc->bc_file, &handler->md_buffer,
+				 max_md_size, handler->md_record_offset);
+	if (bytes_read < 0)
+		return bytes_read;
+	if (bytes_read < sizeof(*md_hdr))
+		return -EBADMSG;
+
+	md_hdr = &handler->md_buffer.md_header;
+	next_record = le64_to_cpu(md_hdr->h_next_md_offset);
+	prev_record = le64_to_cpu(md_hdr->h_prev_md_offset);
+	md_record_size = le16_to_cpu(md_hdr->h_record_size);
+
+	if (md_record_size > max_md_size) {
+		pr_warn("incfs: The record is too large. Size: %zu",
+				md_record_size);
+		return -EBADMSG;
+	}
+
+	/*
+	 * The CRC below covers h_record_size bytes. A size smaller than the
+	 * header would leave part of the header, and the whole body,
+	 * unchecked.
+	 */
+	if (md_record_size < sizeof(*md_hdr)) {
+		pr_warn("incfs: The record is too small. Size: %zu",
+				md_record_size);
+		return -EBADMSG;
+	}
+
+	if (bytes_read < md_record_size) {
+		pr_warn("incfs: The record hasn't been fully read.");
+		return -EBADMSG;
+	}
+
+	/* Records are only appended, so links must point forward. */
+	if (next_record <= handler->md_record_offset && next_record != 0) {
+		pr_warn("incfs: Next record (%lld) points back in file.",
+			next_record);
+		return -EBADMSG;
+	}
+
+	if (prev_record != handler->md_prev_record_offset) {
+		pr_warn("incfs: Metadata chain has been corrupted.");
+		return -EBADMSG;
+	}
+
+	if (le32_to_cpu(md_hdr->h_record_crc) != calc_md_crc(md_hdr)) {
+		pr_warn("incfs: Metadata CRC mismatch.");
+		return -EBADMSG;
+	}
+
+	/* Dispatch to the handler of the record type, if one is set. */
+	switch (md_hdr->h_md_entry_type) {
+	case INCFS_MD_NONE:
+		break;
+	case INCFS_MD_BLOCK_MAP:
+		if (handler->handle_blockmap)
+			res = handler->handle_blockmap(
+				&handler->md_buffer.blockmap, handler);
+		break;
+	case INCFS_MD_FILE_ATTR:
+		if (handler->handle_file_attr)
+			res = handler->handle_file_attr(
+				&handler->md_buffer.file_attr, handler);
+		break;
+	case INCFS_MD_SIGNATURE:
+		if (handler->handle_signature)
+			res = handler->handle_signature(
+				&handler->md_buffer.signature, handler);
+		break;
+	default:
+		res = -ENOTSUPP;
+		break;
+	}
+
+	/* The read position only advances when the handler succeeded. */
+	if (!res) {
+		if (next_record == 0) {
+			/*
+			 * Zero offset for the next record means that the last
+			 * metadata record has just been processed.
+			 */
+			bfc->bc_last_md_record_offset =
+				handler->md_record_offset;
+		}
+		handler->md_prev_record_offset = handler->md_record_offset;
+		handler->md_record_offset = next_record;
+	}
+	return res;
+}
+
+/*
+ * Reads size bytes at offset pos of f into buf using the kernel_read()
+ * signature of the running kernel version. Returns what kernel_read()
+ * returns.
+ */
+ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+	return kernel_read(f, pos, (char *)buf, size);
+#else
+	return kernel_read(f, buf, size, &pos);
+#endif
+}
+
+/*
+ * Writes size bytes from buf at offset pos of f using the kernel_write()
+ * signature of the running kernel version. Returns what kernel_write()
+ * returns.
+ */
+ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+	return kernel_write(f, buf, size, pos);
+#else
+	return kernel_write(f, buf, size, &pos);
+#endif
+}
diff --git a/fs/incfs/format.h b/fs/incfs/format.h
new file mode 100644
index 000000000000..a86881482e19
--- /dev/null
+++ b/fs/incfs/format.h
@@ -0,0 +1,349 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018 Google LLC
+ */
+
+/*
+ * Overview
+ * --------
+ * The backbone of the incremental-fs ondisk format is an append only linked
+ * list of metadata blocks. Each metadata block contains an offset of the next
+ * one. These blocks describe files and directories on the
+ * file system. They also represent actions of adding and removing file names
+ * (hard links).
+ *
+ * Every time incremental-fs instance is mounted, it reads through this list
+ * to recreate filesystem's state in memory. An offset of the first record in
+ * the metadata list is stored in the superblock at the beginning of the backing
+ * file.
+ *
+ * Most of the backing file is taken by data areas and blockmaps.
+ * Since data blocks can be compressed and have different sizes,
+ * single per-file data area can't be pre-allocated. That's why blockmaps are
+ * needed in order to find a location and size of each data block in
+ * the backing file. Each time a file is created, a corresponding block map is
+ * allocated to store future offsets of data blocks. 
+ *
+ * Whenever a data block is given by data loader to incremental-fs:
+ *   - A data area with the given block is appended to the end of
+ *     the backing file.
+ *   - A record in the blockmap for the given block index is updated to reflect
+ *     its location, size, and compression algorithm.
+ *
+ * Metadata records
+ * ----------------
+ * incfs_blockmap - metadata record that specifies size and location
+ *                  of a blockmap area for a given file. This area
+ *                  contains an array of incfs_blockmap_entry-s.
+ * incfs_file_signature - metadata record that specifies where file signature
+ *                        and its hash tree can be found in the backing file.
+ *
+ * incfs_file_attr - metadata record that specifies where additional file
+ *                   attributes blob can be found.
+ *
+ * Metadata header
+ * ---------------
+ * incfs_md_header - header of a metadata record. It's always a part
+ *                   of other structures and serves the purpose of metadata
+ *                   bookkeeping.
+ *
+ *              +-----------------------------------------------+       ^
+ *              |            incfs_md_header                    |       |
+ *              | 1. type of body(BLOCKMAP, FILE_ATTR..)        |       |
+ *              | 2. size of the whole record header + body     |       |
+ *              | 3. CRC of the whole record header + body      |       |
+ *              | 4. offset of the previous md record           |]------+
+ *              | 5. offset of the next md record (md link)     |]---+
+ *              +-----------------------------------------------+    |
+ *              |  Metadata record body with useful data        |    |
+ *              +-----------------------------------------------+    |
+ *                                                                   +--->
+ *
+ * Other ondisk structures
+ * -----------------------
+ * incfs_file_header - backing file header
+ * incfs_blockmap_entry - a record in a blockmap area that describes size
+ *                        and location of a data block.
+ * Data blocks don't have any particular structure, they are written to the
+ * backing file in a raw form as they come from a data loader. 
+ *
+ * Backing file layout
+ * -------------------
+ *
+ *
+ *              +-------------------------------------------+
+ *              |             incfs_file_header             |]---+
+ *              +-------------------------------------------+    |
+ *              |                 metadata                  |<---+
+ *              |           incfs_file_signature            |]---+
+ *              +-------------------------------------------+    |
+ *                        .........................              |
+ *              +-------------------------------------------+    |   metadata
+ *     +------->|               blockmap area               |    |  list links
+ *     |        |          [incfs_blockmap_entry]           |    |
+ *     |        |          [incfs_blockmap_entry]           |    |
+ *     |        |          [incfs_blockmap_entry]           |    |
+ *     |    +--[|          [incfs_blockmap_entry]           |    |
+ *     |    |   |          [incfs_blockmap_entry]           |    |
+ *     |    |   |          [incfs_blockmap_entry]           |    |
+ *     |    |   +-------------------------------------------+    |
+ *     |    |             .........................              |
+ *     |    |   +-------------------------------------------+    |
+ *     |    |   |                 metadata                  |<---+
+ *     +----|--[|              incfs_blockmap               |]---+
+ *          |   +-------------------------------------------+    |
+ *          |             .........................              |
+ *          |   +-------------------------------------------+    |
+ *          +-->|                data block                 |    |
+ *              +-------------------------------------------+    |
+ *                        .........................              |
+ *              +-------------------------------------------+    |
+ *              |                 metadata                  |<---+
+ *              |              incfs_file_attr              |
+ *              +-------------------------------------------+
+ */
+#ifndef _INCFS_FORMAT_H
+#define _INCFS_FORMAT_H
+#include
+#include
+#include
+
+#include "internal.h"
+
+#define INCFS_MAX_NAME_LEN 255
+#define INCFS_FORMAT_V1 1
+#define INCFS_FORMAT_CURRENT_VER INCFS_FORMAT_V1
+
+/* Values stored in incfs_md_header.h_md_entry_type. */
+enum incfs_metadata_type {
+	INCFS_MD_NONE = 0,
+	INCFS_MD_BLOCK_MAP = 1,
+	INCFS_MD_FILE_ATTR = 2,
+	INCFS_MD_SIGNATURE = 3
+};
+
+/* Header included at the beginning of all metadata records on the disk. */
+struct incfs_md_header {
+	/* One of enum incfs_metadata_type. */
+	__u8 h_md_entry_type;
+
+	/*
+	 * Size of the metadata record.
+	 * (e.g. inode, dir entry etc) not just this struct.
+	 */
+	__le16 h_record_size;
+
+	/*
+	 * CRC32 of the metadata record.
+	 * (e.g. inode, dir entry etc) not just this struct.
+	 */
+	__le32 h_record_crc;
+
+	/* Offset of the next metadata entry if any */
+	__le64 h_next_md_offset;
+
+	/* Offset of the previous metadata entry if any */
+	__le64 h_prev_md_offset;
+
+} __packed;
+
+/* Backing file header */
+struct incfs_file_header {
+	/* Magic number: INCFS_MAGIC_NUMBER */
+	__le64 fh_magic;
+
+	/* Format version: INCFS_FORMAT_CURRENT_VER */
+	__le64 fh_version;
+
+	/* sizeof(incfs_file_header) */
+	__le16 fh_header_size;
+
+	/* INCFS_DATA_FILE_BLOCK_SIZE */
+	__le16 fh_data_block_size;
+
+	/* Padding, also reserved for future use. */
+	__le32 fh_dummy;
+
+	/* Offset of the first metadata record */
+	__le64 fh_first_md_offset;
+
+	/*
+	 * Put file specific information after this point
+	 */
+
+	/* Full size of the file's content */
+	__le64 fh_file_size;
+
+	/* File uuid */
+	incfs_uuid_t fh_uuid;
+} __packed;
+
+/* Bits stored in incfs_blockmap_entry.me_flags. */
+enum incfs_block_map_entry_flags {
+	INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0),
+};
+
+/* Block map entry pointing to an actual location of the data block. */
+struct incfs_blockmap_entry {
+	/* Offset of the actual data block. Lower 32 bits */
+	__le32 me_data_offset_lo;
+
+	/* Offset of the actual data block. Higher 16 bits */
+	__le16 me_data_offset_hi;
+
+	/* How many bytes the data actually occupies in the backing file */
+	__le16 me_data_size;
+
+	/* Block flags from incfs_block_map_entry_flags */
+	__le16 me_flags;
+} __packed;
+
+/* Metadata record for locations of file blocks. Type = INCFS_MD_BLOCK_MAP */
+struct incfs_blockmap {
+	struct incfs_md_header m_header;
+
+	/* Base offset of the array of incfs_blockmap_entry */
+	__le64 m_base_offset;
+
+	/* Size of the map entry array in blocks */
+	__le32 m_block_count;
+} __packed;
+
+/* Metadata record for file attribute. Type = INCFS_MD_FILE_ATTR */
+struct incfs_file_attr {
+	struct incfs_md_header fa_header;
+
+	/* Offset of the attribute value in the backing file */
+	__le64 fa_offset;
+
+	/* Size of the attribute value in bytes */
+	__le16 fa_size;
+
+	/* CRC32 of the attribute value */
+	__le32 fa_crc;
+} __packed;
+
+/* Metadata record for file signature. 
Type = INCFS_MD_SIGNATURE */ +struct incfs_file_signature { + struct incfs_md_header sg_header; + + __u8 sg_hash_alg; /* Value from incfs_hash_tree_algorithm */ + + __le32 sg_hash_tree_size; /* The size of the hash tree. */ + + __le64 sg_hash_tree_offset; /* Hash tree offset in the backing file */ + + __u8 sg_root_hash[INCFS_MAX_HASH_SIZE]; + + __le32 sg_sig_size; /* The size of the pkcs7 signature. */ + + __le64 sg_sig_offset; /* pkcs7 signature's offset in the backing file */ + + __le32 sg_add_data_size; /* The size of the additional data. */ + + __le64 sg_add_data_offset; /* Additional data's offset */ +} __packed; + +/* State of the backing file. */ +struct backing_file_context { + /* Protects writes to bc_file */ + struct mutex bc_mutex; + + /* File object to read data from */ + struct file *bc_file; + + /* + * Offset of the last known metadata record in the backing file. + * 0 means there are no metadata records. + */ + loff_t bc_last_md_record_offset; +}; + + +/* Backing file locations of things required for signature validation. */ +struct ondisk_signature { + + loff_t add_data_offset; /* Additional data's offset */ + + loff_t sig_offset; /* pkcs7 signature's offset in the backing file */ + + loff_t mtree_offset; /* Backing file offset of the hash tree. */ + + u32 add_data_size; /* The size of the additional data. */ + + u32 sig_size; /* The size of the pkcs7 signature. */ + + u32 mtree_size; /* The size of the hash tree. 
*/ +}; + +struct metadata_handler { + loff_t md_record_offset; + loff_t md_prev_record_offset; + void *context; + + union { + struct incfs_md_header md_header; + struct incfs_blockmap blockmap; + struct incfs_file_attr file_attr; + struct incfs_file_signature signature; + } md_buffer; + + int (*handle_blockmap)(struct incfs_blockmap *bm, + struct metadata_handler *handler); + int (*handle_file_attr)(struct incfs_file_attr *fa, + struct metadata_handler *handler); + int (*handle_signature)(struct incfs_file_signature *sig, + struct metadata_handler *handler); +}; +#define INCFS_MAX_METADATA_RECORD_SIZE \ + FIELD_SIZEOF(struct metadata_handler, md_buffer) + +loff_t incfs_get_end_offset(struct file *f); + +/* Backing file context management */ +struct backing_file_context *incfs_alloc_bfc(struct file *backing_file); + +void incfs_free_bfc(struct backing_file_context *bfc); + +/* Writing stuff */ +int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, + u32 block_count, loff_t *map_base_off); + +int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size); + +int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, + int block_index, loff_t bm_base_off, + u16 flags); + +int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, + int block_index, loff_t hash_area_off); + +int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, + struct mem_range value, struct incfs_file_attr *attr); + +int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, + u8 hash_alg, u32 tree_size, + struct mem_range root_hash, struct mem_range add_data, + struct mem_range sig); + +int incfs_make_empty_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size); + +/* Reading stuff */ +int incfs_read_file_header(struct backing_file_context *bfc, + loff_t *first_md_off, incfs_uuid_t 
*uuid,
+			   u64 *file_size);
+
+int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index,
+			      loff_t bm_base_off,
+			      struct incfs_blockmap_entry *bm_entry);
+
+int incfs_read_blockmap_entries(struct backing_file_context *bfc,
+		struct incfs_blockmap_entry *entries,
+		int start_index, int blocks_number,
+		loff_t bm_base_off);
+
+int incfs_read_next_metadata_record(struct backing_file_context *bfc,
+				    struct metadata_handler *handler);
+
+ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos);
+ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos);
+
+#endif /* _INCFS_FORMAT_H */
diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c
new file mode 100644
index 000000000000..c6444e73e4d8
--- /dev/null
+++ b/fs/incfs/integrity.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Google LLC
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include "integrity.h"
+
+/*
+ * Check that a PKCS#7 blob signs exactly root_hash followed by add_data.
+ *
+ * The content embedded in the PKCS#7 message has to be the byte-wise
+ * concatenation of root_hash and add_data; only then is the signature
+ * itself verified.
+ *
+ * Returns 0 on success, -EKEYREJECTED when the signed content differs from
+ * the arguments, -EBADMSG when the message carries no content, the root
+ * hash is empty, or verification fails with -ERANGE/-EINVAL; any other
+ * parser or verifier error is passed through unchanged.
+ */
+int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob,
+	struct mem_range root_hash, struct mem_range add_data)
+{
+	struct pkcs7_message *pkcs7 = NULL;
+	const void *data = NULL;
+	size_t data_len = 0;
+	const char *p;
+	int err;
+
+	pkcs7 = pkcs7_parse_message(pkcs7_blob.data, pkcs7_blob.len);
+	if (IS_ERR(pkcs7)) {
+		/* len is a size_t, hence %zu rather than %ld */
+		pr_debug("PKCS#7 parsing error. ptr=%p size=%zu err=%ld\n",
+			pkcs7_blob.data, pkcs7_blob.len, -PTR_ERR(pkcs7));
+		return PTR_ERR(pkcs7);
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
+	err = pkcs7_get_content_data(pkcs7, &data, &data_len, false);
+#else
+	err = pkcs7_get_content_data(pkcs7, &data, &data_len, NULL);
+#endif
+	if (err || data_len == 0 || data == NULL) {
+		pr_debug("PKCS#7 message does not contain data\n");
+		err = -EBADMSG;
+		goto out;
+	}
+
+	if (root_hash.len == 0) {
+		pr_debug("Root hash is empty.\n");
+		err = -EBADMSG;
+		goto out;
+	}
+
+	if (data_len != root_hash.len + add_data.len) {
+		pr_debug("PKCS#7 data size doesn't match arguments.\n");
+		err = -EKEYREJECTED;
+		goto out;
+	}
+
+	p = data;
+	if (memcmp(p, root_hash.data, root_hash.len) != 0) {
+		pr_debug("Root hash mismatch.\n");
+		err = -EKEYREJECTED;
+		goto out;
+	}
+	p += root_hash.len;
+	if (memcmp(p, add_data.data, add_data.len) != 0) {
+		pr_debug("Additional data mismatch.\n");
+		err = -EKEYREJECTED;
+		goto out;
+	}
+
+	err = pkcs7_verify(pkcs7, VERIFYING_UNSPECIFIED_SIGNATURE);
+	if (err)
+		pr_debug("PKCS#7 signature verification error: %d\n", -err);
+
+	/*
+	 * RSA signature verification sometimes returns unexpected error codes
+	 * when signature doesn't match.
+	 */
+	if (err == -ERANGE || err == -EINVAL)
+		err = -EBADMSG;
+
+out:
+	pkcs7_free_message(pkcs7);
+	return err;
+}
+
+/*
+ * Look up the descriptor of a hash tree algorithm, allocating its crypto
+ * transform the first time it is requested.
+ *
+ * Returns the shared descriptor, ERR_PTR(-ENOENT) for an unknown id, or the
+ * error from crypto_alloc_shash().
+ */
+struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id)
+{
+	static struct incfs_hash_alg sha256 = {
+		.name = "sha256",
+		.digest_size = SHA256_DIGEST_SIZE,
+		.id = INCFS_HASH_TREE_SHA256
+	};
+	struct incfs_hash_alg *result = NULL;
+	struct crypto_shash *shash;
+
+	if (id == INCFS_HASH_TREE_SHA256) {
+		BUILD_BUG_ON(INCFS_MAX_HASH_SIZE < SHA256_DIGEST_SIZE);
+		result = &sha256;
+	}
+
+	if (result == NULL)
+		return ERR_PTR(-ENOENT);
+
+	/* pairs with cmpxchg_release() below */
+	shash = smp_load_acquire(&result->shash);
+	if (shash)
+		return result;
+
+	shash = crypto_alloc_shash(result->name, 0, 0);
+	if (IS_ERR(shash)) {
+		int err = PTR_ERR(shash);
+
+		pr_err("Can't allocate hash alg %s, error code:%d\n",
+			result->name, err);
+		return ERR_PTR(err);
+	}
+
+	/*
+	 * pairs with smp_load_acquire() above. If a transform was installed
+	 * in the meantime, keep that one and drop ours.
+	 */
+	if (cmpxchg_release(&result->shash, NULL, shash) != NULL)
+		crypto_free_shash(shash);
+
+	return result;
+}
+
+/*
+ * Compute the geometry of a Merkle tree covering data_block_count blocks.
+ *
+ * Only the layout (per-level offsets, total area size) and the root hash
+ * are recorded; no hash blocks are read or written here.
+ *
+ * Returns the new tree, to be released with incfs_free_mtree(), or an
+ * ERR_PTR: -EINVAL for a non-positive block count or a root hash shorter
+ * than the digest, -ENOMEM, -E2BIG when the tree would be too deep or its
+ * hash area too large, or the error from incfs_get_hash_alg().
+ */
+struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id,
+				int data_block_count,
+				struct mem_range root_hash)
+{
+	struct mtree *result = NULL;
+	struct incfs_hash_alg *hash_alg = NULL;
+	int hash_per_block;
+	int lvl;
+	int total_blocks = 0;
+	int blocks_in_level[INCFS_MAX_MTREE_LEVELS];
+	int blocks = data_block_count;
+
+	if (data_block_count <= 0)
+		return ERR_PTR(-EINVAL);
+
+	hash_alg = incfs_get_hash_alg(id);
+	if (IS_ERR(hash_alg))
+		return ERR_CAST(hash_alg);
+
+	if (root_hash.len < hash_alg->digest_size)
+		return ERR_PTR(-EINVAL);
+
+	result = kzalloc(sizeof(*result), GFP_NOFS);
+	if (!result)
+		return ERR_PTR(-ENOMEM);
+
+	result->alg = hash_alg;
+	hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / result->alg->digest_size;
+
+	/* Calculating tree geometry. */
+	/* First pass: calculate how many blocks in each tree level. */
+	for (lvl = 0; blocks > 1; lvl++) {
+		if (lvl >= INCFS_MAX_MTREE_LEVELS) {
+			pr_err("incfs: too much data in mtree\n");
+			goto err;
+		}
+
+		/* Round up; blocks > 1 here, so this form cannot overflow. */
+		blocks = (blocks - 1) / hash_per_block + 1;
+		blocks_in_level[lvl] = blocks;
+		total_blocks += blocks;
+	}
+	result->depth = lvl;
+
+	/*
+	 * Check the limit in blocks, before multiplying: the byte size of an
+	 * oversized tree does not fit hash_tree_area_size and would wrap
+	 * around to a value below the limit.
+	 */
+	if (total_blocks >
+	    INCFS_MAX_HASH_AREA_SIZE / INCFS_DATA_FILE_BLOCK_SIZE)
+		goto err;
+	result->hash_tree_area_size = total_blocks * INCFS_DATA_FILE_BLOCK_SIZE;
+
+	blocks = 0;
+	/* Second pass: calculate offset of each level. 0th level goes last. */
+	for (lvl = 0; lvl < result->depth; lvl++) {
+		u32 suboffset;
+
+		blocks += blocks_in_level[lvl];
+		suboffset = (total_blocks - blocks)
+					* INCFS_DATA_FILE_BLOCK_SIZE;
+
+		result->hash_level_suboffset[lvl] = suboffset;
+	}
+
+	/* Root hash is stored separately from the rest of the tree. */
+	memcpy(result->root_hash, root_hash.data, hash_alg->digest_size);
+	return result;
+
+err:
+	kfree(result);
+	return ERR_PTR(-E2BIG);
+}
+
+/* Release a tree returned by incfs_alloc_mtree(). */
+void incfs_free_mtree(struct mtree *tree)
+{
+	kfree(tree);
+}
+
+/*
+ * Run the digest with an on-stack descriptor. The caller has already
+ * verified that alg and alg->shash are set.
+ */
+static int calc_digest_checked(struct incfs_hash_alg *alg,
+			struct mem_range data, struct mem_range digest)
+{
+	SHASH_DESC_ON_STACK(desc, alg->shash);
+
+	desc->tfm = alg->shash;
+	return crypto_shash_digest(desc, data.data, data.len, digest.data);
+}
+
+/*
+ * Hash data with alg and store the result in digest.
+ *
+ * Returns -EFAULT when alg, its transform or either buffer is missing,
+ * -EINVAL when digest is too small for alg->digest_size, otherwise the
+ * result of crypto_shash_digest().
+ */
+int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data,
+			struct mem_range digest)
+{
+	/*
+	 * These checks must run before SHASH_DESC_ON_STACK(), which can
+	 * evaluate its transform argument and thus dereference alg.
+	 */
+	if (!alg || !alg->shash || !data.data || !digest.data)
+		return -EFAULT;
+
+	if (alg->digest_size > digest.len)
+		return -EINVAL;
+
+	return calc_digest_checked(alg, data, digest);
+}
+
+/* Free si and the three buffers it points to; a NULL si is a no-op. */
+void incfs_free_signature_info(struct signature_info *si)
+{
+	if (!si)
+		return;
+	kfree(si->root_hash.data);
+	kfree(si->additional_data.data);
+	kfree(si->signature.data);
+	kfree(si);
+}
diff --git a/fs/incfs/integrity.h b/fs/incfs/integrity.h
new file mode 100644
index 000000000000..da1c38486b2f
--- /dev/null
+++ b/fs/incfs/integrity.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2019 Google LLC
+ */
+#ifndef _INCFS_INTEGRITY_H
+#define _INCFS_INTEGRITY_H
+#include
+#include
+#include
+
+#include
+
+#include "internal.h"
+
+#define
INCFS_MAX_MTREE_LEVELS 8 +#define INCFS_MAX_HASH_AREA_SIZE (1280 * 1024 * 1024) + +struct incfs_hash_alg { + const char *name; + int digest_size; + enum incfs_hash_tree_algorithm id; + + struct crypto_shash *shash; +}; + +/* Merkle tree structure. */ +struct mtree { + struct incfs_hash_alg *alg; + + u8 root_hash[INCFS_MAX_HASH_SIZE]; + + /* Offset of each hash level in the hash area. */ + u32 hash_level_suboffset[INCFS_MAX_MTREE_LEVELS]; + + u32 hash_tree_area_size; + + /* Number of levels in hash_level_suboffset */ + int depth; +}; + +struct signature_info { + struct mem_range root_hash; + + struct mem_range additional_data; + + struct mem_range signature; + + enum incfs_hash_tree_algorithm hash_alg; +}; + +struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id); + +struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, + int data_block_count, + struct mem_range root_hash); + +void incfs_free_mtree(struct mtree *tree); + +size_t incfs_get_mtree_depth(enum incfs_hash_tree_algorithm alg, loff_t size); + +size_t incfs_get_mtree_hash_count(enum incfs_hash_tree_algorithm alg, + loff_t size); + +int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, + struct mem_range digest); + +int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, + struct mem_range root_hash, struct mem_range add_data); + +void incfs_free_signature_info(struct signature_info *si); + +#endif /* _INCFS_INTEGRITY_H */ diff --git a/fs/incfs/internal.h b/fs/incfs/internal.h new file mode 100644 index 000000000000..0a85eaed41d3 --- /dev/null +++ b/fs/incfs/internal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018 Google LLC + */ +#ifndef _INCFS_INTERNAL_H +#define _INCFS_INTERNAL_H +#include + +struct mem_range { + u8 *data; + size_t len; +}; + +static inline struct mem_range range(u8 *data, size_t len) +{ + return (struct mem_range){ .data = data, .len = len }; +} + +#define LOCK_REQUIRED(lock) 
WARN_ON_ONCE(!mutex_is_locked(&lock))
+
+#endif /* _INCFS_INTERNAL_H */
diff --git a/fs/incfs/main.c b/fs/incfs/main.c
new file mode 100644
index 000000000000..d9eec7496846
--- /dev/null
+++ b/fs/incfs/main.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018 Google LLC
+ */
+#include
+#include
+#include
+
+#include
+
+#include "vfs.h"
+
+#define INCFS_NODE_FEATURES "features"
+
+struct file_system_type incfs_fs_type = {
+	.owner = THIS_MODULE,
+	.name = INCFS_NAME,
+	.mount = incfs_mount_fs,
+	.kill_sb = incfs_kill_sb,
+	.fs_flags = 0
+};
+
+static struct kobject *sysfs_root, *featurefs_root;
+
+/* sysfs read handler for the "corefs" feature attribute. */
+static ssize_t corefs_show(struct kobject *kobj,
+			  struct kobj_attribute *attr, char *buff)
+{
+	return snprintf(buff, PAGE_SIZE, "supported\n");
+}
+
+static struct kobj_attribute corefs_attr = __ATTR_RO(corefs);
+
+static struct attribute *attributes[] = {
+	&corefs_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group attr_group = {
+	.attrs = attributes,
+};
+
+/*
+ * Create the INCFS_NAME kobject under fs_kobj and its "features" child
+ * with the feature attribute group.
+ *
+ * Returns 0 on success. On failure every kobject created so far is put
+ * again and both global pointers are left NULL.
+ */
+static int __init init_sysfs(void)
+{
+	int res;
+
+	sysfs_root = kobject_create_and_add(INCFS_NAME, fs_kobj);
+	if (!sysfs_root)
+		return -ENOMEM;
+
+	featurefs_root = kobject_create_and_add(INCFS_NODE_FEATURES,
+						sysfs_root);
+	if (!featurefs_root) {
+		res = -ENOMEM;
+		goto err_put_root;
+	}
+
+	res = sysfs_create_group(featurefs_root, &attr_group);
+	if (res)
+		goto err_put_features;
+
+	return 0;
+
+err_put_features:
+	kobject_put(featurefs_root);
+	featurefs_root = NULL;
+err_put_root:
+	kobject_put(sysfs_root);
+	sysfs_root = NULL;
+	return res;
+}
+
+/* Undo init_sysfs(); safe to call when nothing was created. */
+static void cleanup_sysfs(void)
+{
+	if (featurefs_root) {
+		sysfs_remove_group(featurefs_root, &attr_group);
+		kobject_put(featurefs_root);
+		featurefs_root = NULL;
+	}
+
+	if (sysfs_root) {
+		kobject_put(sysfs_root);
+		sysfs_root = NULL;
+	}
+}
+
+/* Module entry point: publish the sysfs nodes, then register the fs. */
+static int __init init_incfs_module(void)
+{
+	int err = 0;
+
+	err = init_sysfs();
+	if (err)
+		return err;
+
+	err = register_filesystem(&incfs_fs_type);
+	if (err)
+		cleanup_sysfs();
+
+	return err;
+}
+
+/* Module exit: tear down in the reverse order of init_incfs_module(). */
+static void __exit cleanup_incfs_module(void)
+{
+	unregister_filesystem(&incfs_fs_type);
+	cleanup_sysfs();
+}
+
+module_init(init_incfs_module); +module_exit(cleanup_incfs_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eugene Zemtsov "); +MODULE_DESCRIPTION("Incremental File System"); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c new file mode 100644 index 000000000000..0163d766f80c --- /dev/null +++ b/fs/incfs/vfs.c @@ -0,0 +1,2202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "data_mgmt.h" +#include "format.h" +#include "integrity.h" +#include "internal.h" + +#define INCFS_PENDING_READS_INODE 2 +#define INCFS_LOG_INODE 3 +#define INCFS_START_INO_RANGE 10 +#define READ_FILE_MODE 0444 +#define READ_EXEC_FILE_MODE 0555 +#define READ_WRITE_FILE_MODE 0666 + +static int incfs_remount_fs(struct super_block *sb, int *flags, char *data); + +static int dentry_revalidate(struct dentry *dentry, unsigned int flags); +static void dentry_release(struct dentry *d); + +static int iterate_incfs_dir(struct file *file, struct dir_context *ctx); +static struct dentry *dir_lookup(struct inode *dir_inode, + struct dentry *dentry, unsigned int flags); +static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +static int dir_unlink(struct inode *dir, struct dentry *dentry); +static int dir_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry); +static int dir_rmdir(struct inode *dir, struct dentry *dentry); +static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); + +static int file_open(struct inode *inode, struct file *file); +static int file_release(struct inode *inode, struct file *file); +static ssize_t file_write(struct file *f, const char __user *buf, + size_t size, loff_t *offset); +static int read_single_page(struct file *f, struct page *page); +static long 
dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); + +static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos); +static __poll_t pending_reads_poll(struct file *file, poll_table *wait); +static int pending_reads_open(struct inode *inode, struct file *file); +static int pending_reads_release(struct inode *, struct file *); + +static ssize_t log_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos); +static __poll_t log_poll(struct file *file, poll_table *wait); +static int log_open(struct inode *inode, struct file *file); +static int log_release(struct inode *, struct file *); + +static struct inode *alloc_inode(struct super_block *sb); +static void free_inode(struct inode *inode); +static void evict_inode(struct inode *inode); + +static ssize_t incfs_getxattr(struct dentry *d, const char *name, + void *value, size_t size); +static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size); + +static int show_options(struct seq_file *, struct dentry *); + +static const struct super_operations incfs_super_ops = { + .statfs = simple_statfs, + .remount_fs = incfs_remount_fs, + .alloc_inode = alloc_inode, + .destroy_inode = free_inode, + .evict_inode = evict_inode, + .show_options = show_options +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) +#define dir_rename_wrap dir_rename +#else +static int dir_rename_wrap(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + return dir_rename(old_dir, old_dentry, new_dir, new_dentry); +} +#endif + +static const struct inode_operations incfs_dir_inode_ops = { + .lookup = dir_lookup, + .mkdir = dir_mkdir, + .rename = dir_rename_wrap, + .unlink = dir_unlink, + .link = dir_link, + .rmdir = dir_rmdir +}; + +static const struct file_operations incfs_dir_fops = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate = iterate_incfs_dir, + .open = file_open, + 
.release = file_release, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct dentry_operations incfs_dentry_ops = { + .d_revalidate = dentry_revalidate, + .d_release = dentry_release +}; + +static const struct address_space_operations incfs_address_space_ops = { + .readpage = read_single_page, + /* .readpages = readpages */ +}; + +static const struct file_operations incfs_file_ops = { + .open = file_open, + .release = file_release, + .write = file_write, + .read_iter = generic_file_read_iter, + .mmap = generic_file_mmap, + .splice_read = generic_file_splice_read, + .llseek = generic_file_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct file_operations incfs_pending_read_file_ops = { + .read = pending_reads_read, + .poll = pending_reads_poll, + .open = pending_reads_open, + .release = pending_reads_release, + .llseek = noop_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct file_operations incfs_log_file_ops = { + .read = log_read, + .poll = log_poll, + .open = log_open, + .release = log_release, + .llseek = noop_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,9,0) + +static const struct inode_operations incfs_file_inode_ops = { + .setattr = simple_setattr, + .getattr = simple_getattr, + .getxattr = incfs_getxattr, + .listxattr = incfs_listxattr +}; + +#else + +static const struct inode_operations incfs_file_inode_ops = { + .setattr = simple_setattr, + .getattr = simple_getattr, + .listxattr = incfs_listxattr +}; + +static int incfs_handler_getxattr(const struct xattr_handler *xh, + struct dentry *d, struct inode *inode, + const char *name, void *buffer, size_t size, + int flags) +{ + return incfs_getxattr(d, name, buffer, size); +} + +static const struct xattr_handler incfs_xattr_handler = { + .prefix = "", /* AKA all attributes */ + .get 
= incfs_handler_getxattr, +}; + +const struct xattr_handler *incfs_xattr_ops[] = { + &incfs_xattr_handler, + NULL, +}; + + +#endif + +/* State of an open .pending_reads file, unique for each file descriptor. */ +struct pending_reads_state { + /* A serial number of the last pending read obtained from this file. */ + int last_pending_read_sn; +}; + +/* State of an open .log file, unique for each file descriptor. */ +struct log_file_state { + struct read_log_state state; +}; + +struct inode_search { + unsigned long ino; + + struct dentry *backing_dentry; +}; + +enum parse_parameter { + Opt_read_timeout, + Opt_readahead_pages, + Opt_no_backing_file_cache, + Opt_no_backing_file_readahead, + Opt_rlog_pages, + Opt_rlog_wakeup_cnt, + Opt_err +}; + +static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME; +static struct mem_range pending_reads_file_name_range = { + .data = (u8 *)pending_reads_file_name, + .len = ARRAY_SIZE(pending_reads_file_name) - 1 +}; + +static const char log_file_name[] = INCFS_LOG_FILENAME; +static struct mem_range log_file_name_range = { + .data = (u8 *)log_file_name, + .len = ARRAY_SIZE(log_file_name) - 1 +}; + +static const match_table_t option_tokens = { + { Opt_read_timeout, "read_timeout_ms=%u" }, + { Opt_readahead_pages, "readahead=%u" }, + { Opt_no_backing_file_cache, "no_bf_cache=%u" }, + { Opt_no_backing_file_readahead, "no_bf_readahead=%u" }, + { Opt_rlog_pages, "rlog_pages=%u" }, + { Opt_rlog_wakeup_cnt, "rlog_wakeup_cnt=%u" }, + { Opt_err, NULL } +}; + +static int parse_options(struct mount_options *opts, char *str) +{ + substring_t args[MAX_OPT_ARGS]; + int value; + char *position; + + if (opts == NULL) + return -EFAULT; + + opts->read_timeout_ms = 1000; /* Default: 1s */ + opts->readahead_pages = 10; + opts->read_log_pages = 2; + opts->read_log_wakeup_count = 10; + opts->no_backing_file_cache = false; + opts->no_backing_file_readahead = false; + if (str == NULL || *str == 0) + return 0; + + while ((position = 
strsep(&str, ",")) != NULL) { + int token; + + if (!*position) + continue; + + token = match_token(position, option_tokens, args); + + switch (token) { + case Opt_read_timeout: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_timeout_ms = value; + break; + case Opt_readahead_pages: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->readahead_pages = value; + break; + case Opt_no_backing_file_cache: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->no_backing_file_cache = (value != 0); + break; + case Opt_no_backing_file_readahead: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->no_backing_file_readahead = (value != 0); + break; + case Opt_rlog_pages: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_log_pages = value; + break; + case Opt_rlog_wakeup_cnt: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_log_wakeup_count = value; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static struct super_block *file_superblock(struct file *f) +{ + struct inode *inode = file_inode(f); + + return inode->i_sb; +} + +static struct mount_info *get_mount_info(struct super_block *sb) +{ + struct mount_info *result = sb->s_fs_info; + + WARN_ON(!result); + return result; +} + +/* Read file size from the attribute. 
Quicker than reading the header */ +static u64 read_size_attr(struct dentry *backing_dentry) +{ + __le64 attr_value; + ssize_t bytes_read; + + bytes_read = vfs_getxattr(backing_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&attr_value, sizeof(attr_value)); + + if (bytes_read != sizeof(attr_value)) + return 0; + + return le64_to_cpu(attr_value); +} + +static int inode_test(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + struct inode_info *node = get_incfs_node(inode); + + if (!node) + return 0; + + if (search->backing_dentry) { + struct inode *backing_inode = d_inode(search->backing_dentry); + + return (node->n_backing_inode == backing_inode) && + inode->i_ino == search->ino; + } + return 1; +} + +static int inode_set(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + struct inode_info *node = get_incfs_node(inode); + + if (search->backing_dentry) { + /* It's a regular inode that has corresponding backing inode */ + struct dentry *backing_dentry = search->backing_dentry; + struct inode *backing_inode = d_inode(backing_dentry); + + inode_init_owner(inode, NULL, backing_inode->i_mode); + fsstack_copy_attr_all(inode, backing_inode); + if (S_ISREG(inode->i_mode)) { + u64 size = read_size_attr(backing_dentry); + + inode->i_size = size; + inode->i_blocks = get_blocks_count_for_size(size); + inode->i_mapping->a_ops = &incfs_address_space_ops; + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_file_ops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_size = 0; + inode->i_blocks = 1; + inode->i_mapping->a_ops = &incfs_address_space_ops; + inode->i_op = &incfs_dir_inode_ops; + inode->i_fop = &incfs_dir_fops; + } else { + pr_warn_once("incfs: Unexpected inode type\n"); + return -EBADF; + } + + ihold(backing_inode); + node->n_backing_inode = backing_inode; + node->n_mount_info = get_mount_info(inode->i_sb); + inode->i_ctime = backing_inode->i_ctime; + inode->i_mtime = backing_inode->i_mtime; + inode->i_atime 
= backing_inode->i_atime; + inode->i_ino = backing_inode->i_ino; + if (backing_inode->i_ino < INCFS_START_INO_RANGE) { + pr_warn("incfs: ino conflict with backing FS %ld\n", + backing_inode->i_ino); + } + return 0; + } else if (search->ino == INCFS_PENDING_READS_INODE) { + /* It's an inode for .pending_reads pseudo file. */ + + inode->i_ctime = (struct timespec64){}; + inode->i_mtime = inode->i_ctime; + inode->i_atime = inode->i_ctime; + inode->i_size = 0; + inode->i_ino = INCFS_PENDING_READS_INODE; + inode->i_private = NULL; + + inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_pending_read_file_ops; + + } else if (search->ino == INCFS_LOG_INODE) { + /* It's an inode for .log pseudo file. */ + + inode->i_ctime = (struct timespec64){}; + inode->i_mtime = inode->i_ctime; + inode->i_atime = inode->i_ctime; + inode->i_size = 0; + inode->i_ino = INCFS_LOG_INODE; + inode->i_private = NULL; + + inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_log_file_ops; + + } else { + /* Unknown inode requested. 
*/ + return -EINVAL; + } + + return 0; +} + +static struct inode *fetch_regular_inode(struct super_block *sb, + struct dentry *backing_dentry) +{ + struct inode *backing_inode = d_inode(backing_dentry); + struct inode_search search = { + .ino = backing_inode->i_ino, + .backing_dentry = backing_dentry + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct pending_reads_state *pr_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct incfs_pending_read_info *reads_buf = NULL; + size_t reads_to_collect = len / sizeof(*reads_buf); + int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn); + int new_max_sn = last_known_read_sn; + int reads_collected = 0; + ssize_t result = 0; + int i = 0; + + if (!access_ok(buf, len)) + return -EFAULT; + + if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) + return 0; + + reads_buf = (struct incfs_pending_read_info *)get_zeroed_page(GFP_NOFS); + if (!reads_buf) + return -ENOMEM; + + reads_to_collect = + min_t(size_t, PAGE_SIZE / sizeof(*reads_buf), reads_to_collect); + + reads_collected = incfs_collect_pending_reads( + mi, last_known_read_sn, reads_buf, reads_to_collect); + if (reads_collected < 0) { + result = reads_collected; + goto out; + } + + for (i = 0; i < reads_collected; i++) + if (reads_buf[i].serial_number > new_max_sn) + new_max_sn = reads_buf[i].serial_number; + + /* + * Just to make sure that we don't accidentally copy more data + * to reads buffer than userspace can handle. 
+ */ + reads_collected = min_t(size_t, reads_collected, reads_to_collect); + result = reads_collected * sizeof(*reads_buf); + + /* Copy reads info to the userspace buffer */ + if (copy_to_user(buf, reads_buf, result)) { + result = -EFAULT; + goto out; + } + + WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn); + *ppos = 0; +out: + if (reads_buf) + free_page((unsigned long)reads_buf); + return result; +} + + +static __poll_t pending_reads_poll(struct file *file, poll_table *wait) +{ + struct pending_reads_state *state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + __poll_t ret = 0; + + poll_wait(file, &mi->mi_pending_reads_notif_wq, wait); + if (incfs_fresh_pending_reads_exist(mi, + state->last_pending_read_sn)) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static int pending_reads_open(struct inode *inode, struct file *file) +{ + struct pending_reads_state *state = NULL; + + state = kzalloc(sizeof(*state), GFP_NOFS); + if (!state) + return -ENOMEM; + + file->private_data = state; + return 0; +} + +static int pending_reads_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static struct inode *fetch_pending_reads_inode(struct super_block *sb) +{ + struct inode_search search = { + .ino = INCFS_PENDING_READS_INODE + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static int log_open(struct inode *inode, struct file *file) +{ + struct log_file_state *log_state = NULL; + struct mount_info *mi = get_mount_info(file_superblock(file)); + + log_state = kzalloc(sizeof(*log_state), GFP_NOFS); + if (!log_state) + return -ENOMEM; + + log_state->state = incfs_get_log_state(mi); + file->private_data = log_state; + return 0; +} + +static int log_release(struct inode *inode, struct file *file) +{ + 
kfree(file->private_data); + return 0; +} + +static ssize_t log_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct log_file_state *log_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct incfs_pending_read_info *reads_buf = + (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); + size_t reads_to_collect = len / sizeof(*reads_buf); + size_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); + int total_reads_collected = 0; + ssize_t result = 0; + + if (!reads_buf) + return -ENOMEM; + + reads_to_collect = min_t(size_t, mi->mi_log.rl_size, reads_to_collect); + while (reads_to_collect > 0) { + struct read_log_state next_state = READ_ONCE(log_state->state); + int reads_collected = incfs_collect_logged_reads( + mi, &next_state, reads_buf, + min_t(size_t, reads_to_collect, reads_per_page)); + if (reads_collected <= 0) { + result = total_reads_collected ? + total_reads_collected * + sizeof(*reads_buf) : + reads_collected; + goto out; + } + if (copy_to_user(buf, reads_buf, + reads_collected * sizeof(*reads_buf))) { + result = total_reads_collected ? 
+ total_reads_collected * + sizeof(*reads_buf) : + -EFAULT; + goto out; + } + + WRITE_ONCE(log_state->state, next_state); + total_reads_collected += reads_collected; + buf += reads_collected * sizeof(*reads_buf); + reads_to_collect -= reads_collected; + } + + result = total_reads_collected * sizeof(*reads_buf); + *ppos = 0; +out: + if (reads_buf) + free_page((unsigned long)reads_buf); + return result; +} + +static __poll_t log_poll(struct file *file, poll_table *wait) +{ + struct log_file_state *log_state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + int count; + __poll_t ret = 0; + + poll_wait(file, &mi->mi_log.ml_notif_wq, wait); + count = incfs_get_uncollected_logs_count(mi, log_state->state); + if (count >= mi->mi_options.read_log_wakeup_count) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static struct inode *fetch_log_inode(struct super_block *sb) +{ + struct inode_search search = { + .ino = INCFS_LOG_INODE + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static int iterate_incfs_dir(struct file *file, struct dir_context *ctx) +{ + struct dir_file *dir = get_incfs_dir_file(file); + int error = 0; + struct mount_info *mi = get_mount_info(file_superblock(file)); + bool root; + + if (!dir) { + error = -EBADF; + goto out; + } + + root = dir->backing_dir->f_inode + == d_inode(mi->mi_backing_dir_path.dentry); + + if (root && ctx->pos == 0) { + if (!dir_emit(ctx, pending_reads_file_name, + ARRAY_SIZE(pending_reads_file_name) - 1, + INCFS_PENDING_READS_INODE, DT_REG)) { + error = -EINVAL; + goto out; + } + ctx->pos++; + } + + if (root && ctx->pos == 1) { + if (!dir_emit(ctx, log_file_name, + ARRAY_SIZE(log_file_name) - 1, + INCFS_LOG_INODE, DT_REG)) { + error = -EINVAL; + goto out; + } + ctx->pos++; + } + + ctx->pos -= 2; + error = 
iterate_dir(dir->backing_dir, ctx); + ctx->pos += 2; + file->f_pos = dir->backing_dir->f_pos; +out: + if (error) + pr_warn("incfs: %s %s %d\n", __func__, + file->f_path.dentry->d_name.name, error); + return error; +} + +static int incfs_init_dentry(struct dentry *dentry, struct path *path) +{ + struct dentry_info *d_info = NULL; + + if (!dentry || !path) + return -EFAULT; + + d_info = kzalloc(sizeof(*d_info), GFP_NOFS); + if (!d_info) + return -ENOMEM; + + d_info->backing_path = *path; + path_get(path); + + dentry->d_fsdata = d_info; + return 0; +} + +static struct dentry *incfs_lookup_dentry(struct dentry *parent, + const char *name) +{ + struct inode *inode; + struct dentry *result = NULL; + + if (!parent) + return ERR_PTR(-EFAULT); + + inode = d_inode(parent); + inode_lock_nested(inode, I_MUTEX_PARENT); + result = lookup_one_len(name, parent, strlen(name)); + inode_unlock(inode); + + if (IS_ERR(result)) + pr_warn("%s err:%ld\n", __func__, PTR_ERR(result)); + + return result; +} + +static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) +{ + static const char name[] = ".index"; + struct dentry *index_dentry; + struct inode *backing_inode = d_inode(backing_dir); + int err = 0; + + index_dentry = incfs_lookup_dentry(backing_dir, name); + if (!index_dentry) { + return ERR_PTR(-EINVAL); + } else if (IS_ERR(index_dentry)) { + return index_dentry; + } else if (d_really_is_positive(index_dentry)) { + /* Index already exists. */ + return index_dentry; + } + + /* Index needs to be created. 
*/ + inode_lock_nested(backing_inode, I_MUTEX_PARENT); + err = vfs_mkdir(backing_inode, index_dentry, 0777); + inode_unlock(backing_inode); + + if (err) + return ERR_PTR(err); + + if (!d_really_is_positive(index_dentry)) { + dput(index_dentry); + return ERR_PTR(-EINVAL); + } + + return index_dentry; +} + +static int read_single_page(struct file *f, struct page *page) +{ + loff_t offset = 0; + loff_t size = 0; + ssize_t bytes_to_read = 0; + ssize_t read_result = 0; + struct data_file *df = get_incfs_data_file(f); + int result = 0; + void *page_start = kmap(page); + int block_index; + int timeout_ms; + + if (!df) + return -EBADF; + + offset = page_offset(page); + block_index = offset / INCFS_DATA_FILE_BLOCK_SIZE; + size = df->df_size; + timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; + + pr_debug("incfs: %s %s %lld\n", __func__, + f->f_path.dentry->d_name.name, offset); + + if (offset < size) { + struct mem_range tmp = { + .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE + }; + + tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); + bytes_to_read = min_t(loff_t, size - offset, PAGE_SIZE); + read_result = incfs_read_data_file_block( + range(page_start, bytes_to_read), df, block_index, + timeout_ms, tmp); + + free_pages((unsigned long)tmp.data, get_order(tmp.len)); + } else { + bytes_to_read = 0; + read_result = 0; + } + + if (read_result < 0) + result = read_result; + else if (read_result < PAGE_SIZE) + zero_user(page, read_result, PAGE_SIZE - read_result); + + if (result == 0) + SetPageUptodate(page); + else + SetPageError(page); + + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + return result; +} + +static char *file_id_to_str(incfs_uuid_t id) +{ + char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS); + char *end; + + if (!result) + return NULL; + + end = bin2hex(result, id.bytes, sizeof(id.bytes)); + *end = 0; + return result; +} + +static struct signature_info *incfs_copy_signature_info_from_user( + struct 
incfs_file_signature_info __user *original) +{ + struct incfs_file_signature_info usr_si; + struct signature_info *result; + int error; + + if (!original) + return NULL; + + if (!access_ok(original, sizeof(usr_si))) + return ERR_PTR(-EFAULT); + + if (copy_from_user(&usr_si, original, sizeof(usr_si)) > 0) + return ERR_PTR(-EFAULT); + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return ERR_PTR(-ENOMEM); + + result->hash_alg = usr_si.hash_tree_alg; + + if (result->hash_alg) { + void *p = kzalloc(INCFS_MAX_HASH_SIZE, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + + // TODO this sets the root_hash length to MAX_HASH_SIZE not + // the actual size. Fix, then set INCFS_MAX_HASH_SIZE back + // to 64 + result->root_hash = range(p, INCFS_MAX_HASH_SIZE); + if (copy_from_user(p, u64_to_user_ptr(usr_si.root_hash), + result->root_hash.len) > 0) { + error = -EFAULT; + goto err; + } + } + + if (usr_si.additional_data_size > INCFS_MAX_FILE_ATTR_SIZE) { + error = -E2BIG; + goto err; + } + + if (usr_si.additional_data && usr_si.additional_data_size) { + void *p = kzalloc(usr_si.additional_data_size, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + result->additional_data = range(p, + usr_si.additional_data_size); + if (copy_from_user(p, u64_to_user_ptr(usr_si.additional_data), + result->additional_data.len) > 0) { + error = -EFAULT; + goto err; + } + } + + if (usr_si.signature_size > INCFS_MAX_SIGNATURE_SIZE) { + error = -E2BIG; + goto err; + } + + if (usr_si.signature && usr_si.signature_size) { + void *p = kzalloc(usr_si.signature_size, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + result->signature = range(p, usr_si.signature_size); + if (copy_from_user(p, u64_to_user_ptr(usr_si.signature), + result->signature.len) > 0) { + error = -EFAULT; + goto err; + } + } + + return result; + +err: + incfs_free_signature_info(result); + return ERR_PTR(-error); +} + +static int init_new_file(struct mount_info *mi, struct dentry 
*dentry, + incfs_uuid_t *uuid, u64 size, struct mem_range attr, + struct incfs_file_signature_info __user *fsi) +{ + struct path path = {}; + struct file *new_file; + int error = 0; + struct backing_file_context *bfc = 0; + u32 block_count; + struct mem_range mem_range = {0}; + struct signature_info *si = 0; + struct mtree *hash_tree = 0; + + if (!mi || !dentry || !uuid) + return -EFAULT; + + /* Resize newly created file to its true size. */ + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = dentry + }; + new_file = dentry_open(&path, O_RDWR | O_NOATIME, mi->mi_owner); + + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out; + } + + bfc = incfs_alloc_bfc(new_file); + if (IS_ERR(bfc)) { + error = PTR_ERR(bfc); + bfc = NULL; + goto out; + } + + mutex_lock(&bfc->bc_mutex); + error = incfs_write_fh_to_backing_file(bfc, uuid, size); + if (error) + goto out; + + block_count = (u32)get_blocks_count_for_size(size); + error = incfs_write_blockmap_to_backing_file(bfc, block_count, NULL); + if (error) + goto out; + + /* This fill has data, reserve space for the block map. */ + if (block_count > 0) { + error = incfs_write_blockmap_to_backing_file( + bfc, block_count, NULL); + if (error) + goto out; + } + + if (attr.data && attr.len) { + error = incfs_write_file_attr_to_backing_file(bfc, + attr, NULL); + if (error) + goto out; + } + + if (fsi) { + si = incfs_copy_signature_info_from_user(fsi); + + if (IS_ERR(si)) { + error = PTR_ERR(si); + si = NULL; + goto out; + } + + if (si->hash_alg) { + hash_tree = incfs_alloc_mtree(si->hash_alg, block_count, + si->root_hash); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; + } + + // TODO This code seems wrong when len is zero - we + // should error out?? 
+ if (si->signature.len > 0) + error = incfs_validate_pkcs7_signature( + si->signature, + si->root_hash, + si->additional_data); + if (error) + goto out; + + error = incfs_write_signature_to_backing_file(bfc, + si->hash_alg, + hash_tree->hash_tree_area_size, + si->root_hash, si->additional_data, + si->signature); + + if (error) + goto out; + } + } + +out: + if (bfc) { + mutex_unlock(&bfc->bc_mutex); + incfs_free_bfc(bfc); + } + incfs_free_mtree(hash_tree); + incfs_free_signature_info(si); + kfree(mem_range.data); + + if (error) + pr_debug("incfs: %s error: %d\n", __func__, error); + return error; +} + +static int incfs_link(struct dentry *what, struct dentry *where) +{ + struct dentry *parent_dentry = dget_parent(where); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_link(what, pinode, where, NULL); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int incfs_unlink(struct dentry *dentry) +{ + struct dentry *parent_dentry = dget_parent(dentry); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_unlink(pinode, dentry, NULL); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int incfs_rmdir(struct dentry *dentry) +{ + struct dentry *parent_dentry = dget_parent(dentry); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_rmdir(pinode, dentry); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int dir_relative_path_resolve( + struct mount_info *mi, + const char __user *relative_path, + struct path *result_path) +{ + struct path *base_path = &mi->mi_backing_dir_path; + int dir_fd = get_unused_fd_flags(0); + struct file *dir_f = NULL; + int error = 0; + + if (dir_fd < 0) + return dir_fd; + + dir_f = dentry_open(base_path, O_RDONLY | O_NOATIME, mi->mi_owner); + + 
if (IS_ERR(dir_f)) { + error = PTR_ERR(dir_f); + goto out; + } + fd_install(dir_fd, dir_f); + + if (!relative_path) { + /* No relative path given, just return the base dir. */ + *result_path = *base_path; + path_get(result_path); + goto out; + } + + error = user_path_at_empty(dir_fd, relative_path, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL); + +out: + ksys_close(dir_fd); + if (error) + pr_debug("incfs: %s %d\n", __func__, error); + return error; +} + +static int validate_name(char *file_name) +{ + struct mem_range name = range(file_name, strlen(file_name)); + int i = 0; + + if (name.len > INCFS_MAX_NAME_LEN) + return -ENAMETOOLONG; + + if (incfs_equal_ranges(pending_reads_file_name_range, name)) + return -EINVAL; + + for (i = 0; i < name.len; i++) + if (name.data[i] == '/') + return -EINVAL; + + return 0; +} + +static long ioctl_create_file(struct mount_info *mi, + struct incfs_new_file_args __user *usr_args) +{ + struct incfs_new_file_args args; + char *file_id_str = NULL; + struct dentry *index_file_dentry = NULL; + struct dentry *named_file_dentry = NULL; + struct path parent_dir_path = {}; + struct inode *index_dir_inode = NULL; + __le64 size_attr_value = 0; + char *file_name = NULL; + char *attr_value = NULL; + int error = 0; + bool locked = false; + + if (!mi || !mi->mi_index_dir) { + error = -EFAULT; + goto out; + } + if (!access_ok(usr_args, sizeof(args))) { + error = -EFAULT; + goto out; + } + if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { + error = -EFAULT; + goto out; + } + + file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); + if (IS_ERR(file_name)) { + error = PTR_ERR(file_name); + file_name = NULL; + goto out; + } + + error = validate_name(file_name); + if (error) + goto out; + + file_id_str = file_id_to_str(args.file_id); + if (!file_id_str) { + error = -ENOMEM; + goto out; + } + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + goto out; + locked = true; + + /* Find a directory to 
put the file into. */ + error = dir_relative_path_resolve(mi, + u64_to_user_ptr(args.directory_path), + &parent_dir_path); + if (error) + goto out; + + if (parent_dir_path.dentry == mi->mi_index_dir) { + /* Can't create a file directly inside .index */ + error = -EBUSY; + goto out; + } + + /* Look up a dentry in the parent dir. It should be negative. */ + named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, + file_name); + if (!named_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(named_file_dentry)) { + error = PTR_ERR(named_file_dentry); + named_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(named_file_dentry)) { + /* File with this path already exists. */ + error = -EEXIST; + goto out; + } + /* Look up a dentry in the .index dir. It should be negative. */ + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); + if (!index_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(index_file_dentry)) { + error = PTR_ERR(index_file_dentry); + index_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(index_file_dentry)) { + /* File with this ID already exists in index. */ + error = -EEXIST; + goto out; + } + + /* Creating a file in the .index dir. */ + index_dir_inode = d_inode(mi->mi_index_dir); + inode_lock_nested(index_dir_inode, I_MUTEX_PARENT); + error = vfs_create(index_dir_inode, index_file_dentry, + args.mode, true); + inode_unlock(index_dir_inode); + + if (error) + goto out; + if (!d_really_is_positive(index_file_dentry)) { + error = -EINVAL; + goto out; + } + + /* Save the file's ID as an xattr for easy fetching in future. */ + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_ID_NAME, + file_id_str, strlen(file_id_str), XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_index_file; + } + + /* Save the file's size as an xattr for easy fetching in future. 
*/ + size_attr_value = cpu_to_le64(args.size); + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value), + XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_index_file; + } + + /* Save the file's attrubute as an xattr */ + if (args.file_attr_len && args.file_attr) { + if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { + error = -E2BIG; + goto delete_index_file; + } + + attr_value = kmalloc(args.file_attr_len, GFP_NOFS); + if (!attr_value) { + error = -ENOMEM; + goto delete_index_file; + } + + if (!access_ok(u64_to_user_ptr(args.file_attr), + args.file_attr_len)) { + error = -EFAULT; + goto delete_index_file; + } + + if (copy_from_user(attr_value, + u64_to_user_ptr(args.file_attr), + args.file_attr_len) > 0) { + error = -EFAULT; + goto delete_index_file; + } + + error = vfs_setxattr(index_file_dentry, + INCFS_XATTR_METADATA_NAME, + attr_value, args.file_attr_len, + XATTR_CREATE); + + if (error) + goto delete_index_file; + } + + /* Initializing a newly created file. */ + error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, + range(attr_value, args.file_attr_len), + (struct incfs_file_signature_info __user *) + args.signature_info); + if (error) + goto delete_index_file; + + /* Linking a file with it's real name from the requested dir. 
*/ + error = incfs_link(index_file_dentry, named_file_dentry); + + if (!error) + goto out; + +delete_index_file: + incfs_unlink(index_file_dentry); + +out: + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + + kfree(file_id_str); + kfree(file_name); + kfree(attr_value); + dput(named_file_dentry); + dput(index_file_dentry); + path_put(&parent_dir_path); + if (locked) + mutex_unlock(&mi->mi_dir_struct_mutex); + return error; +} + +static long ioctl_read_file_signature(struct file *f, void __user *arg) +{ + struct incfs_get_file_sig_args __user *args_usr_ptr = arg; + struct incfs_get_file_sig_args args = {}; + u8 *sig_buffer = NULL; + size_t sig_buf_size = 0; + int error = 0; + int read_result = 0; + struct data_file *df = get_incfs_data_file(f); + + if (!df) + return -EINVAL; + + if (!access_ok(args_usr_ptr, sizeof(args))) + return -EFAULT; + if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) + return -EINVAL; + + if (!access_ok(u64_to_user_ptr(args.file_signature), + args.file_signature_buf_size)) + return -EFAULT; + + sig_buf_size = args.file_signature_buf_size; + if (sig_buf_size > INCFS_MAX_SIGNATURE_SIZE) + return -E2BIG; + + sig_buffer = kzalloc(sig_buf_size, GFP_NOFS); + if (!sig_buffer) + return -ENOMEM; + + read_result = incfs_read_file_signature(df, + range(sig_buffer, sig_buf_size)); + + if (read_result < 0) { + error = read_result; + goto out; + } + + if (copy_to_user(u64_to_user_ptr(args.file_signature), sig_buffer, + read_result)) { + error = -EFAULT; + goto out; + } + + args.file_signature_len_out = read_result; + if (copy_to_user(args_usr_ptr, &args, sizeof(args))) + error = -EFAULT; + +out: + kfree(sig_buffer); + + return error; +} + +static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + + switch (req) { + case INCFS_IOC_CREATE_FILE: + return ioctl_create_file(mi, (void __user *)arg); + case INCFS_IOC_READ_FILE_SIGNATURE: + return 
ioctl_read_file_signature(f, (void __user *)arg); + default: + return -EINVAL; + } +} + +static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, + unsigned int flags) +{ + struct mount_info *mi = get_mount_info(dir_inode->i_sb); + struct dentry *dir_dentry = NULL; + struct dentry *backing_dentry = NULL; + struct path dir_backing_path = {}; + struct inode_info *dir_info = get_incfs_node(dir_inode); + struct mem_range name_range = + range((u8 *)dentry->d_name.name, dentry->d_name.len); + int err = 0; + + if (d_inode(mi->mi_backing_dir_path.dentry) == + dir_info->n_backing_inode) { + /* We do lookup in the FS root. Show pseudo files. */ + + if (incfs_equal_ranges(pending_reads_file_name_range, + name_range)) { + struct inode *inode = fetch_pending_reads_inode( + dir_inode->i_sb); + + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + goto out; + } + + if (incfs_equal_ranges(log_file_name_range, name_range)) { + struct inode *inode = fetch_log_inode( + dir_inode->i_sb); + + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + goto out; + } + } + + dir_dentry = dget_parent(dentry); + get_incfs_backing_path(dir_dentry, &dir_backing_path); + backing_dentry = incfs_lookup_dentry(dir_backing_path.dentry, + dentry->d_name.name); + + if (!backing_dentry || IS_ERR(backing_dentry)) { + err = IS_ERR(backing_dentry) + ? PTR_ERR(backing_dentry) + : -EFAULT; + goto out; + } else { + struct inode *inode = NULL; + struct path backing_path = { + .mnt = dir_backing_path.mnt, + .dentry = backing_dentry + }; + + err = incfs_init_dentry(dentry, &backing_path); + if (err) + goto out; + + if (!d_really_is_positive(backing_dentry)) { + /* + * No such entry found in the backing dir. + * Create a negative entry. 
+ */ + d_add(dentry, NULL); + err = 0; + goto out; + } + + if (d_inode(backing_dentry)->i_sb != + dir_info->n_backing_inode->i_sb) { + /* + * Somehow after the path lookup we ended up in a + * different fs mount. If we keep going it's going + * to end badly. + */ + err = -EXDEV; + goto out; + } + + inode = fetch_regular_inode(dir_inode->i_sb, backing_dentry); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + } + +out: + dput(dir_dentry); + dput(backing_dentry); + path_put(&dir_backing_path); + if (err) + pr_debug("incfs: %s %s %d\n", __func__, + dentry->d_name.name, err); + return ERR_PTR(err); +} + +static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct inode_info *dir_node = get_incfs_node(dir); + struct dentry *backing_dentry = NULL; + struct path backing_path = {}; + int err = 0; + + + if (!mi || !dir_node || !dir_node->n_backing_inode) + return -EBADF; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + backing_dentry = backing_path.dentry; + + if (!backing_dentry) { + err = -EBADF; + goto out; + } + + if (backing_dentry->d_parent == mi->mi_index_dir) { + /* Can't create a subdir inside .index */ + err = -EBUSY; + goto out; + } + + inode_lock_nested(dir_node->n_backing_inode, I_MUTEX_PARENT); + err = vfs_mkdir(dir_node->n_backing_inode, backing_dentry, mode); + inode_unlock(dir_node->n_backing_inode); + if (!err) { + struct inode *inode = NULL; + + if (d_really_is_negative(backing_dentry)) { + err = -EINVAL; + goto out; + } + + inode = fetch_regular_inode(dir->i_sb, backing_dentry); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + d_instantiate(dentry, inode); + } + +out: + if (d_really_is_negative(dentry)) + d_drop(dentry); + path_put(&backing_path); + mutex_unlock(&mi->mi_dir_struct_mutex); + if (err) + pr_debug("incfs: %s 
err:%d\n", __func__, err); + return err; +} + +/* Delete file referenced by backing_dentry and also its hardlink from .index */ +static int final_file_delete(struct mount_info *mi, + struct dentry *backing_dentry) +{ + struct dentry *index_file_dentry = NULL; + /* 2 chars per byte of file ID + 1 char for \0 */ + char file_id_str[2 * sizeof(incfs_uuid_t) + 1] = {0}; + ssize_t uuid_size = 0; + int error = 0; + + WARN_ON(!mutex_is_locked(&mi->mi_dir_struct_mutex)); + uuid_size = vfs_getxattr(backing_dentry, INCFS_XATTR_ID_NAME, + file_id_str, 2 * sizeof(incfs_uuid_t)); + if (uuid_size < 0) { + error = uuid_size; + goto out; + } + + if (uuid_size != 2 * sizeof(incfs_uuid_t)) { + error = -EBADMSG; + goto out; + } + + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); + if (IS_ERR(index_file_dentry)) { + error = PTR_ERR(index_file_dentry); + goto out; + } + + error = incfs_unlink(backing_dentry); + if (error) + goto out; + + if (d_really_is_positive(index_file_dentry)) + error = incfs_unlink(index_file_dentry); +out: + if (error) + pr_debug("incfs: delete_file_from_index err:%d\n", error); + return error; +} + +static int dir_unlink(struct inode *dir, struct dentry *dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_path = {}; + struct kstat stat; + int err = 0; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + if (!backing_path.dentry) { + err = -EBADF; + goto out; + } + + if (backing_path.dentry->d_parent == mi->mi_index_dir) { + /* Direct unlink from .index are not allowed. */ + err = -EBUSY; + goto out; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + err = vfs_getattr(&backing_path, &stat); +#else + err = vfs_getattr(&backing_path, &stat, STATX_NLINK, + AT_STATX_SYNC_AS_STAT); +#endif + if (err) + goto out; + + if (stat.nlink == 2) { + /* + * This is the last named link to this file. 
The only one left + * is in .index. Remove them both now. + */ + err = final_file_delete(mi, backing_path.dentry); + } else { + /* There are other links to this file. Remove just this one. */ + err = incfs_unlink(backing_path.dentry); + } + + d_drop(dentry); +out: + path_put(&backing_path); + if (err) + pr_debug("incfs: %s err:%d\n", __func__, err); + mutex_unlock(&mi->mi_dir_struct_mutex); + return err; +} + +static int dir_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_old_path = {}; + struct path backing_new_path = {}; + int error = 0; + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + return error; + + get_incfs_backing_path(old_dentry, &backing_old_path); + get_incfs_backing_path(new_dentry, &backing_new_path); + + if (backing_new_path.dentry->d_parent == mi->mi_index_dir) { + /* Can't link to .index */ + error = -EBUSY; + goto out; + } + + error = incfs_link(backing_old_path.dentry, backing_new_path.dentry); + if (!error) { + struct inode *inode = NULL; + struct dentry *bdentry = backing_new_path.dentry; + + if (d_really_is_negative(bdentry)) { + error = -EINVAL; + goto out; + } + + inode = fetch_regular_inode(dir->i_sb, bdentry); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + goto out; + } + d_instantiate(new_dentry, inode); + } + +out: + path_put(&backing_old_path); + path_put(&backing_new_path); + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + mutex_unlock(&mi->mi_dir_struct_mutex); + return error; +} + +static int dir_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_path = {}; + int err = 0; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + if (!backing_path.dentry) { + err = -EBADF; + goto out; + } + + if (backing_path.dentry == 
mi->mi_index_dir) { + /* Can't delete .index */ + err = -EBUSY; + goto out; + } + + err = incfs_rmdir(backing_path.dentry); + if (!err) + d_drop(dentry); +out: + path_put(&backing_path); + if (err) + pr_debug("incfs: %s err:%d\n", __func__, err); + mutex_unlock(&mi->mi_dir_struct_mutex); + return err; +} + +static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct mount_info *mi = get_mount_info(old_dir->i_sb); + struct dentry *backing_old_dentry; + struct dentry *backing_new_dentry; + struct dentry *backing_old_dir_dentry; + struct dentry *backing_new_dir_dentry; + struct inode *target_inode; + struct dentry *trap; + int error = 0; + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + return error; + + backing_old_dentry = get_incfs_dentry(old_dentry)->backing_path.dentry; + backing_new_dentry = get_incfs_dentry(new_dentry)->backing_path.dentry; + dget(backing_old_dentry); + dget(backing_new_dentry); + + backing_old_dir_dentry = dget_parent(backing_old_dentry); + backing_new_dir_dentry = dget_parent(backing_new_dentry); + target_inode = d_inode(new_dentry); + + if (backing_old_dir_dentry == mi->mi_index_dir) { + /* Direct moves from .index are not allowed. 
*/ + error = -EBUSY; + goto out; + } + + trap = lock_rename(backing_old_dir_dentry, backing_new_dir_dentry); + + if (trap == backing_old_dentry) { + error = -EINVAL; + goto unlock_out; + } + if (trap == backing_new_dentry) { + error = -ENOTEMPTY; + goto unlock_out; + } + + error = vfs_rename(d_inode(backing_old_dir_dentry), backing_old_dentry, + d_inode(backing_new_dir_dentry), backing_new_dentry, + NULL, 0); + if (error) + goto unlock_out; + if (target_inode) + fsstack_copy_attr_all(target_inode, + get_incfs_node(target_inode)->n_backing_inode); + fsstack_copy_attr_all(new_dir, d_inode(backing_new_dir_dentry)); + if (new_dir != old_dir) + fsstack_copy_attr_all(old_dir, d_inode(backing_old_dir_dentry)); + +unlock_out: + unlock_rename(backing_old_dir_dentry, backing_new_dir_dentry); + +out: + dput(backing_new_dir_dentry); + dput(backing_old_dir_dentry); + dput(backing_new_dentry); + dput(backing_old_dentry); + + mutex_unlock(&mi->mi_dir_struct_mutex); + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + return error; +} + + +static int file_open(struct inode *inode, struct file *file) +{ + struct mount_info *mi = get_mount_info(inode->i_sb); + struct file *backing_file = NULL; + struct path backing_path = {}; + int err = 0; + + get_incfs_backing_path(file->f_path.dentry, &backing_path); + backing_file = dentry_open(&backing_path, O_RDWR | O_NOATIME, + mi->mi_owner); + path_put(&backing_path); + + if (IS_ERR(backing_file)) { + err = PTR_ERR(backing_file); + backing_file = NULL; + goto out; + } + + if (S_ISREG(inode->i_mode)) + err = make_inode_ready_for_data_ops(mi, inode, backing_file); + else if (S_ISDIR(inode->i_mode)) { + struct dir_file *dir = NULL; + + dir = incfs_open_dir_file(mi, backing_file); + if (IS_ERR(dir)) + err = PTR_ERR(dir); + else + file->private_data = dir; + } else + err = -EBADF; + +out: + if (err) + pr_debug("incfs: %s name:%s err: %d\n", __func__, + file->f_path.dentry->d_name.name, err); + if (backing_file) + fput(backing_file); 
+ return err; +} + +static int file_release(struct inode *inode, struct file *file) +{ + if (S_ISREG(inode->i_mode)) { + /* Do nothing. + * data_file is released only by inode eviction. + */ + } else if (S_ISDIR(inode->i_mode)) { + struct dir_file *dir = get_incfs_dir_file(file); + + incfs_free_dir_file(dir); + } + + return 0; +} + +static ssize_t file_write(struct file *f, const char __user *buf, + size_t size, loff_t *offset) +{ + struct data_file *df = get_incfs_data_file(f); + const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_count = size / sizeof(struct incfs_new_data_block); + struct incfs_new_data_block __user *usr_blocks = + (struct incfs_new_data_block __user *)buf; + u8 *data_buf = NULL; + ssize_t error = 0; + int i = 0; + + if (!df) + return -EBADF; + + if (!access_ok(usr_blocks, size)) + return -EFAULT; + + data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); + if (!data_buf) + return -ENOMEM; + + for (i = 0; i < block_count; i++) { + struct incfs_new_data_block block = {}; + + if (copy_from_user(&block, &usr_blocks[i], sizeof(block)) > 0) { + error = -EFAULT; + break; + } + + if (block.data_len > data_buf_size) { + error = -E2BIG; + break; + } + if (!access_ok(u64_to_user_ptr(block.data), + block.data_len)) { + error = -EFAULT; + break; + } + if (copy_from_user(data_buf, u64_to_user_ptr(block.data), + block.data_len) > 0) { + error = -EFAULT; + break; + } + block.data = 0; /* To make sure nobody uses it. */ + if (block.flags & INCFS_BLOCK_FLAGS_HASH) { + error = incfs_process_new_hash_block(df, &block, + data_buf); + } else { + error = incfs_process_new_data_block(df, &block, + data_buf); + } + if (error) + break; + } + + if (data_buf) + free_pages((unsigned long)data_buf, get_order(data_buf_size)); + *offset = 0; + + /* + * Only report the error if no records were processed, otherwise + * just return how many were processed successfully. 
+ */ + if (i == 0) + return error; + + return i * sizeof(struct incfs_new_data_block); +} + + +static int dentry_revalidate(struct dentry *d, unsigned int flags) +{ + struct path backing_path = {}; + struct inode_info *info = get_incfs_node(d_inode(d)); + struct inode *binode = (info == NULL) ? NULL : info->n_backing_inode; + struct dentry *backing_dentry = NULL; + int result = 0; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + get_incfs_backing_path(d, &backing_path); + backing_dentry = backing_path.dentry; + if (!backing_dentry) + goto out; + + if (d_inode(backing_dentry) != binode) { + /* + * Backing inodes obtained via dentry and inode don't match. + * It indicates that most likely backing dir has changed + * directly bypassing Incremental FS interface. + */ + goto out; + } + + if (backing_dentry->d_flags & DCACHE_OP_REVALIDATE) { + result = backing_dentry->d_op->d_revalidate(backing_dentry, + flags); + } else + result = 1; + +out: + path_put(&backing_path); + return result; +} + +static void dentry_release(struct dentry *d) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (di) + path_put(&di->backing_path); + d->d_fsdata = NULL; +} + +static struct inode *alloc_inode(struct super_block *sb) +{ + struct inode_info *node = kzalloc(sizeof(*node), GFP_NOFS); + + /* TODO: add a slab-based cache here. 
*/ + if (!node) + return NULL; + inode_init_once(&node->n_vfs_inode); + return &node->n_vfs_inode; +} + +static void free_inode(struct inode *inode) +{ + struct inode_info *node = get_incfs_node(inode); + + kfree(node); +} + +static void evict_inode(struct inode *inode) +{ + struct inode_info *node = get_incfs_node(inode); + + if (node) { + if (node->n_backing_inode) { + iput(node->n_backing_inode); + node->n_backing_inode = NULL; + } + if (node->n_file) { + incfs_free_data_file(node->n_file); + node->n_file = NULL; + } + } + + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); +} + +static ssize_t incfs_getxattr(struct dentry *d, const char *name, + void *value, size_t size) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (!di || !di->backing_path.dentry) + return -ENODATA; + + return vfs_getxattr(di->backing_path.dentry, name, value, size); +} + +static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (!di || !di->backing_path.dentry) + return -ENODATA; + + return vfs_listxattr(di->backing_path.dentry, list, size); +} + +struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, + const char *dev_name, void *data) +{ + struct mount_options options = {}; + struct mount_info *mi = NULL; + struct path backing_dir_path = {}; + struct dentry *index_dir; + struct super_block *src_fs_sb = NULL; + struct inode *root_inode = NULL; + struct super_block *sb = sget(type, NULL, set_anon_super, flags, NULL); + int error = 0; + + if (IS_ERR(sb)) + return ERR_CAST(sb); + + sb->s_op = &incfs_super_ops; + sb->s_d_op = &incfs_dentry_ops; + sb->s_flags |= S_NOATIME; + sb->s_magic = INCFS_MAGIC_NUMBER; + sb->s_time_gran = 1; + sb->s_blocksize = INCFS_DATA_FILE_BLOCK_SIZE; + sb->s_blocksize_bits = blksize_bits(sb->s_blocksize); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) + sb->s_xattr = incfs_xattr_ops; +#endif + + BUILD_BUG_ON(PAGE_SIZE != INCFS_DATA_FILE_BLOCK_SIZE); + 
+ error = parse_options(&options, (char *)data); + if (error != 0) { + pr_err("incfs: Options parsing error. %d\n", error); + goto err; + } + + sb->s_bdi->ra_pages = options.readahead_pages; + if (!dev_name) { + pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); + error = -ENOENT; + goto err; + } + + error = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &backing_dir_path); + if (error || backing_dir_path.dentry == NULL || + !d_really_is_positive(backing_dir_path.dentry)) { + pr_err("incfs: Error accessing: %s.\n", + dev_name); + goto err; + } + src_fs_sb = backing_dir_path.dentry->d_sb; + sb->s_maxbytes = src_fs_sb->s_maxbytes; + + mi = incfs_alloc_mount_info(sb, &options, &backing_dir_path); + + if (IS_ERR_OR_NULL(mi)) { + error = PTR_ERR(mi); + pr_err("incfs: Error allocating mount info. %d\n", error); + mi = NULL; + goto err; + } + + index_dir = open_or_create_index_dir(backing_dir_path.dentry); + if (IS_ERR_OR_NULL(index_dir)) { + error = PTR_ERR(index_dir); + pr_err("incfs: Can't find or create .index dir in %s\n", + dev_name); + goto err; + } + mi->mi_index_dir = index_dir; + + sb->s_fs_info = mi; + root_inode = fetch_regular_inode(sb, backing_dir_path.dentry); + if (IS_ERR(root_inode)) { + error = PTR_ERR(root_inode); + goto err; + } + + sb->s_root = d_make_root(root_inode); + if (!sb->s_root) { + error = -ENOMEM; + goto err; + } + error = incfs_init_dentry(sb->s_root, &backing_dir_path); + if (error) + goto err; + + path_put(&backing_dir_path); + sb->s_flags |= SB_ACTIVE; + + pr_debug("infs: mount\n"); + return dget(sb->s_root); +err: + sb->s_fs_info = NULL; + path_put(&backing_dir_path); + incfs_free_mount_info(mi); + deactivate_locked_super(sb); + return ERR_PTR(error); +} + +static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct mount_options options; + struct mount_info *mi = get_mount_info(sb); + int err = 0; + + sync_filesystem(sb); + err = parse_options(&options, (char *)data); + if 
(err) + return err; + + if (mi->mi_options.read_timeout_ms != options.read_timeout_ms) { + mi->mi_options.read_timeout_ms = options.read_timeout_ms; + pr_debug("incfs: new timeout_ms=%d", options.read_timeout_ms); + } + + pr_debug("infs: remount\n"); + return 0; +} + +void incfs_kill_sb(struct super_block *sb) +{ + struct mount_info *mi = sb->s_fs_info; + + pr_debug("infs: unmount\n"); + incfs_free_mount_info(mi); + generic_shutdown_super(sb); +} + +static int show_options(struct seq_file *m, struct dentry *root) +{ + struct mount_info *mi = get_mount_info(root->d_sb); + + seq_printf(m, ",read_timeout_ms=%u", mi->mi_options.read_timeout_ms); + seq_printf(m, ",readahead=%u", mi->mi_options.readahead_pages); + if (mi->mi_options.read_log_pages != 0) { + seq_printf(m, ",rlog_pages=%u", mi->mi_options.read_log_pages); + seq_printf(m, ",rlog_wakeup_cnt=%u", + mi->mi_options.read_log_wakeup_count); + } + if (mi->mi_options.no_backing_file_cache) + seq_puts(m, ",no_bf_cache"); + if (mi->mi_options.no_backing_file_readahead) + seq_puts(m, ",no_bf_readahead"); + return 0; +} diff --git a/fs/incfs/vfs.h b/fs/incfs/vfs.h new file mode 100644 index 000000000000..eaa490e19072 --- /dev/null +++ b/fs/incfs/vfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018 Google LLC + */ + +#ifndef _INCFS_VFS_H +#define _INCFS_VFS_H + +void incfs_kill_sb(struct super_block *sb); +struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, + const char *dev_name, void *data); + +#endif diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h new file mode 100644 index 000000000000..8a06e2e48fc4 --- /dev/null +++ b/include/uapi/linux/incrementalfs.h @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Userspace interface for Incremental FS. 
/*
 * Description of a pending read. A pending read is a read call made by
 * a userspace program for which the filesystem currently doesn't have data.
 */
struct incfs_pending_read_info {
	/* Id of the file that is being read from. */
	incfs_uuid_t file_id;

	/* Time of the read, in microseconds since system boot. */
	__aligned_u64 timestamp_us;

	/* Index of the file block that is being read. */
	__u32 block_index;

	/* Serial number of this pending read. */
	__u32 serial_number;
};
/*
 * A struct to be written into a control file to load a data or hash
 * block to a data file.
 *
 * The write handler in fs/incfs/vfs.c (file_write) accepts an array of
 * these records per write() call and rejects a record whose data_len
 * exceeds 2 * INCFS_DATA_FILE_BLOCK_SIZE with E2BIG.
 */
struct incfs_new_data_block {
	/* Index of a data block. */
	__u32 block_index;

	/* Length of the data pointed to by 'data', in bytes. */
	__u32 data_len;

	/*
	 * A pointer to the actual data for the block.
	 *
	 * Equivalent to: __u8 *data;
	 */
	__aligned_u64 data;

	/*
	 * Compression algorithm used to compress the data block.
	 * Values from enum incfs_compression_alg.
	 */
	__u8 compression;

	/* Values from enum incfs_block_flags */
	__u8 flags;

	__u16 reserved1;

	__u32 reserved2;

	__aligned_u64 reserved3;
};

/* Hash tree algorithm of a file; stored in incfs_file_signature_info. */
enum incfs_hash_tree_algorithm {
	INCFS_HASH_TREE_NONE = 0,
	INCFS_HASH_TREE_SHA256 = 1
};

/* Signature description of a file; all pointers are userspace addresses. */
struct incfs_file_signature_info {
	/*
	 * A pointer to the file's root hash (if determined != 0).
	 * NOTE(review): "determined" does not name any field of this
	 * struct - confirm what condition was meant.
	 * Actual hash size is determined by hash_tree_alg.
	 * Size of the buffer should be at least INCFS_MAX_HASH_SIZE.
	 *
	 * Equivalent to: u8 *root_hash;
	 */
	__aligned_u64 root_hash;

	/*
	 * A pointer to additional data that was attached to the root hash
	 * before signing.
	 *
	 * Equivalent to: u8 *additional_data;
	 */
	__aligned_u64 additional_data;

	/* Size of additional data. */
	__u32 additional_data_size;

	__u32 reserved1;

	/*
	 * A pointer to pkcs7 signature DER blob.
	 *
	 * Equivalent to: u8 *signature;
	 */
	__aligned_u64 signature;


	/* Size of pkcs7 signature DER blob */
	__u32 signature_size;

	__u32 reserved2;

	/* Value from incfs_hash_tree_algorithm */
	__u8 hash_tree_alg;
};
/*
 * Create a new file or directory.
 * Argument for the INCFS_IOC_CREATE_FILE ioctl.
 */
struct incfs_new_file_args {
	/* Id of a file to create. */
	incfs_uuid_t file_id;

	/*
	 * Total size of the new file. Ignored if S_ISDIR(mode).
	 */
	__aligned_u64 size;

	/*
	 * File mode. Permissions and dir flag.
	 */
	__u16 mode;

	__u16 reserved1;

	__u32 reserved2;

	/*
	 * A pointer to a null-terminated relative path to the file's parent
	 * dir.
	 * Max length: PATH_MAX
	 *
	 * Equivalent to: char *directory_path;
	 */
	__aligned_u64 directory_path;

	/*
	 * A pointer to the file's null-terminated name.
	 * Max length: PATH_MAX
	 *
	 * Equivalent to: char *file_name;
	 */
	__aligned_u64 file_name;

	/*
	 * A pointer to a file attribute to be set on creation.
	 *
	 * Equivalent to: u8 *file_attr;
	 */
	__aligned_u64 file_attr;

	/*
	 * Length of the data buffer specified by file_attr.
	 * Max value: INCFS_MAX_FILE_ATTR_SIZE
	 */
	__u32 file_attr_len;

	__u32 reserved4;

	/* struct incfs_file_signature_info *signature_info; */
	__aligned_u64 signature_info;

	__aligned_u64 reserved5;

	__aligned_u64 reserved6;
};
/*
 * Request a digital signature blob for a given file.
 * Argument for INCFS_IOC_READ_FILE_SIGNATURE ioctl
 */
struct incfs_get_file_sig_args {
	/*
	 * A pointer to the data buffer to save a signature blob to.
	 *
	 * Equivalent to: u8 *file_signature;
	 */
	__aligned_u64 file_signature;

	/* Size of the buffer at file_signature. */
	__u32 file_signature_buf_size;

	/*
	 * Number of bytes saved to the file_signature buffer.
	 * It is set once the ioctl is done.
	 */
	__u32 file_signature_len_out;
};
+}; + +struct test_file { + int index; + incfs_uuid_t id; + char *name; + off_t size; + char root_hash[INCFS_MAX_HASH_SIZE]; + struct hash_block *mtree; + int mtree_block_count; + struct test_signature sig; +}; + +struct test_files_set { + struct test_file *files; + int files_count; +}; + +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[0]; +} __packed; + +/* + * The certificate below and the private key were created by calling: + * openssl req -x509 -newkey rsa:4096 -keyout private.key -out cert.crt + * -days 1000 -sha256 -nodes -outform PEM -subj + * "/C=US/ST=WA/L=Kirkland/O=Example/OU=Org/CN=www.example.com" + */ +char x509_cert[] = +"-----BEGIN CERTIFICATE-----\n" +"MIIFvzCCA6egAwIBAgIUXpwqelEljm6BBllRQGHLrls2MYgwDQYJKoZIhvcNAQEL\n" +"BQAwbzELMAkGA1UEBhMCVVMxEzARBgNVBAgMCldhc2hpbmd0b24xETAPBgNVBAcM\n" +"CEtpcmtsYW5kMRAwDgYDVQQKDAdFeGFtcGxlMQwwCgYDVQQLDANPcmcxGDAWBgNV\n" +"BAMMD3d3dy5leGFtcGxlLmNvbTAeFw0xOTA4MDgyMzA3MDZaFw0yMjA1MDQyMzA3\n" +"MDZaMG8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApXYXNoaW5ndG9uMREwDwYDVQQH\n" +"DAhLaXJrbGFuZDEQMA4GA1UECgwHRXhhbXBsZTEMMAoGA1UECwwDT3JnMRgwFgYD\n" +"VQQDDA93d3cuZXhhbXBsZS5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK\n" +"AoICAQC1LuFW/lDV/GflqFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43\n" +"NeeJtqUoVxSLS9wHURjSjD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtA\n" +"uYcY4P9GHQEXYUX+ue82A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt\n" +"4/NXS/Dn+S0/mJlxw34IKfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RAD\n" +"qGewNNCab3ClJDP7/M32BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolV\n" +"gSL1HM2jin5bi4bpFMreY0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBT\n" +"qjjFb3oiSMugJzY+MhISM754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3\n" +"UgC6SyVmZxG2o+AO6m8TRTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiV\n" +"XDmotNb2myXNYHHTjRYNxkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61S\n" +"oxKWi+LGa7B4NaCMjz1LnaOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAb\n" 
+"uxkq9EYUDg+w9broltiBf4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABo1MwUTAd\n" +"BgNVHQ4EFgQUo6JN3gY2yGbzOTNj8Al7hNB3rw0wHwYDVR0jBBgwFoAUo6JN3gY2\n" +"yGbzOTNj8Al7hNB3rw0wDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC\n" +"AgEAQb3pJqOzM4whfNVdpEOswd1EApcWNM1ps9iTlEEjDoRv9F7F1PW0uXCIpk3B\n" +"j5JgCmIxAcPnzj42rduRSx421hHMZhbAIWI/JL4ZSF64qlG0YrmJDXlJgSMoyst5\n" +"biUqeWgO7Js5udPt3zhkeA62z3hGM6dE5B3k7gHTaKKtK17+UeR9imZKsOK8GBnM\n" +"rxMPI6XghxxAK2OQ/r09DHDiyf/GxgOE46oknfXfMPx3HaSvDKrZUTZ+UvVbM5c2\n" +"5eXOgH5UO/e4llLknJK7CoP/R6G7pV44iT4t4t9FMnvCYvavAHwfR+6z5vTF3o8a\n" +"wd80fC8z1vfLsIPLROdzBl9rGCvv536fPiEA677CM1AZkjfT0a9DVzrE1NDvuCUF\n" +"0KgEdiNwux+hO6dbTyiS38yPT6TbpoWJptJmFhFkC4hGvUgoX/TI0covSyf74VRH\n" +"k3BHojOBMYiX1K66xoN7fhlGK8cith3L0XXPB8CgSEUPWURvm8RCaGuX2T3FZomF\n" +"BCnNpN+WNnN3Yf4OkjtuvtxxktUU7pfVLsUxrdpo/ph4rWm6U83VT/Zlq92aF4vW\n" +"QJ+7uraQFip7e+Gy9g3UJINm3B7b1C4ch/Z/upCZESOI/23sVGzkfTgOrS+23i6/\n" +"Vi9YW75zySC2FCa1AWMS1NmS5qfDSycJUgD6YvOUg0C54ZI=\n" +"-----END CERTIFICATE-----"; + +char private_key[] = +"-----BEGIN PRIVATE KEY-----\n" +"MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC1LuFW/lDV/Gfl\n" +"qFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43NeeJtqUoVxSLS9wHURjS\n" +"jD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtAuYcY4P9GHQEXYUX+ue82\n" +"A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt4/NXS/Dn+S0/mJlxw34I\n" +"KfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RADqGewNNCab3ClJDP7/M32\n" +"BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolVgSL1HM2jin5bi4bpFMre\n" +"Y0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBTqjjFb3oiSMugJzY+MhIS\n" +"M754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3UgC6SyVmZxG2o+AO6m8T\n" +"RTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiVXDmotNb2myXNYHHTjRYN\n" +"xkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61SoxKWi+LGa7B4NaCMjz1L\n" +"naOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAbuxkq9EYUDg+w9broltiB\n" +"f4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABAoICAQCMKul/0J2e/ncub6t2t4dr\n" +"PnTrfCT6xKqPqciny4Ee6hr9So1jR2gvink380bd/mQFMmEdZqGhM3cdpAzLf82f\n" 
+"hu7BSNxsYIF0er0PB4MZFMJ4sMaXC+zp5/TJnP5MG/zBND0c5k8tQpEyWy8O28Jj\n" +"FKW/0F5P90Q0ncP20EJUS50tXgniOMsU2Prtw/UE6yZDgD0mPxsurMu66ycXSFwM\n" +"WqyfqEeBk7lw/AjR6Sft71W31lTbl+DclG0MN2OIKUPcxiwCRmDFKI36MDgERk1x\n" +"sMPfdrWRLj2ryDFTUuLAWBTOVEGWS0RdRsWWVaJCuHbKd6FLl0TW2xQbOfWDTjYC\n" +"Ps31ejh163qdbk7OGOZIbd83fP3jsyL+4eNzhUpeXMKhfG58mFIv4yhdZIUOpuL6\n" +"aqnoU9z9wEsJKj/SrKr3nw6tuTnmbXgNjun9LfTFmqqDRBYd0Okiprw6jHNM1jgA\n" +"GG0kC/K7r89jKymVDABwGMFCS33ynR1Tb6zG+cqgNMPw19Fy3uQuW21CjqSzCOyP\n" +"aEVCEUZeP+ofql5+7ZKi6Dj+EdTfeKt2ihgheHZZoaYSINb8tsnKbdJhwBfW9PFT\n" +"aT/hu3bnO2FPC8H2NGOqxOEeel9ALU4SFu1pOknEhiL3/mNfOQ+KgrSRDtNRlcL0\n" +"cto05J90u0cmqwWKlshfaQKCAQEA5dcklxs4ezyzt28NcsiyS02oZ+9TkQp6pCXV\n" +"kx7AwhivAmVTlJ+c6BegA5EPd7A1gknM3+EKzGpoBOqmlF45G57phVIAphAp4oCH\n" +"UOVtIQgM8p4EU2gtX+uNOopdYlpBQnWimXaHA2sOD9/yTbZ03j/McRH6D15+iCld\n" +"3880GHdZaYYbQmHoSDg39LRRO1bdS3WC0oKBD2gPi3K0b9RaZSwKzuVrmlvrLURj\n" +"WMZfmkGl4BsITfuoTxbWFVncG3Kb9eYkYUFZy4M2G/s849PS/HjrN7BvgpanjtVp\n" +"1/39APQfAYfUuBPbKYnb6F8dE0pb5cVd4uMZklAeTb3bXjOO9QKCAQEAyc4CxWXr\n" +"bG6Do5dGpWudQ7ucq00MR0T3MHQIu5XTn6BsPHAJ9ZgrQw9C24PXm2VEjjsrMs5T\n" +"rHNF9oeO39s25Za1iyJ+893icqA3h3ivCUOOoVE54BkuJK6REhkXPD5G1ubmxeBz\n" +"MKNehlpd/eSbJJArkzKFZ8sBtLt8i9VFhRnXSpDAbiMpCbjW+bem9MWdLmkenSnu\n" +"OUbnqYcJhFBCvOT7ZCHFCDNUNPfHcaReSY2EYjw0ZqtqAZD0Q+DL+RkLz7l1+/bF\n" +"eEwNjmjFTcwRyawqf38D4miU0H6ca16FkeSlbmM5p3HdwZK2HVYYz3FSwhox6Ebd\n" +"n6in42qfL4Ug6wKCAQAh9IDRWhIkErmyNdPUy1WbzmM8x5ye5t9rdLNywq5TfnYM\n" +"co/AezwhBax8GmgglIWzM9fykzqXLHklkMz/SlRBgl6ZdZ3m6qhlb/uNtfdDU/8l\n" +"sLaO4+sgKpp4tYxKRW8ytFJLPbmAhcZUDg+r73KgiuhXJAK/VoR29TWLJP9bRfaN\n" +"omRQkEpSsQuDOUhu7cxPo5KqKuGKNyNkxJNnmgWowLLwEfCtozrBO0M6EER7c4tf\n" +"6l51tuIMnSEPknD0FSB5WYCyZYcwi7fotlsuhVK8PdjyJzyyHDOw5FJ4uGsyQt55\n" +"yWlhsH1GS7mTQMn42Zlt/pR6OnbCqNdxQMUxy4gpAoIBAFvMbs5E0pb8nr0n72cI\n" +"UP2itl3mKpOw95D+94n9WcrfOt0zShSCKAvVQWCB1O5HXqwklj4CRWXI+iZu+7sx\n" +"CQPfTq3//ygH4x6paxkg+N6J8LPJMz6Rtb/R+QP2je9FlQvk9U1GEKArcLBFI0R/\n" 
+"XWOAgZHwBWd1nU0NjFY/qeQmIR02Q5LWQ7C8eG4X8MafriSShO6RSGCdtHwVhWq+\n" +"59ztfL3L7skQMFn37K3xS0LCMVpOcLfTeeFEgxjthVvG3OydPOJlGubiEbiaSEZf\n" +"cif/PUXKDYZMdIVzUsw0ryXykJ5qXKuizHFlv5oQtDCJKFBLgjBbLC2YluaIdekz\n" +"8gkCggEBAJWxS7EuB/qL7fOz0o3HRy0plR3qbwZ0pLoCz0Ii7WxraBS1yQwmxif1\n" +"Rgv89GyFqg1yQl3CSrMiw7oC9WxxxuiEZDO18c4KO3NTv9K4itN9OPQVBTHmEhod\n" +"KWcyP4/W/Sfuae77PyclSqUsAARRrKYn2fpLTS5ibaU0QZgHmdPgYDUrPr+6PHKK\n" +"ZfQKU2uBfuo6zoMbMmFi3UYG49j9rv4d6v+44vS1MPHV9JK/LD8YfBhgx8Pg/u6D\n" +"nUgipS48pkGjJr2u2Vu7Mx70vqz0Yf2neyyDbdLtkYauC4w7YKPTD0yzDJyGuAeB\n" +"GyPbW1yZa5vE302a1Cr0Cd7RC4AFAAw=\n" +"-----END PRIVATE KEY-----"; + +struct test_files_set get_test_files_set(void) +{ + static struct test_file files[] = { + { .index = 0, .name = "file_one_byte", .size = 1 }, + { .index = 1, + .name = "file_one_block", + .size = INCFS_DATA_FILE_BLOCK_SIZE }, + { .index = 2, + .name = "file_one_and_a_half_blocks", + .size = INCFS_DATA_FILE_BLOCK_SIZE + + INCFS_DATA_FILE_BLOCK_SIZE / 2 }, + { .index = 3, + .name = "file_three", + .size = 300 * INCFS_DATA_FILE_BLOCK_SIZE + 3 }, + { .index = 4, + .name = "file_four", + .size = 400 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 5, + .name = "file_five", + .size = 500 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 6, + .name = "file_six", + .size = 600 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 7, + .name = "file_seven", + .size = 700 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 8, + .name = "file_eight", + .size = 800 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 9, + .name = "file_nine", + .size = 900 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 10, .name = "file_big", .size = 500 * 1024 * 1024 } + }; + return (struct test_files_set){ .files = files, + .files_count = ARRAY_SIZE(files) }; +} + +struct test_files_set get_small_test_files_set(void) +{ + static struct test_file files[] = { + { .index = 0, .name = "file_one_byte", .size = 1 }, + { .index = 1, + .name = "file_one_block", + .size = 
/*
 * Fill data[0..len) with a deterministic pseudo-random byte stream derived
 * from seed. Each step advances a 32-bit linear congruential generator and
 * stores the low byte of the state shifted right by (index % 13).
 *
 * The index is a size_t so that it matches the type of len; the previous
 * int index was compared against an unsigned length.
 */
static void rnd_buf(uint8_t *data, size_t len, unsigned int seed)
{
	size_t i;

	for (i = 0; i < len; i++) {
		seed = 1103515245 * seed + 12345;
		data[i] = (uint8_t)(seed >> (i % 13));
	}
}

/*
 * Write the lowercase hex representation of count bytes at src into dst.
 * dst must have room for 2 * count + 1 characters. The output is
 * NUL-terminated and the returned pointer addresses that terminator.
 */
char *bin2hex(char *dst, const void *src, size_t count)
{
	const unsigned char *_src = src;
	static const char hex_asc[] = "0123456789abcdef";

	while (count--) {
		unsigned char x = *_src++;

		*dst++ = hex_asc[(x & 0xf0) >> 4];
		*dst++ = hex_asc[(x & 0x0f)];
	}
	*dst = 0;
	return dst;
}
id) +{ + char *path = get_index_filename(mnt_dir, id); + int fd = open(path, O_RDWR); + + free(path); + if (fd < 0) { + print_error("Can't open file by id."); + return -errno; + } + + return fd; +} + +int get_file_attr(char *mnt_dir, incfs_uuid_t id, char *value, int size) +{ + char *path = get_index_filename(mnt_dir, id); + int res; + + res = getxattr(path, INCFS_XATTR_METADATA_NAME, value, size); + if (res < 0) + res = -errno; + + free(path); + return res; +} + +static bool same_id(incfs_uuid_t *id1, incfs_uuid_t *id2) +{ + return !memcmp(id1->bytes, id2->bytes, sizeof(id1->bytes)); +} + +static int emit_test_blocks(char *mnt_dir, struct test_file *file, + int blocks[], int count) +{ + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + uint8_t comp_data[2 * INCFS_DATA_FILE_BLOCK_SIZE]; + int block_count = (count > 32) ? 32 : count; + int data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE * block_count; + uint8_t *data_buf = malloc(data_buf_size); + uint8_t *current_data = data_buf; + uint8_t *data_end = data_buf + data_buf_size; + struct incfs_new_data_block *block_buf = + calloc(block_count, sizeof(*block_buf)); + ssize_t write_res = 0; + int fd; + int error = 0; + int i = 0; + int blocks_written = 0; + + fd = open_file_by_id(mnt_dir, file->id); + if (fd <= 0) { + error = -errno; + goto out; + } + + for (i = 0; i < block_count; i++) { + int block_index = blocks[i]; + bool compress = (file->index + block_index) % 2 == 0; + int seed = get_file_block_seed(file->index, block_index); + off_t block_offset = + ((off_t)block_index) * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_size = 0; + + if (block_offset > file->size) { + error = -EINVAL; + break; + } + if (file->size - block_offset > + INCFS_DATA_FILE_BLOCK_SIZE) + block_size = INCFS_DATA_FILE_BLOCK_SIZE; + else + block_size = file->size - block_offset; + + rnd_buf(data, block_size, seed); + if (compress) { + size_t comp_size = LZ4_compress_default( + (char *)data, (char *)comp_data, block_size, + ARRAY_SIZE(comp_data)); + + if 
(comp_size <= 0) { + error = -EBADMSG; + break; + } + if (current_data + comp_size > data_end) { + error = -ENOMEM; + break; + } + memcpy(current_data, comp_data, comp_size); + block_size = comp_size; + block_buf[i].compression = COMPRESSION_LZ4; + } else { + if (current_data + block_size > data_end) { + error = -ENOMEM; + break; + } + memcpy(current_data, data, block_size); + block_buf[i].compression = COMPRESSION_NONE; + } + + block_buf[i].block_index = block_index; + block_buf[i].data_len = block_size; + block_buf[i].data = ptr_to_u64(current_data); + block_buf[i].compression = + compress ? COMPRESSION_LZ4 : COMPRESSION_NONE; + current_data += block_size; + } + + if (!error) { + write_res = write(fd, block_buf, sizeof(*block_buf) * i); + if (write_res < 0) + error = -errno; + else + blocks_written = write_res / sizeof(*block_buf); + } + if (error) { + ksft_print_msg( + "Writing data block error. Write returned: %d. Error:%s\n", + write_res, strerror(-error)); + } + +out: + free(block_buf); + free(data_buf); + close(fd); + return (error < 0) ? 
/*
 * Deterministically permute array[0..count) in place. For each position
 * except the last, a 32-bit linear congruential generator seeded with seed
 * picks one of the not-yet-fixed elements and swaps it into that position.
 * Arrays with fewer than two elements are left unchanged.
 */
static void shuffle(int array[], int count, unsigned int seed)
{
	int pos = 0;

	while (pos < count - 1) {
		int remaining = count - pos;
		int pick;
		int tmp;

		seed = 1103515245 * seed + 12345;
		pick = pos + seed % remaining;

		tmp = array[pos];
		array[pos] = array[pick];
		array[pick] = tmp;

		pos++;
	}
}
-1; + int result; + int bytes_read = 0; + size_t bytes_to_read = len; + off_t offset = ((off_t)block_idx) * INCFS_DATA_FILE_BLOCK_SIZE; + + fd = open(filename, O_RDONLY); + if (fd <= 0) + return fd; + + if (lseek(fd, offset, SEEK_SET) != offset) { + print_error("Seek error"); + return -errno; + } + + while (bytes_read < bytes_to_read) { + int read_result = + read(fd, buf + bytes_read, bytes_to_read - bytes_read); + if (read_result < 0) { + result = -errno; + goto cleanup; + } else if (read_result == 0) + break; + + bytes_read += read_result; + } + result = bytes_read; + +cleanup: + close(fd); + return result; +} + +static char *create_backing_dir(char *mount_dir) +{ + struct stat st; + char backing_dir_name[255]; + + snprintf(backing_dir_name, ARRAY_SIZE(backing_dir_name), "%s-src", + mount_dir); + + if (stat(backing_dir_name, &st) == 0) { + if (S_ISDIR(st.st_mode)) { + int error = delete_dir_tree(backing_dir_name); + + if (error) { + ksft_print_msg( + "Can't delete existing backing dir. %d\n", + error); + return NULL; + } + } else { + if (unlink(backing_dir_name)) { + print_error("Can't clear backing dir"); + return NULL; + } + } + } + + if (mkdir(backing_dir_name, 0777)) { + if (errno != EEXIST) { + print_error("Can't open/create backing dir"); + return NULL; + } + } + + return strdup(backing_dir_name); +} + +static int validate_test_file_content_with_seed(char *mount_dir, + struct test_file *file, + unsigned int shuffle_seed) +{ + int error = -1; + char *filename = concat_file_name(mount_dir, file->name); + off_t size = file->size; + loff_t actual_size = get_file_size(filename); + int block_cnt = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int *block_indexes = NULL; + int i; + + block_indexes = alloca(sizeof(int) * block_cnt); + for (i = 0; i < block_cnt; i++) + block_indexes[i] = i; + + if (shuffle_seed != 0) + shuffle(block_indexes, block_cnt, shuffle_seed); + + if (actual_size != size) { + ksft_print_msg( + "File size doesn't match. 
name: %s expected size:%ld actual size:%ld\n", + filename, size, actual_size); + error = -1; + goto failure; + } + + for (i = 0; i < block_cnt; i++) { + int block_idx = block_indexes[i]; + uint8_t expected_block[INCFS_DATA_FILE_BLOCK_SIZE]; + uint8_t actual_block[INCFS_DATA_FILE_BLOCK_SIZE]; + int seed = get_file_block_seed(file->index, block_idx); + size_t bytes_to_compare = min( + (off_t)INCFS_DATA_FILE_BLOCK_SIZE, + size - ((off_t)block_idx) * INCFS_DATA_FILE_BLOCK_SIZE); + int read_result = + read_test_file(actual_block, INCFS_DATA_FILE_BLOCK_SIZE, + filename, block_idx); + if (read_result < 0) { + ksft_print_msg( + "Error reading block %d from file %s. Error: %s\n", + block_idx, filename, strerror(-read_result)); + error = read_result; + goto failure; + } + rnd_buf(expected_block, INCFS_DATA_FILE_BLOCK_SIZE, seed); + if (memcmp(expected_block, actual_block, bytes_to_compare)) { + ksft_print_msg( + "File contents don't match. name: %s block:%d\n", + file->name, block_idx); + error = -2; + goto failure; + } + } + free(filename); + return 0; + +failure: + free(filename); + return error; +} + +static int validate_test_file_content(char *mount_dir, struct test_file *file) +{ + return validate_test_file_content_with_seed(mount_dir, file, 0); +} + +static int data_producer(char *mount_dir, struct test_files_set *test_set) +{ + int ret = 0; + int timeout_ms = 1000; + struct incfs_pending_read_info prs[100] = {}; + int prs_size = ARRAY_SIZE(prs); + int fd = open_commands_file(mount_dir); + + if (fd < 0) + return -errno; + + while ((ret = wait_for_pending_reads(fd, timeout_ms, prs, prs_size)) > + 0) { + int read_count = ret; + int i; + + for (i = 0; i < read_count; i++) { + int j = 0; + struct test_file *file = NULL; + + for (j = 0; j < test_set->files_count; j++) { + bool same = same_id(&(test_set->files[j].id), + &(prs[i].file_id)); + + if (same) { + file = &test_set->files[j]; + break; + } + } + if (!file) { + ksft_print_msg( + "Unknown file in pending reads.\n"); + 
break; + } + + ret = emit_test_block(mount_dir, file, + prs[i].block_index); + if (ret < 0) { + ksft_print_msg("Emitting test data error: %s\n", + strerror(-ret)); + break; + } + } + } + close(fd); + return ret; +} + +static int build_mtree(struct test_file *file) +{ + char data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + const int digest_size = SHA256_DIGEST_SIZE; + const int hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + int block_count = 0; + int hash_block_count = 0; + int total_tree_block_count = 0; + int tree_lvl_index[INCFS_MAX_MTREE_LEVELS] = {}; + int tree_lvl_count[INCFS_MAX_MTREE_LEVELS] = {}; + int levels_count = 0; + char data_to_sign[256] = {}; + int sig_data_size; + int i, level; + + if (file->size == 0) + return 0; + + block_count = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + hash_block_count = block_count; + for (i = 0; hash_block_count > 1; i++) { + hash_block_count = (hash_block_count + hash_per_block - 1) + / hash_per_block; + tree_lvl_count[i] = hash_block_count; + total_tree_block_count += hash_block_count; + } + levels_count = i; + + for (i = 0; i < levels_count; i++) { + int prev_lvl_base = (i == 0) ? total_tree_block_count : + tree_lvl_index[i - 1]; + + tree_lvl_index[i] = prev_lvl_base - tree_lvl_count[i]; + } + + file->mtree_block_count = total_tree_block_count; + if (block_count == 1) { + int seed = get_file_block_seed(file->index, 0); + + rnd_buf((uint8_t *)data, file->size, seed); + sha256(data, file->size, file->root_hash); + return 0; + } + + file->mtree = calloc(total_tree_block_count, sizeof(*file->mtree)); + /* Build level 0 hashes. 
*/ + for (i = 0; i < block_count; i++) { + off_t offset = i * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_size = INCFS_DATA_FILE_BLOCK_SIZE; + int block_index = tree_lvl_index[0] + + i / hash_per_block; + int block_off = (i % hash_per_block) * digest_size; + int seed = get_file_block_seed(file->index, i); + char *hash_ptr = file->mtree[block_index].data + block_off; + + if (file->size - offset < block_size) + block_size = file->size - offset; + + rnd_buf((uint8_t *)data, block_size, seed); + sha256(data, block_size, hash_ptr); + } + + /* Build higher levels of hash tree. */ + for (level = 1; level < levels_count; level++) { + int prev_lvl_base = tree_lvl_index[level - 1]; + int prev_lvl_count = tree_lvl_count[level - 1]; + + for (i = 0; i < prev_lvl_count; i++) { + int block_index = + i / hash_per_block + tree_lvl_index[level]; + int block_off = (i % hash_per_block) * digest_size; + char *hash_ptr = + file->mtree[block_index].data + block_off; + + sha256(file->mtree[i + prev_lvl_base].data, + INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + } + } + + /* Calculate root hash from the top block */ + sha256(file->mtree[0].data, + INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); + + /* Calculating digital signature */ + snprintf(file->sig.add_data, sizeof(file->sig.add_data), "%ld", + file->size); + memcpy(data_to_sign, file->root_hash, SHA256_DIGEST_SIZE); + memcpy(data_to_sign + SHA256_DIGEST_SIZE, file->sig.add_data, + strlen(file->sig.add_data)); + sig_data_size = SHA256_DIGEST_SIZE + strlen(file->sig.add_data); + if (!sign_pkcs7(data_to_sign, sig_data_size, private_key, x509_cert, + &file->sig.data, &file->sig.size)) { + ksft_print_msg("Signing failed.\n"); + return -EINVAL; + } + + return 0; +} + +static int load_hash_tree(const char *mount_dir, struct test_file *file) +{ + int err; + int i; + int fd; + + size_t blocks_size = + file->mtree_block_count * sizeof(struct incfs_new_data_block); + struct incfs_new_data_block *blocks = NULL; + char *file_path; + + if (blocks_size == 
0) + return 0; + + blocks = malloc(blocks_size); + if (!blocks) + return -ENOMEM; + + for (i = 0; i < file->mtree_block_count; i++) { + blocks[i] = (struct incfs_new_data_block){ + .block_index = i, + .data_len = INCFS_DATA_FILE_BLOCK_SIZE, + .data = ptr_to_u64(file->mtree[i].data), + .flags = INCFS_BLOCK_FLAGS_HASH + }; + } + + file_path = concat_file_name(mount_dir, file->name); + fd = open(file_path, O_RDWR); + free(file_path); + if (fd < 0) { + err = errno; + goto failure; + } + + err = write(fd, blocks, blocks_size); + close(fd); + + if (err < blocks_size) + err = errno; + else { + err = 0; + free(file->mtree); + } + +failure: + free(blocks); + return err; +} + +static int cant_touch_index_test(char *mount_dir) +{ + char *file_name = "test_file"; + int file_size = 123; + incfs_uuid_t file_id; + char *index_path = concat_file_name(mount_dir, ".index"); + char *subdir = concat_file_name(index_path, "subdir"); + char *dst_name = concat_file_name(mount_dir, "something"); + char *filename_in_index = NULL; + char *file_path = concat_file_name(mount_dir, file_name); + char *backing_dir; + int cmd_fd = -1; + int err; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. 
*/ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + + err = mkdir(subdir, 0777); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to crate subdir in index\n"); + goto failure; + } + + err = emit_file(cmd_fd, ".index", file_name, &file_id, + file_size, NULL); + if (err != -EBUSY) { + print_error("Shouldn't be able to crate a file in index\n"); + goto failure; + } + + err = emit_file(cmd_fd, NULL, file_name, &file_id, + file_size, NULL); + if (err < 0) + goto failure; + filename_in_index = get_index_filename(mount_dir, file_id); + + err = unlink(filename_in_index); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be delete from index\n"); + goto failure; + } + + + err = rename(filename_in_index, dst_name); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to move from index\n"); + goto failure; + } + + free(filename_in_index); + filename_in_index = concat_file_name(index_path, "abc"); + err = link(file_path, filename_in_index); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to link inside index\n"); + goto failure; + } + + close(cmd_fd); + free(subdir); + free(index_path); + free(dst_name); + free(filename_in_index); + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + free(subdir); + free(dst_name); + free(index_path); + free(filename_in_index); + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static bool iterate_directory(char *dir_to_iterate, bool root, int file_count) +{ + struct expected_name { + const char *name; + bool root_only; + bool found; + } names[] = { + {INCFS_LOG_FILENAME, true, false}, + {INCFS_PENDING_READS_FILENAME, true, false}, + {".index", true, false}, + {"..", false, false}, + {".", false, false}, + }; + + bool pass = true, found; + int i; + + /* Test directory 
iteration */ + int fd = open(dir_to_iterate, O_RDONLY | O_DIRECTORY); + + if (fd < 0) { + print_error("Can't open directory\n"); + return false; + } + + for (;;) { + /* Enough space for one dirent - no name over 30 */ + char buf[sizeof(struct linux_dirent64) + NAME_MAX]; + struct linux_dirent64 *dirent = (struct linux_dirent64 *) buf; + int nread; + int i; + + for (i = 0; i < NAME_MAX; ++i) { + nread = syscall(__NR_getdents64, fd, buf, + sizeof(struct linux_dirent64) + i); + + if (nread >= 0) + break; + if (errno != EINVAL) + break; + } + + if (nread == 0) + break; + if (nread < 0) { + print_error("Error iterating directory\n"); + pass = false; + goto failure; + } + + /* Expected size is rounded up to 8 byte boundary. Not sure if + * this is universal truth or just happenstance, but useful test + * for the moment + */ + if (nread != (((sizeof(struct linux_dirent64) + + strlen(dirent->d_name) + 1) + 7) & ~7)) { + print_error("Wrong dirent size"); + pass = false; + goto failure; + } + + found = false; + for (i = 0; i < sizeof(names) / sizeof(*names); ++i) + if (!strcmp(dirent->d_name, names[i].name)) { + if (names[i].root_only && !root) { + print_error("Root file error"); + pass = false; + goto failure; + } + + if (names[i].found) { + print_error("File appears twice"); + pass = false; + goto failure; + } + + names[i].found = true; + found = true; + break; + } + + if (!found) + --file_count; + } + + for (i = 0; i < sizeof(names) / sizeof(*names); ++i) { + if (!names[i].found) + if (root || !names[i].root_only) { + print_error("Expected file not present"); + pass = false; + goto failure; + } + } + + if (file_count) { + print_error("Wrong number of files\n"); + pass = false; + goto failure; + } + +failure: + close(fd); + return pass; +} + +static int basic_file_ops_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + char *subdir1 = concat_file_name(mount_dir, "subdir1"); + char *subdir2 = 
concat_file_name(mount_dir, "subdir2"); + char *backing_dir; + int cmd_fd = -1; + int i, err; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + err = mkdir(subdir1, 0777); + if (err < 0 && errno != EEXIST) { + print_error("Can't create subdir1\n"); + goto failure; + } + + err = mkdir(subdir2, 0777); + if (err < 0 && errno != EEXIST) { + print_error("Can't create subdir2\n"); + goto failure; + } + + /* Create all test files in subdir1 directory */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + loff_t size; + char *file_path = concat_file_name(subdir1, file->name); + + err = emit_file(cmd_fd, "subdir1", file->name, &file->id, + file->size, NULL); + if (err < 0) + goto failure; + + size = get_file_size(file_path); + free(file_path); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + } + + if (!iterate_directory(subdir1, false, file_num)) + goto failure; + + /* Link the files to subdir2 */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *src_name = concat_file_name(subdir1, file->name); + char *dst_name = concat_file_name(subdir2, file->name); + loff_t size; + + err = link(src_name, dst_name); + if (err < 0) { + print_error("Can't move file\n"); + goto failure; + } + + size = get_file_size(dst_name); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + free(src_name); + free(dst_name); + } + + /* Move the files from subdir2 to the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *src_name = concat_file_name(subdir2, file->name); + char *dst_name = concat_file_name(mount_dir, 
file->name); + loff_t size; + + err = rename(src_name, dst_name); + if (err < 0) { + print_error("Can't move file\n"); + goto failure; + } + + size = get_file_size(dst_name); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + free(src_name); + free(dst_name); + } + + /* +2 because there are 2 subdirs */ + if (!iterate_directory(mount_dir, true, file_num + 2)) + goto failure; + + /* Open and close all files from the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *path = concat_file_name(mount_dir, file->name); + int fd; + + fd = open(path, O_RDWR); + free(path); + if (fd <= 0) { + print_error("Can't open file"); + goto failure; + } + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + } + + /* Delete all files from the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *path = concat_file_name(mount_dir, file->name); + + err = unlink(path); + free(path); + if (err < 0) { + print_error("Can't unlink file"); + goto failure; + } + } + + err = delete_dir_tree(subdir1); + if (err) { + ksft_print_msg("Error deleting subdir1 %d", err); + goto failure; + } + + err = rmdir(subdir2); + if (err) { + print_error("Error deleting subdir2"); + goto failure; + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int dynamic_files_and_data_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int missing_file_idx = 5; + int cmd_fd = -1; + char *backing_dir; + int i; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. 
*/ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Check that test files don't exist in the filesystem. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + + if (access(filename, F_OK) != -1) { + ksft_print_msg( + "File %s somehow already exists in a clean FS.\n", + filename); + goto failure; + } + free(filename); + } + + /* Write test data into the command file. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + if (res < 0) { + ksft_print_msg("Error %s emiting file %s.\n", + strerror(-res), file->name); + goto failure; + } + + /* Skip writing data to one file so we can check */ + /* that it's missing later. */ + if (i == missing_file_idx) + continue; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + + res = emit_test_file_data(mount_dir, file); + if (res) { + ksft_print_msg("Error %s emiting data for %s.\n", + strerror(-res), file->name); + goto failure; + } + } + + /* Validate contents of the FS */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == missing_file_idx) { + /* No data has been written to this file. 
*/ + /* Check for read error; */ + uint8_t buf; + char *filename = + concat_file_name(mount_dir, file->name); + int res = read_test_file(&buf, 1, filename, 0); + + free(filename); + if (res > 0) { + ksft_print_msg( + "Data present, even though never writtern.\n"); + goto failure; + } + if (res != -ETIME) { + ksft_print_msg("Wrong error code: %d.\n", res); + goto failure; + } + } else { + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int concurrent_reads_and_writes_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + /* Validate each file from that many child processes. */ + const int child_multiplier = 3; + int cmd_fd = -1; + char *backing_dir; + int status; + int i; + pid_t producer_pid; + pid_t *child_pids = alloca(child_multiplier * file_num * sizeof(pid_t)); + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + if (res) + goto failure; + } + + /* Start child processes acessing data in the files */ + for (i = 0; i < file_num * child_multiplier; i++) { + struct test_file *file = &test.files[i / child_multiplier]; + pid_t child_pid = flush_and_fork(); + + if (child_pid == 0) { + /* This is a child process, do the data validation. 
*/ + int ret = validate_test_file_content_with_seed( + mount_dir, file, i); + if (ret >= 0) { + /* Zero exit status if data is valid. */ + exit(0); + } + + /* Positive status if validation error found. */ + exit(-ret); + } else if (child_pid > 0) { + child_pids[i] = child_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + producer_pid = flush_and_fork(); + if (producer_pid == 0) { + int ret; + /* + * This is a child that should provide data to + * pending reads. + */ + + ret = data_producer(mount_dir, &test); + exit(-ret); + } else { + status = wait_for_process(producer_pid); + if (status != 0) { + ksft_print_msg("Data produces failed. %d(%s) ", status, + strerror(status)); + goto failure; + } + } + + /* Check that all children has finished with 0 exit status */ + for (i = 0; i < file_num * child_multiplier; i++) { + struct test_file *file = &test.files[i / child_multiplier]; + + status = wait_for_process(child_pids[i]); + if (status != 0) { + ksft_print_msg( + "Validation for the file %s failed with code %d (%s)\n", + file->name, status, strerror(status)); + goto failure; + } + } + + /* Check that there are no pending reads left */ + { + struct incfs_pending_read_info prs[1] = {}; + int timeout = 0; + int read_count = wait_for_pending_reads(cmd_fd, timeout, prs, + ARRAY_SIZE(prs)); + + if (read_count) { + ksft_print_msg( + "Pending reads pending when all data written\n"); + goto failure; + } + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int work_after_remount_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int file_num_stage1 = file_num / 2; + const int file_num_stage2 = file_num; + char *backing_dir = NULL; + int i = 0; + int cmd_fd = -1; + + backing_dir = 
create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write first half of the data into the command file. (stage 1) */ + for (i = 0; i < file_num_stage1; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL)) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } +} + + /* Unmount and mount again, to see that data is persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write the second half of the data into the command file. 
(stage 2) */ + for (; i < file_num_stage2; i++) { + struct test_file *file = &test.files[i]; + int res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + + if (res) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Validate contents of the FS */ + for (i = 0; i < file_num_stage2; i++) { + struct test_file *file = &test.files[i]; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Delete all files */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + char *filename_in_index = get_index_filename(mount_dir, + file->id); + + if (access(filename, F_OK) != 0) { + ksft_print_msg("File %s is not visible.\n", filename); + goto failure; + } + + if (access(filename_in_index, F_OK) != 0) { + ksft_print_msg("File %s is not visible.\n", + filename_in_index); + goto failure; + } + + unlink(filename); + + if (access(filename, F_OK) != -1) { + ksft_print_msg("File %s is still present.\n", filename); + goto failure; + } + + if (access(filename_in_index, F_OK) != 0) { + ksft_print_msg("File %s is still present.\n", + filename_in_index); + goto failure; + } + free(filename); + free(filename_in_index); + } + + /* Unmount and mount again, to see that deleted files stay deleted. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate all deleted files are still deleted. 
*/ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + + if (access(filename, F_OK) != -1) { + ksft_print_msg("File %s is still visible.\n", filename); + goto failure; + } + free(filename); + } + + /* Final unmount */ + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int attribute_test(char *mount_dir) +{ + char file_attr[] = "metadata123123"; + char attr_buf[INCFS_MAX_FILE_ATTR_SIZE] = {}; + int cmd_fd = -1; + incfs_uuid_t file_id; + int attr_res = 0; + char *backing_dir; + + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + if (emit_file(cmd_fd, NULL, "file", &file_id, 12, file_attr)) + goto failure; + + /* Test attribute values */ + attr_res = get_file_attr(mount_dir, file_id, attr_buf, + ARRAY_SIZE(attr_buf)); + if (attr_res != strlen(file_attr)) { + ksft_print_msg("Get file attr error: %d\n", attr_res); + goto failure; + } + if (strcmp(attr_buf, file_attr) != 0) { + ksft_print_msg("Incorrect file attr value: '%s'", attr_buf); + goto failure; + } + + /* Unmount and mount again, to see that attributes are persistent. 
*/ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Test attribute values again after remount*/ + attr_res = get_file_attr(mount_dir, file_id, attr_buf, + ARRAY_SIZE(attr_buf)); + if (attr_res != strlen(file_attr)) { + ksft_print_msg("Get dir attr error: %d\n", attr_res); + goto failure; + } + if (strcmp(attr_buf, file_attr) != 0) { + ksft_print_msg("Incorrect file attr value: '%s'", attr_buf); + goto failure; + } + + /* Final unmount */ + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int child_procs_waiting_for_data_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int cmd_fd = -1; + int i; + pid_t *child_pids = alloca(file_num * sizeof(pid_t)); + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. (10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + } + + /* Start child processes acessing data in the files */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + pid_t child_pid = flush_and_fork(); + + if (child_pid == 0) { + /* This is a child process, do the data validation. 
*/ + int ret = validate_test_file_content(mount_dir, file); + + if (ret >= 0) { + /* Zero exit status if data is valid. */ + exit(0); + } + + /* Positive status if validation error found. */ + exit(-ret); + } else if (child_pid > 0) { + child_pids[i] = child_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + /* Write test data into the command file. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Check that all children has finished with 0 exit status */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int status = wait_for_process(child_pids[i]); + + if (status != 0) { + ksft_print_msg( + "Validation for the file %s failed with code %d (%s)\n", + file->name, status, strerror(status)); + goto failure; + } + } + + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int multiple_providers_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int producer_count = 5; + int cmd_fd = -1; + int status; + int i; + pid_t *producer_pids = alloca(producer_count * sizeof(pid_t)); + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. (10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. 
*/ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL) < 0) + goto failure; + } + + /* Start producer processes */ + for (i = 0; i < producer_count; i++) { + pid_t producer_pid = flush_and_fork(); + + if (producer_pid == 0) { + int ret; + /* + * This is a child that should provide data to + * pending reads. + */ + + ret = data_producer(mount_dir, &test); + exit(-ret); + } else if (producer_pid > 0) { + producer_pids[i] = producer_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + /* Validate FS content */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + loff_t read_result = read_whole_file(filename); + + free(filename); + if (read_result != file->size) { + ksft_print_msg( + "Error validating file %s. Result: %ld\n", + file->name, read_result); + goto failure; + } + } + + /* Check that all producers has finished with 0 exit status */ + for (i = 0; i < producer_count; i++) { + status = wait_for_process(producer_pids[i]); + if (status != 0) { + ksft_print_msg("Producer %d failed with code (%s)\n", i, + strerror(status)); + goto failure; + } + } + + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int signature_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int i = 0; + unsigned char sig_buf[INCFS_MAX_SIGNATURE_SIZE]; + char *backing_dir; + int cmd_fd = -1; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. 
(10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write hashes and data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + + res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.data, file->sig.size, file->sig.add_data); + + if (res) { + ksft_print_msg("Emit failed for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + + if (emit_test_file_data(mount_dir, file)) + goto failure; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int sig_len; + char *path; + int fd; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + + path = concat_file_name(mount_dir, file->name); + fd = open(path, O_RDWR); + free(path); + if (fd < 0) { + print_error("Can't open file"); + goto failure; + } + + sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); + + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + + if (sig_len < 0) { + ksft_print_msg("Can't load signature %s. error: %s\n", + file->name, strerror(-sig_len)); + goto failure; + } + + if (sig_len != file->sig.size || + memcmp(sig_buf, file->sig.data, sig_len)) { + ksft_print_msg("Signature mismatch %s.\n", + file->name); + goto failure; + } + } + + /* Unmount and mount again, to make sure the signature is persistent. 
*/ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int sig_len; + char *path; + int fd; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + + path = concat_file_name(mount_dir, file->name); + fd = open(path, O_RDWR); + free(path); + if (fd < 0) { + print_error("Can't open file"); + goto failure; + } + + sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); + + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + + if (sig_len < 0) { + ksft_print_msg("Can't load signature %s. error: %s\n", + file->name, strerror(-sig_len)); + goto failure; + } + if (sig_len != file->sig.size || + memcmp(sig_buf, file->sig.data, sig_len)) { + ksft_print_msg("Signature mismatch %s.\n", + file->name); + goto failure; + } + } + + /* Final unmount */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int hash_tree_test(char *mount_dir) +{ + char *backing_dir; + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int corrupted_file_idx = 5; + int i = 0; + int cmd_fd = -1; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write hashes and data. 
*/ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.data, file->sig.size, file->sig.add_data); + + /* Fail here rather than let the next assignment to res hide the error. */ + if (res) { + ksft_print_msg("Emit failed for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + + if (i == corrupted_file_idx) { + /* Corrupt the third block's hash */ + file->mtree[0].data[2 * SHA256_DIGEST_SIZE] ^= 0xff; + } + if (emit_test_file_data(mount_dir, file)) + goto failure; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == corrupted_file_idx) { + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + char *filename = + concat_file_name(mount_dir, file->name); + int res; + + res = read_test_file(data, INCFS_DATA_FILE_BLOCK_SIZE, + filename, 2); + free(filename); + if (res != -EBADMSG) { + ksft_print_msg("Hash violation missed1. %d\n", + res); + goto failure; + } + } else if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Unmount and mount again, to check that hashes are persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == corrupted_file_idx) { + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + char *filename = + concat_file_name(mount_dir, file->name); + int res; + + res = read_test_file(data, INCFS_DATA_FILE_BLOCK_SIZE, + filename, 2); + free(filename); + if (res != -EBADMSG) { + ksft_print_msg("Hash violation missed2. 
%d\n", + res); + goto failure; + } + } else if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Final unmount */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + /* The failure path frees backing_dir; do the same on success. */ + free(backing_dir); + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +/* + * Read up to ARRAY_SIZE(prs) blocks of @file through the mount, then fetch the + * records available on @log_fd and check that there is exactly one per block + * read, each carrying the file's id, the expected block index, a serial number + * that follows its predecessor, and a non-zero timestamp. + * Returns TEST_SUCCESS or TEST_FAILURE. + */ +static int validate_logs(char *mount_dir, int log_fd, struct test_file *file) +{ + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + struct incfs_pending_read_info prs[100] = {}; + int prs_size = ARRAY_SIZE(prs); + int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int res; + int read_count; + int i; + char *filename = concat_file_name(mount_dir, file->name); + int fd; + + fd = open(filename, O_RDONLY); + free(filename); + /* open() reports failure with -1; descriptor 0 is valid. */ + if (fd < 0) + return TEST_FAILURE; + + if (block_cnt > prs_size) + block_cnt = prs_size; + + for (i = 0; i < block_cnt; i++) { + res = pread(fd, data, sizeof(data), + INCFS_DATA_FILE_BLOCK_SIZE * i); + if (res <= 0) + goto failure; + } + + read_count = wait_for_pending_reads(log_fd, 0, prs, prs_size); + if (read_count < 0) { + ksft_print_msg("Error reading logged reads %s.\n", + strerror(-read_count)); + goto failure; + } + + if (read_count != block_cnt) { + ksft_print_msg("Bad log read count %s %d %d.\n", file->name, + read_count, block_cnt); + goto failure; + } + + for (i = 0; i < read_count; i++) { + struct incfs_pending_read_info *read = &prs[i]; + + if (!same_id(&read->file_id, &file->id)) { + ksft_print_msg("Bad log read ino %s\n", file->name); + goto failure; + } + + if (read->block_index != i) { + ksft_print_msg("Bad log read block index %s %d %d.\n", + file->name, read->block_index, i); + goto failure; + } + + if (i != 0) { + unsigned long psn = prs[i - 1].serial_number; + + if (read->serial_number != psn + 1) { + ksft_print_msg("Bad log read sn %s %lu %lu.\n", + file->name, + (unsigned long)read->serial_number, + psn); + goto failure; + } + } + + if (read->timestamp_us == 0) 
{ + ksft_print_msg("Bad log read timestamp %s.\n", + file->name); + goto failure; + } + } + close(fd); + return TEST_SUCCESS; + +failure: + close(fd); + return TEST_FAILURE; +} + +/* + * Mount with "readahead=0", create and fill the test files and validate the + * read log for each of them, then remount and validate the log again. + * Returns TEST_SUCCESS or TEST_FAILURE. + */ +static int read_log_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int i = 0; + int cmd_fd = -1, log_fd = -1; + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + log_fd = open_log_file(mount_dir); + /* Check the descriptor that was just opened, not cmd_fd. */ + if (log_fd < 0) + ksft_print_msg("Can't open log file.\n"); + + /* Write data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL)) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file)) + goto failure; + } + + /* Unmount and mount again, to see that logs work after remount. 
*/ + close(cmd_fd); + close(log_fd); + cmd_fd = -1; + log_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + log_fd = open_log_file(mount_dir); + /* Check the descriptor that was just opened, not cmd_fd. */ + if (log_fd < 0) + ksft_print_msg("Can't open log file.\n"); + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file)) + goto failure; + } + + /* + * Final unmount. Invalidate the descriptors first so the failure path + * cannot close them a second time, and free backing_dir only after the + * last point that can jump to the failure path. + */ + close(cmd_fd); + close(log_fd); + cmd_fd = -1; + log_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + free(backing_dir); + return TEST_SUCCESS; + +failure: + close(cmd_fd); + close(log_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +/* + * Return a heap-allocated path "<cwd>/incfs-mount-dir", creating the directory + * if nothing exists at that path. Returns NULL when the current directory or + * the path cannot be obtained, when the path exists but is not a directory, or + * when mkdir() fails. The caller owns the returned string. + */ +static char *setup_mount_dir(void) +{ + struct stat st; + char *current_dir = getcwd(NULL, 0); + char *mount_dir; + + if (!current_dir) { + print_error("Can't get current dir."); + return NULL; + } + + mount_dir = concat_file_name(current_dir, "incfs-mount-dir"); + free(current_dir); + if (!mount_dir) + return NULL; + + if (stat(mount_dir, &st) == 0) { + if (S_ISDIR(st.st_mode)) + return mount_dir; + + ksft_print_msg("%s is a file, not a dir.\n", mount_dir); + free(mount_dir); + return NULL; + } + + if (mkdir(mount_dir, 0777)) { + print_error("Can't create mount dir."); + free(mount_dir); + return NULL; + } + + return mount_dir; +} + +int main(int argc, char *argv[]) +{ + char *mount_dir = NULL; + int fails = 0; + int i; + int fd, count; + + // Seed randomness pool for testing on QEMU + // NOTE - this abuses the concept of randomness - do *not* ever do this + // on a machine for production use - the device will think it has good + // randomness when it does not. 
+ fd = open("/dev/urandom", O_WRONLY); + if (fd >= 0) { + count = 4096; + for (i = 0; i < 128; ++i) + ioctl(fd, RNDADDTOENTCNT, &count); + close(fd); + } + + ksft_print_header(); + + if (geteuid() != 0) + ksft_print_msg("Not a root, might fail to mount.\n"); + + mount_dir = setup_mount_dir(); + if (mount_dir == NULL) + ksft_exit_fail_msg("Can't create a mount dir\n"); + +#define MAKE_TEST(test) \ + { \ + test, #test \ + } + struct { + int (*pfunc)(char *dir); + const char *name; + } cases[] = { + MAKE_TEST(basic_file_ops_test), + MAKE_TEST(cant_touch_index_test), + MAKE_TEST(dynamic_files_and_data_test), + MAKE_TEST(concurrent_reads_and_writes_test), + MAKE_TEST(attribute_test), + MAKE_TEST(work_after_remount_test), + MAKE_TEST(child_procs_waiting_for_data_test), + MAKE_TEST(multiple_providers_test), + MAKE_TEST(signature_test), + MAKE_TEST(hash_tree_test), + MAKE_TEST(read_log_test), + }; +#undef MAKE_TEST + + ksft_set_plan(ARRAY_SIZE(cases)); + + for (i = 0; i < ARRAY_SIZE(cases); ++i) { + ksft_print_msg("Running %s\n", cases[i].name); + if (cases[i].pfunc(mount_dir) == TEST_SUCCESS) + ksft_test_result_pass("%s\n", cases[i].name); + else { + ksft_test_result_fail("%s\n", cases[i].name); + fails++; + } + } + + umount2(mount_dir, MNT_FORCE); + rmdir(mount_dir); + + /* Report a failing exit status when any test case failed. */ + if (fails > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c new file mode 100644 index 000000000000..08b8452ad0bc --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms) +{ + static const char fs_name[] = 
INCFS_NAME; + char mount_options[512]; + int result; + + /* The only mount option passed is the read timeout in milliseconds. */ + snprintf(mount_options, ARRAY_SIZE(mount_options), + "read_timeout_ms=%u", + read_timeout_ms); + + result = mount(backing_dir, mount_dir, fs_name, 0, mount_options); + if (result != 0) + perror("Error mounting fs."); + return result; +} + +/* + * Mount INCFS_NAME from backing_dir onto mount_dir, passing opt verbatim as + * the mount data. Returns the result of mount(). + */ +int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt) +{ + static const char fs_name[] = INCFS_NAME; + int result; + + result = mount(backing_dir, mount_dir, fs_name, 0, opt); + if (result != 0) + perror("Error mounting fs."); + return result; +} + +/* Stub: ignores its arguments and always returns 0. */ +int unlink_node(int fd, int parent_ino, char *filename) +{ + return 0; +} + + +/* + * Parse a PEM-encoded private key from a NUL-terminated string. + * Returns the result of PEM_read_bio_PrivateKey(), or NULL if the memory BIO + * cannot be created. + */ +static EVP_PKEY *deserialize_private_key(const char *pem_key) +{ + BIO *bio = NULL; + EVP_PKEY *pkey = NULL; + int len = strlen(pem_key); + + bio = BIO_new_mem_buf(pem_key, len); + if (!bio) + return NULL; + + pkey = PEM_read_bio_PrivateKey(bio, NULL, NULL, NULL); + BIO_free(bio); + return pkey; +} + +/* + * Parse a PEM-encoded X.509 certificate from a NUL-terminated string. + * Returns the result of PEM_read_bio_X509(), or NULL if the memory BIO cannot + * be created. + */ +static X509 *deserialize_cert(const char *pem_cert) +{ + BIO *bio = NULL; + X509 *cert = NULL; + int len = strlen(pem_cert); + + bio = BIO_new_mem_buf(pem_cert, len); + if (!bio) + return NULL; + + cert = PEM_read_bio_X509(bio, NULL, NULL, NULL); + BIO_free(bio); + return cert; +} + +/* + * Sign data_to_sign with the PEM key and certificate given, using SHA-256 as + * the digest, and return the DER-encoded PKCS#7 object in a malloc()ed buffer + * through sig_ret / sig_size_ret. Returns true on success. + */ +bool sign_pkcs7(const void *data_to_sign, size_t data_size, + char *pkey_pem, char *cert_pem, + void **sig_ret, size_t *sig_size_ret) +{ + /* + * PKCS#7 signing flags: + * + * - PKCS7_BINARY signing binary data, so skip MIME translation + * + * - PKCS7_NOATTR omit extra authenticated attributes, such as + * SMIMECapabilities + * + * - PKCS7_PARTIAL PKCS7_sign() creates a handle only, then + * PKCS7_sign_add_signer() can add a signer later. + * This is necessary to change the message digest + * algorithm from the default of SHA-1. Requires + * OpenSSL 1.0.0 or later. 
+ */ + int pkcs7_flags = PKCS7_BINARY | PKCS7_NOATTR | PKCS7_PARTIAL; + void *sig; + size_t sig_size; + BIO *bio = NULL; + PKCS7 *p7 = NULL; + EVP_PKEY *pkey = NULL; + X509 *cert = NULL; + bool ok = false; + + const EVP_MD *md = EVP_sha256(); + + pkey = deserialize_private_key(pkey_pem); + if (!pkey) { + printf("deserialize_private_key failed\n"); + goto out; + } + + cert = deserialize_cert(cert_pem); + if (!cert) { + printf("deserialize_cert failed\n"); + goto out; + } + + bio = BIO_new_mem_buf(data_to_sign, data_size); + if (!bio) + goto out; + + p7 = PKCS7_sign(NULL, NULL, NULL, bio, pkcs7_flags); + if (!p7) { + printf("failed to initialize PKCS#7 signature object\n"); + goto out; + } + + if (!PKCS7_sign_add_signer(p7, cert, pkey, md, pkcs7_flags)) { + printf("failed to add signer to PKCS#7 signature object\n"); + goto out; + } + + if (PKCS7_final(p7, bio, pkcs7_flags) != 1) { + printf("failed to finalize PKCS#7 signature\n"); + goto out; + } + + /* Reuse bio as the output sink for the DER encoding. */ + BIO_free(bio); + bio = BIO_new(BIO_s_mem()); + if (!bio) { + printf("out of memory\n"); + goto out; + } + + if (i2d_PKCS7_bio(bio, p7) != 1) { + printf("failed to DER-encode PKCS#7 signature object\n"); + goto out; + } + + /* sig points into bio's buffer, so copy it out before bio is freed. */ + sig_size = BIO_get_mem_data(bio, &sig); + *sig_ret = malloc(sig_size); + if (!*sig_ret) { + printf("out of memory\n"); + goto out; + } + memcpy(*sig_ret, sig, sig_size); + *sig_size_ret = sig_size; + ok = true; +out: + /* Release the key and certificate too; these free functions accept NULL. */ + EVP_PKEY_free(pkey); + X509_free(cert); + PKCS7_free(p7); + BIO_free(bio); + return ok; +} + +/* + * Create a regular file (mode 0555) through INCFS_IOC_CREATE_FILE on fd, + * attaching the given root hash, signature and additional data. The file id + * is the MD5 of filename. Returns 0 and stores the id in *id_out, or -errno. + */ +int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, const char *root_hash, char *sig, size_t sig_size, + char *add_data) +{ + int mode = __S_IFREG | 0555; + struct incfs_file_signature_info sig_info = { + .hash_tree_alg = root_hash + ? 
INCFS_HASH_TREE_SHA256 + : 0, + .root_hash = ptr_to_u64(root_hash), + .additional_data = ptr_to_u64(add_data), + /* add_data may be NULL; strlen(NULL) is undefined. */ + .additional_data_size = add_data ? strlen(add_data) : 0, + .signature = ptr_to_u64(sig), + .signature_size = sig_size, + }; + + struct incfs_new_file_args args = { + .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(&sig_info), + .file_attr = 0, + .file_attr_len = 0 + }; + + /* The file id is derived from the file name. */ + md5(filename, strlen(filename), (char *)args.file_id.bytes); + + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) + return -errno; + + *id_out = args.file_id; + return 0; +} + + +/* + * Create a regular file (mode 0555) through INCFS_IOC_CREATE_FILE on fd with + * no hash tree and an optional attribute string. The file id is the MD5 of + * filename. Returns 0 and stores the id in *id_out, or -errno. + */ +int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, char *attr) +{ + int mode = __S_IFREG | 0555; + struct incfs_file_signature_info sig_info = { + .hash_tree_alg = 0, + .root_hash = ptr_to_u64(NULL) + }; + struct incfs_new_file_args args = { + .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(&sig_info), + .file_attr = ptr_to_u64(attr), + .file_attr_len = attr ? 
strlen(attr) : 0 + }; + + md5(filename, strlen(filename), (char *)args.file_id.bytes); + + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) + return -errno; + + *id_out = args.file_id; + return 0; +} + +/* Stub: ignores its arguments and always returns 0. */ +int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size) +{ + return 0; +} + +/* + * Read the signature of the file open on fd into buf through + * INCFS_IOC_READ_FILE_SIGNATURE. Returns the length reported by the ioctl, or + * -errno. + */ +int get_file_signature(int fd, unsigned char *buf, int buf_size) +{ + struct incfs_get_file_sig_args args = { + .file_signature = ptr_to_u64(buf), + .file_signature_buf_size = buf_size + }; + + if (ioctl(fd, INCFS_IOC_READ_FILE_SIGNATURE, &args) == 0) + return args.file_signature_len_out; + return -errno; +} + +/* Return st_size of name, or -ENOENT whenever stat() fails. */ +loff_t get_file_size(char *name) +{ + struct stat st; + + if (stat(name, &st) == 0) + return st.st_size; + return -ENOENT; +} + +/* + * Open "<mount_dir>/" INCFS_PENDING_READS_FILENAME read-only. + * Returns the descriptor, or the negative result of open(). + */ +int open_commands_file(char *mount_dir) +{ + char cmd_file[255]; + int cmd_fd; + + snprintf(cmd_file, ARRAY_SIZE(cmd_file), + "%s/%s", mount_dir, INCFS_PENDING_READS_FILENAME); + cmd_fd = open(cmd_file, O_RDONLY); + + if (cmd_fd < 0) + perror("Can't open commands file"); + return cmd_fd; +} + +/* + * Open "<mount_dir>/.log" read-write. + * Returns the descriptor, or the negative result of open(). + */ +int open_log_file(char *mount_dir) +{ + char cmd_file[255]; + int cmd_fd; + + snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/.log", mount_dir); + cmd_fd = open(cmd_file, O_RDWR); + if (cmd_fd < 0) + perror("Can't open log file"); + return cmd_fd; +} + +/* + * Read up to prs_count records from fd into prs. When timeout_ms is positive, + * poll fd first and return 0 if it does not become readable in time. + * Returns the number of whole records read, or -errno. + */ +int wait_for_pending_reads(int fd, int timeout_ms, + struct incfs_pending_read_info *prs, int prs_count) +{ + ssize_t read_res = 0; + + if (timeout_ms > 0) { + int poll_res = 0; + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN + }; + + poll_res = poll(&pollfd, 1, timeout_ms); + if (poll_res < 0) + return -errno; + if (poll_res == 0) + return 0; + /* Test the POLLIN bit; OR-ing it in would always be non-zero. */ + if (!(pollfd.revents & POLLIN)) + return 0; + } + + read_res = read(fd, prs, prs_count * sizeof(*prs)); + if (read_res < 0) + return -errno; + + return read_res / sizeof(*prs); +} + +/* + * Return a strdup()ed "dir/file", or NULL on failure. A name that does not fit + * in FILENAME_MAX bytes is treated as a failure instead of being truncated. + */ +char *concat_file_name(const char *dir, char *file) +{ + char full_name[FILENAME_MAX] = ""; + int len; + + len = snprintf(full_name, ARRAY_SIZE(full_name), "%s/%s", dir, file); + if (len < 0 || (size_t)len >= ARRAY_SIZE(full_name)) + 
return NULL; + return strdup(full_name); +} + +/* + * Recursively delete everything under dir_path and then dir_path itself. + * Returns 0 on success or a negative errno value from the first failing step; + * the result of the final rmdir() is not reported. + */ +int delete_dir_tree(const char *dir_path) +{ + DIR *dir = NULL; + struct dirent *dp; + int result = 0; + + dir = opendir(dir_path); + if (!dir) { + result = -errno; + goto out; + } + + while ((dp = readdir(dir))) { + char *full_path; + + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + full_path = concat_file_name(dir_path, dp->d_name); + if (!full_path) { + result = -ENOMEM; + goto out; + } + + if (dp->d_type == DT_DIR) + result = delete_dir_tree(full_path); + else + result = unlink(full_path) ? -errno : 0; + free(full_path); + if (result) + goto out; + } + +out: + if (dir) + closedir(dir); + if (!result) + rmdir(dir_path); + return result; +} + +/* Write the SHA-256 digest of data[0..dsize) to hash. */ +void sha256(char *data, size_t dsize, char *hash) +{ + SHA256_CTX ctx; + + SHA256_Init(&ctx); + SHA256_Update(&ctx, data, dsize); + SHA256_Final((unsigned char *)hash, &ctx); +} + +/* Write the MD5 digest of data[0..dsize) to hash. */ +void md5(char *data, size_t dsize, char *hash) +{ + MD5_CTX ctx; + + MD5_Init(&ctx); + MD5_Update(&ctx, data, dsize); + MD5_Final((unsigned char *)hash, &ctx); +} diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h new file mode 100644 index 000000000000..9c9ba3c5f70a --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#include +#include + +#include "../../include/uapi/linux/incrementalfs.h" + +/* Macro arguments are parenthesised so any expression can be passed. */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#ifdef __LP64__ +#define ptr_to_u64(p) ((__u64)(p)) +#else +#define ptr_to_u64(p) ((__u64)(__u32)(p)) +#endif + +#define SHA256_DIGEST_SIZE 32 + +int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms); + +int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt); + +int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size); + +int get_file_signature(int fd, unsigned char *buf, int buf_size); + +int emit_node(int fd, char *filename, int *ino_out, int parent_ino, + size_t size, mode_t mode, 
char *attr); + +/* + * Create a regular file through INCFS_IOC_CREATE_FILE with an optional + * attribute string. Returns 0 and fills *id_out, or -errno. + */ +int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, char *attr); + +/* + * Like emit_file(), but passes a root hash, signature and additional data + * instead of an attribute string. Returns 0 and fills *id_out, or -errno. + */ +int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, const char *root_hash, char *sig, size_t sig_size, + char *add_data); + +/* Stub in utils.c: always returns 0. */ +int unlink_node(int fd, int parent_ino, char *filename); + +/* Returns st_size of name, or -ENOENT when stat() fails. */ +loff_t get_file_size(char *name); + +/* Opens the pending-reads file under mount_dir read-only; returns the fd. */ +int open_commands_file(char *mount_dir); + +/* Opens "<mount_dir>/.log" read-write; returns the fd. */ +int open_log_file(char *mount_dir); + +/* + * Reads up to prs_count records from fd, polling first when timeout_ms > 0. + * Returns the number of records read (0 on timeout) or -errno. + */ +int wait_for_pending_reads(int fd, int timeout_ms, + struct incfs_pending_read_info *prs, int prs_count); + +/* Returns a strdup()ed "dir/file" that the caller must free, or NULL. */ +char *concat_file_name(const char *dir, char *file); + +/* Writes the SHA-256 digest of data[0..dsize) to hash. */ +void sha256(char *data, size_t dsize, char *hash); + +/* Writes the MD5 digest of data[0..dsize) to hash. */ +void md5(char *data, size_t dsize, char *hash); + +/* + * Produces a DER-encoded PKCS#7 signature of the data in a malloc()ed buffer + * returned through sig_ret / sig_size_ret. Returns true on success. + */ +bool sign_pkcs7(const void *data_to_sign, size_t data_size, + char *pkey_pem, char *cert_pem, + void **sig_ret, size_t *sig_size_ret); + +/* Recursively deletes path; returns 0 on success, non-zero on failure. */ +int delete_dir_tree(const char *path);