ANDROID: Initial commit of Incremental FS

Fully working incremental fs filesystem

Signed-off-by: Eugene Zemtsov <ezemtsov@google.com>
Signed-off-by: Paul Lawrence <paullawrence@google.com>

Bug: 133435829
Change-Id: I14741a61ce7891a0f9054e70f026917712cbef78
This commit is contained in:
Eugene Zemtsov 2019-11-18 20:21:06 -08:00 committed by Paul Lawrence
parent cac5f79018
commit 1be052b20b
21 changed files with 8334 additions and 0 deletions

View File

@ -121,6 +121,7 @@ source "fs/quota/Kconfig"
source "fs/autofs/Kconfig"
source "fs/fuse/Kconfig"
source "fs/overlayfs/Kconfig"
source "fs/incfs/Kconfig"
menu "Caches"

View File

@ -112,6 +112,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
obj-$(CONFIG_OVERLAY_FS) += overlayfs/
obj-$(CONFIG_ORANGEFS_FS) += orangefs/
obj-$(CONFIG_INCREMENTAL_FS) += incfs/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
obj-$(CONFIG_OMFS_FS) += omfs/

18
fs/incfs/Kconfig Normal file
View File

@ -0,0 +1,18 @@
config INCREMENTAL_FS
tristate "Incremental file system support"
depends on BLOCK
select DECOMPRESS_LZ4
select CRC32
select CRYPTO
select CRYPTO_SHA256
select X509_CERTIFICATE_PARSER
select ASYMMETRIC_KEY_TYPE
select ASYMMETRIC_PUBLIC_KEY_SUBTYPE
select PKCS7_MESSAGE_PARSER
help
Incremental FS is a read-only virtual file system that facilitates execution
of programs while their binaries are still being lazily downloaded over the
network, USB or pigeon post.
To compile this file system support as a module, choose M here: the
module will be called incrementalfs.

9
fs/incfs/Makefile Normal file
View File

@ -0,0 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_INCREMENTAL_FS) += incrementalfs.o
incrementalfs-y := \
data_mgmt.o \
format.o \
integrity.o \
main.o \
vfs.o

33
fs/incfs/compat.h Normal file
View File

@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2019 Google LLC
*/
#ifndef _INCFS_COMPAT_H
#define _INCFS_COMPAT_H
#include <linux/lz4.h>
#include <linux/version.h>
/*
 * Local definition of __poll_t as a plain unsigned int.
 * NOTE(review): defined unconditionally, unlike the #ifndef-guarded fallbacks
 * below; presumably meant for kernels whose headers lack __poll_t - confirm
 * it cannot clash with a kernel-provided typedef.
 */
typedef unsigned int __poll_t;
/*
 * Fallback used only when u64_to_user_ptr() is not already defined.
 * Converts a u64 holding a user-space address into a __user pointer.
 * The argument is parenthesized so that an expression argument
 * (e.g. base + off) is converted as a whole instead of having the cast
 * bind to its first operand only.
 */
#ifndef u64_to_user_ptr
#define u64_to_user_ptr(x) (		\
{					\
	typecheck(u64, (x));		\
	(void __user *)(uintptr_t)(x);	\
}					\
)
#endif
/* Fallback: the struct page whose lru node precedes @head in the list. */
#ifndef lru_to_page
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
#endif
/*
 * The mapping's gfp mask with __GFP_NORETRY and __GFP_NOWARN added.
 * NOTE(review): defined unconditionally, unlike the neighbouring fallbacks.
 */
#define readahead_gfp_mask(x) \
(mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN)
/* Fallback: alias SB_ACTIVE to MS_ACTIVE where SB_ACTIVE is not defined. */
#ifndef SB_ACTIVE
#define SB_ACTIVE MS_ACTIVE
#endif
#endif /* _INCFS_COMPAT_H */

1142
fs/incfs/data_mgmt.c Normal file

File diff suppressed because it is too large Load Diff

339
fs/incfs/data_mgmt.h Normal file
View File

@ -0,0 +1,339 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2019 Google LLC
*/
#ifndef _INCFS_DATA_MGMT_H
#define _INCFS_DATA_MGMT_H
#include <linux/cred.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/wait.h>
#include <crypto/hash.h>
#include <uapi/linux/incrementalfs.h>
#include "internal.h"
/* Number of entries in data_file.df_segments. */
#define SEGMENTS_PER_FILE 3
/* One element of the read log ring buffer (read_log.rl_ring_buf). */
struct read_log_record {
/* Block index and the timed_out flag share a single 32-bit word. */
u32 block_index : 31;
u32 timed_out : 1;
/* presumably microseconds, judging by the _us suffix - TODO confirm */
u64 timestamp_us;
incfs_uuid_t file_id;
} __packed;
/* Position within the read log ring buffer. */
struct read_log_state {
/* Next slot in rl_ring_buf to write to. */
u32 next_index;
/* Number of passes the writer has made over rl_ring_buf. */
u32 current_pass_no;
};
/* A ring buffer to save records about data blocks which were recently read. */
struct read_log {
/* Storage for the records. */
struct read_log_record *rl_ring_buf;
/* Current write position, see struct read_log_state. */
struct read_log_state rl_state;
/* NOTE(review): presumably serializes writers of the ring - confirm. */
spinlock_t rl_writer_lock;
/* NOTE(review): unit (records vs. bytes) is not visible here - confirm. */
int rl_size;
/*
 * A queue of waiters who want to be notified about reads.
 */
wait_queue_head_t ml_notif_wq;
};
/*
 * Per-mount tunables; stored in mount_info.mi_options and passed to
 * incfs_alloc_mount_info().
 */
struct mount_options {
/* presumably milliseconds, judging by the _ms suffix - TODO confirm */
unsigned int read_timeout_ms;
unsigned int readahead_pages;
unsigned int read_log_pages;
unsigned int read_log_wakeup_count;
bool no_backing_file_cache;
bool no_backing_file_readahead;
};
/* State kept for one mounted incfs instance. */
struct mount_info {
struct super_block *mi_sb;
struct path mi_backing_dir_path;
struct dentry *mi_index_dir;
const struct cred *mi_owner;
struct mount_options mi_options;
/* This mutex is to be taken before create, rename, delete */
struct mutex mi_dir_struct_mutex;
/*
 * A queue of waiters who want to be notified about new pending reads.
 */
wait_queue_head_t mi_pending_reads_notif_wq;
/*
 * Protects:
 * - mi_reads_list_head
 * - mi_pending_reads_count
 * - mi_last_pending_read_number
 * - data_file_segment.reads_list_head
 */
struct mutex mi_pending_reads_mutex;
/* List of active pending_read objects */
struct list_head mi_reads_list_head;
/* Total number of items in mi_reads_list_head */
int mi_pending_reads_count;
/*
 * Last serial number that was assigned to a pending read.
 * 0 means no pending reads have been seen yet.
 */
int mi_last_pending_read_number;
/* Temporary buffer for read logger. */
struct read_log mi_log;
};
/* Location, stored size and compression algorithm of one data block. */
struct data_file_block {
/* Offset of the block's data within the backing file. */
loff_t db_backing_file_data_offset;
/* NOTE(review): presumably the on-disk (possibly compressed) size. */
size_t db_stored_size;
enum incfs_compression_alg db_comp_alg;
};
/*
 * A read request tracked on two lists at once: the mount-wide list
 * (mount_info.mi_reads_list_head) and a per-segment list
 * (data_file_segment.reads_list_head).
 */
struct pending_read {
incfs_uuid_t file_id;
s64 timestamp_us;
atomic_t done;
int block_index;
/* See mount_info.mi_last_pending_read_number. */
int serial_number;
/* Link in mount_info.mi_reads_list_head. */
struct list_head mi_reads_list;
/* Link in data_file_segment.reads_list_head. */
struct list_head segment_reads_list;
};
/* One of the SEGMENTS_PER_FILE lock domains of a data_file. */
struct data_file_segment {
wait_queue_head_t new_data_arrival_wq;
/* Protects reads and writes from the blockmap */
/* Good candidate for read/write mutex */
struct mutex blockmap_mutex;
/* List of active pending_read objects belonging to this segment */
/* Protected by mount_info.mi_pending_reads_mutex */
struct list_head reads_list_head;
};
/*
 * Extra info associated with a file. Just a few bytes set by a user.
 * NOTE(review): looks like the in-memory counterpart of the on-disk
 * struct incfs_file_attr record - confirm against data_mgmt.c.
 */
struct file_attr {
loff_t fa_value_offset;
size_t fa_value_size;
u32 fa_crc;
};
/* In-memory state of one incfs data file. */
struct data_file {
struct backing_file_context *df_backing_file_context;
/* Mount this file belongs to. */
struct mount_info *df_mount_info;
incfs_uuid_t df_id;
/*
 * Array of segments used to reduce lock contention for the file.
 * The segment used for a block depends on the block's index.
 */
struct data_file_segment df_segments[SEGMENTS_PER_FILE];
/* Base offset of the first metadata record. */
loff_t df_metadata_off;
/* Base offset of the block map. */
loff_t df_blockmap_off;
/* File size in bytes */
loff_t df_size;
int df_block_count; /* File size in DATA_FILE_BLOCK_SIZE blocks */
struct file_attr n_attr;
struct mtree *df_hash_tree;
struct ondisk_signature *df_signature;
/* True, if file signature has already been validated. */
bool df_signature_validated;
};
/*
 * State of an open incfs directory; get_incfs_dir_file() reads it from
 * file->private_data.
 */
struct dir_file {
struct mount_info *mount_info;
struct file *backing_dir;
};
/*
 * incfs inode: embeds the VFS inode (n_vfs_inode), which get_incfs_node()
 * maps back to this structure with container_of().
 */
struct inode_info {
struct mount_info *n_mount_info; /* A mount, this file belongs to */
struct inode *n_backing_inode;
/* Data file state; see make_inode_ready_for_data_ops(). */
struct data_file *n_file;
struct inode n_vfs_inode;
};
/* Per-dentry data; get_incfs_dentry() reads it from dentry->d_fsdata. */
struct dentry_info {
/* Path copied out (with a reference) by get_incfs_backing_path(). */
struct path backing_path;
};
struct mount_info *incfs_alloc_mount_info(struct super_block *sb,
struct mount_options *options,
struct path *backing_dir_path);
void incfs_free_mount_info(struct mount_info *mi);
struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf);
void incfs_free_data_file(struct data_file *df);
int incfs_scan_metadata_chain(struct data_file *df);
struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf);
void incfs_free_dir_file(struct dir_file *dir);
ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df,
int index, int timeout_ms,
struct mem_range tmp);
int incfs_read_file_signature(struct data_file *df, struct mem_range dst);
int incfs_process_new_data_block(struct data_file *df,
struct incfs_new_data_block *block, u8 *data);
int incfs_process_new_hash_block(struct data_file *df,
struct incfs_new_data_block *block, u8 *data);
bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number);
/*
* Collects pending reads and saves them into the array (reads/reads_size).
* Only reads with serial_number > sn_lowerbound are reported.
* Returns how many reads were saved into the array.
*/
int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound,
struct incfs_pending_read_info *reads,
int reads_size);
int incfs_collect_logged_reads(struct mount_info *mi,
struct read_log_state *start_state,
struct incfs_pending_read_info *reads,
int reads_size);
struct read_log_state incfs_get_log_state(struct mount_info *mi);
int incfs_get_uncollected_logs_count(struct mount_info *mi,
struct read_log_state state);
/*
 * Map a VFS inode to the inode_info that embeds it.
 * Returns NULL for a NULL inode, or (with a one-time warning) for an inode
 * whose superblock magic is not INCFS_MAGIC_NUMBER.
 */
static inline struct inode_info *get_incfs_node(struct inode *inode)
{
	if (!inode)
		return NULL;

	if (inode->i_sb->s_magic == INCFS_MAGIC_NUMBER)
		return container_of(inode, struct inode_info, n_vfs_inode);

	/* This inode doesn't belong to us. */
	pr_warn_once("incfs: %s on an alien inode.", __func__);
	return NULL;
}
/*
 * Return the data_file attached to a regular incfs file.
 * Returns NULL if @f is NULL, is not a regular file, or its inode is not
 * an incfs inode.
 */
static inline struct data_file *get_incfs_data_file(struct file *f)
{
	struct inode_info *info;

	if (!f || !S_ISREG(f->f_inode->i_mode))
		return NULL;

	info = get_incfs_node(f->f_inode);
	return info ? info->n_file : NULL;
}
/*
 * Return the dir_file stored in f->private_data of a directory file,
 * or NULL if @f is NULL or is not a directory.
 */
static inline struct dir_file *get_incfs_dir_file(struct file *f)
{
	if (f && S_ISDIR(f->f_inode->i_mode))
		return (struct dir_file *)f->private_data;

	return NULL;
}
/*
* Make sure that inode_info.n_file is initialized and inode can be used
* for reading and writing data from/to the backing file.
*/
int make_inode_ready_for_data_ops(struct mount_info *mi,
struct inode *inode,
struct file *backing_file);
/* Return the dentry_info kept in d->d_fsdata, or NULL for a NULL dentry. */
static inline struct dentry_info *get_incfs_dentry(const struct dentry *d)
{
	return d ? (struct dentry_info *)d->d_fsdata : NULL;
}
/*
 * Copy the backing path of @d into @path and take a reference on it with
 * path_get(). If @d carries no dentry_info, @path is zeroed instead and no
 * reference is taken.
 */
static inline void get_incfs_backing_path(const struct dentry *d,
					  struct path *path)
{
	struct dentry_info *info = get_incfs_dentry(d);

	if (info) {
		*path = info->backing_path;
		path_get(path);
		return;
	}

	*path = (struct path) {};
}
/*
 * Number of INCFS_DATA_FILE_BLOCK_SIZE blocks needed to hold @size bytes,
 * rounding up; 0 for an empty file.
 */
static inline int get_blocks_count_for_size(u64 size)
{
	return size ? 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE : 0;
}
bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs);
#endif /* _INCFS_DATA_MGMT_H */

696
fs/incfs/format.c Normal file
View File

@ -0,0 +1,696 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2018 Google LLC
*/
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/kernel.h>
#include "compat.h"
#include "format.h"
/*
 * Allocate a backing file context for @backing_file.
 * Takes its own reference on the file via get_file(). Returns the new
 * context or ERR_PTR(-ENOMEM).
 */
struct backing_file_context *incfs_alloc_bfc(struct file *backing_file)
{
	struct backing_file_context *bfc = kzalloc(sizeof(*bfc), GFP_NOFS);

	if (!bfc)
		return ERR_PTR(-ENOMEM);

	mutex_init(&bfc->bc_mutex);
	bfc->bc_file = get_file(backing_file);
	return bfc;
}
/*
 * Release a backing file context: drop the file reference (if any),
 * destroy the mutex and free the memory. NULL is accepted and ignored.
 */
void incfs_free_bfc(struct backing_file_context *bfc)
{
	struct file *backing;

	if (!bfc)
		return;

	backing = bfc->bc_file;
	if (backing)
		fput(backing);

	mutex_destroy(&bfc->bc_mutex);
	kfree(bfc);
}
/* Return the current size of @f, used as the append position. */
loff_t incfs_get_end_offset(struct file *f)
{
	struct inode *inode = file_inode(f);

	/*
	 * This function assumes that file size and the end-offset
	 * are the same. This is not always true.
	 */
	return i_size_read(inode);
}
/*
 * Cut the backing file down to @new_end bytes.
 * Used to roll back partially successful multistep writes.
 *
 * Returns 0 if the file already has that size, -EINVAL if it is shorter
 * than @new_end, -EFAULT for a missing context or file, otherwise the
 * result of notify_change().
 */
static int truncate_backing_file(struct backing_file_context *bfc,
				loff_t new_end)
{
	struct file *bf;
	struct inode *backing_inode;
	struct iattr size_attr;
	loff_t cur_end;
	int err;

	if (!bfc)
		return -EFAULT;

	LOCK_REQUIRED(bfc->bc_mutex);

	bf = bfc->bc_file;
	if (!bf)
		return -EFAULT;

	cur_end = incfs_get_end_offset(bf);
	if (cur_end == new_end)
		return 0;
	if (cur_end < new_end)
		return -EINVAL;

	backing_inode = bf->f_inode;
	size_attr.ia_valid = ATTR_SIZE;
	size_attr.ia_size = new_end;

	inode_lock(backing_inode);
	err = notify_change(bf->f_path.dentry, &size_attr, NULL);
	inode_unlock(backing_inode);

	return err;
}
/*
 * Grow the backing file by @len zero bytes and sync the new range.
 * Returns 0 on success (or when @len is 0), a negative error otherwise.
 */
static int append_zeros(struct backing_file_context *bfc, size_t len)
{
	loff_t old_end;
	int err;

	if (!bfc)
		return -EFAULT;
	if (!len)
		return 0;

	LOCK_REQUIRED(bfc->bc_mutex);

	/*
	 * Allocating a single byte at the new last offset is enough:
	 * it increases the file size and leaves a zeroed area of the
	 * requested length behind it.
	 */
	old_end = incfs_get_end_offset(bfc->bc_file);
	err = vfs_fallocate(bfc->bc_file, 0, old_end + len - 1, 1);
	if (err)
		return err;

	return vfs_fsync_range(bfc->bc_file, old_end, old_end + len, 1);
}
/*
 * Write @count bytes from @buf at @pos of the backing file.
 * A short write is reported as -EIO. When @sync is set the written range
 * is fsync-ed and the fsync result is returned.
 */
static int write_to_bf(struct backing_file_context *bfc, const void *buf,
			size_t count, loff_t pos, bool sync)
{
	ssize_t written = incfs_kwrite(bfc->bc_file, buf, count, pos);

	if (written < 0)
		return written;
	if (written != count)
		return -EIO;
	if (!sync)
		return 0;

	return vfs_fsync_range(bfc->bc_file, pos, pos + count, 1);
}
/*
 * CRC32 of a metadata record, computed with h_record_crc and
 * h_next_md_offset temporarily zeroed. The length comes from
 * h_record_size, capped at INCFS_MAX_METADATA_RECORD_SIZE. The record is
 * left unchanged on return.
 */
static u32 calc_md_crc(struct incfs_md_header *record)
{
	const __le32 crc_backup = record->h_record_crc;
	const __le64 next_backup = record->h_next_md_offset;
	size_t len = min_t(size_t, le16_to_cpu(record->h_record_size),
			   INCFS_MAX_METADATA_RECORD_SIZE);
	u32 crc;

	/* These two fields are excluded from the checksum. */
	record->h_record_crc = 0;
	record->h_next_md_offset = 0;

	crc = crc32(0, record, len);

	/* Put the excluded fields back. */
	record->h_record_crc = crc_backup;
	record->h_next_md_offset = next_backup;

	return crc;
}
/*
 * Append a given metadata record to the backing file and update the
 * previous record (or the file header, when this is the first record) to
 * add the new record to the metadata list.
 *
 * @bfc:    backing file context.
 * @record: header of the record to append; h_record_size must already be
 *          set. h_prev_md_offset, h_next_md_offset and h_record_crc are
 *          filled in here.
 *
 * Returns 0 on success, -EFAULT for NULL arguments, -EINVAL if the last
 * known record offset is negative, or the error of a failed write. If the
 * link update fails, the appended record is left in the file; the callers
 * in this file truncate the file to roll back.
 */
static int append_md_to_backing_file(struct backing_file_context *bfc,
				struct incfs_md_header *record)
{
	int result = 0;
	loff_t record_offset;
	loff_t file_pos;
	__le64 new_md_offset;
	size_t record_size;

	if (!bfc || !record)
		return -EFAULT;

	if (bfc->bc_last_md_record_offset < 0)
		return -EINVAL;

	LOCK_REQUIRED(bfc->bc_mutex);

	record_size = le16_to_cpu(record->h_record_size);
	file_pos = incfs_get_end_offset(bfc->bc_file);
	/*
	 * h_prev_md_offset is an on-disk little-endian field that
	 * incfs_read_next_metadata_record() decodes with le64_to_cpu(),
	 * so it has to be stored through cpu_to_le64().
	 */
	record->h_prev_md_offset = cpu_to_le64(bfc->bc_last_md_record_offset);
	record->h_next_md_offset = 0;
	record->h_record_crc = cpu_to_le32(calc_md_crc(record));

	/* Write the metadata record to the end of the backing file */
	record_offset = file_pos;
	new_md_offset = cpu_to_le64(record_offset);
	result = write_to_bf(bfc, record, record_size, file_pos, true);
	if (result)
		return result;

	/* Update next metadata offset in a previous record or the header. */
	if (bfc->bc_last_md_record_offset) {
		/*
		 * Find a place in the previous md record where new record's
		 * offset needs to be saved.
		 */
		file_pos = bfc->bc_last_md_record_offset +
			offsetof(struct incfs_md_header, h_next_md_offset);
	} else {
		/*
		 * No metadata yet, find a place to update in the
		 * file_header.
		 */
		file_pos = offsetof(struct incfs_file_header,
				    fh_first_md_offset);
	}
	result = write_to_bf(bfc, &new_md_offset, sizeof(new_md_offset),
			     file_pos, true);
	if (result)
		return result;

	bfc->bc_last_md_record_offset = record_offset;
	return result;
}
/*
 * Reserve a zero-filled area for @block_count blockmap entries and append
 * an incfs_blockmap metadata record that points to it.
 *
 * On success the area's offset is stored in *@map_base_off (if non-NULL).
 * If appending the record fails, the file is truncated back to its
 * previous end.
 */
int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc,
				u32 block_count, loff_t *map_base_off)
{
	struct incfs_blockmap bm_record = {};
	size_t body_size = block_count * sizeof(struct incfs_blockmap_entry);
	loff_t body_off;
	int err;

	if (!bfc)
		return -EFAULT;

	bm_record.m_header.h_md_entry_type = INCFS_MD_BLOCK_MAP;
	bm_record.m_header.h_record_size = cpu_to_le16(sizeof(bm_record));
	bm_record.m_header.h_next_md_offset = cpu_to_le64(0);
	bm_record.m_block_count = cpu_to_le32(block_count);

	LOCK_REQUIRED(bfc->bc_mutex);

	/* The blockmap body starts at the current end of the file. */
	body_off = incfs_get_end_offset(bfc->bc_file);
	err = append_zeros(bfc, body_size);
	if (err)
		return err;

	/* The record points at the body reserved above. */
	bm_record.m_base_offset = cpu_to_le64(body_off);
	err = append_md_to_backing_file(bfc, &bm_record.m_header);
	if (err) {
		/* Error, rollback file changes */
		truncate_backing_file(bfc, body_off);
		return err;
	}

	if (map_base_off)
		*map_base_off = body_off;
	return 0;
}
/*
 * Write file attribute data and its metadata record to the backing file.
 *
 * @bfc:   backing file context.
 * @value: attribute bytes; at most INCFS_MAX_FILE_ATTR_SIZE long.
 * @attr:  optional; receives a copy of the written record on success.
 *
 * Returns 0 on success, -EFAULT for a NULL context, -ENOSPC if @value is
 * too long, or a write error. If appending the record fails, the file is
 * truncated back to where the value was written.
 */
int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc,
		struct mem_range value, struct incfs_file_attr *attr)
{
	struct incfs_file_attr file_attr = {};
	int result = 0;
	u32 crc = 0;
	loff_t value_offset = 0;

	if (!bfc)
		return -EFAULT;

	if (value.len > INCFS_MAX_FILE_ATTR_SIZE)
		return -ENOSPC;

	LOCK_REQUIRED(bfc->bc_mutex);

	crc = crc32(0, value.data, value.len);
	value_offset = incfs_get_end_offset(bfc->bc_file);
	file_attr.fa_header.h_md_entry_type = INCFS_MD_FILE_ATTR;
	file_attr.fa_header.h_record_size = cpu_to_le16(sizeof(file_attr));
	file_attr.fa_header.h_next_md_offset = cpu_to_le64(0);
	file_attr.fa_size = cpu_to_le16((u16)value.len);
	file_attr.fa_offset = cpu_to_le64(value_offset);
	/*
	 * fa_crc is a 32-bit little-endian field; converting through
	 * cpu_to_le64() would store zero on big-endian machines.
	 */
	file_attr.fa_crc = cpu_to_le32(crc);

	result = write_to_bf(bfc, value.data, value.len, value_offset, true);
	if (result)
		return result;

	result = append_md_to_backing_file(bfc, &file_attr.fa_header);
	if (result) {
		/* Error, rollback file changes */
		truncate_backing_file(bfc, value_offset);
	} else if (attr) {
		*attr = file_attr;
	}

	return result;
}
/*
 * Write the signature-related data of a file and the INCFS_MD_SIGNATURE
 * metadata record describing it.
 *
 * In order, each at the then-current end of the backing file: the signature
 * blob (@sig, if present), the additional data (@add_data, if non-empty)
 * and a zero-filled area of @tree_size bytes for the hash tree (only when
 * @hash_alg is non-zero and @tree_size > 0). The record itself is appended
 * last.
 *
 * Returns 0 on success, -EFAULT for a NULL context, -E2BIG if @root_hash
 * does not fit into sg_root_hash, or the error of a failed step. On any
 * failure after the first check the file is truncated back to the end
 * offset it had on entry.
 */
int incfs_write_signature_to_backing_file(struct backing_file_context *bfc,
u8 hash_alg, u32 tree_size,
struct mem_range root_hash, struct mem_range add_data,
struct mem_range sig)
{
struct incfs_file_signature sg = {};
int result = 0;
loff_t rollback_pos = 0;
loff_t tree_area_pos = 0;
size_t alignment = 0;
if (!bfc)
return -EFAULT;
if (root_hash.len > sizeof(sg.sg_root_hash))
return -E2BIG;
LOCK_REQUIRED(bfc->bc_mutex);
/* Everything written below is undone by truncating back to this offset. */
rollback_pos = incfs_get_end_offset(bfc->bc_file);
sg.sg_header.h_md_entry_type = INCFS_MD_SIGNATURE;
sg.sg_header.h_record_size = cpu_to_le16(sizeof(sg));
sg.sg_header.h_next_md_offset = cpu_to_le64(0);
sg.sg_hash_alg = hash_alg;
if (sig.data != NULL && sig.len > 0) {
loff_t pos = incfs_get_end_offset(bfc->bc_file);
sg.sg_sig_size = cpu_to_le32(sig.len);
sg.sg_sig_offset = cpu_to_le64(pos);
result = write_to_bf(bfc, sig.data, sig.len, pos, false);
if (result)
goto err;
}
/* NOTE(review): unlike sig above, add_data.data is not checked for NULL. */
if (add_data.len > 0) {
loff_t pos = incfs_get_end_offset(bfc->bc_file);
sg.sg_add_data_size = cpu_to_le32(add_data.len);
sg.sg_add_data_offset = cpu_to_le64(pos);
result = write_to_bf(bfc, add_data.data,
add_data.len, pos, false);
if (result)
goto err;
}
tree_area_pos = incfs_get_end_offset(bfc->bc_file);
if (hash_alg && tree_size > 0) {
if (tree_size > 5 * INCFS_DATA_FILE_BLOCK_SIZE) {
/*
 * If hash tree is big enough, it makes sense to
 * align in the backing file for faster access.
 */
loff_t offset = round_up(tree_area_pos, PAGE_SIZE);
alignment = offset - tree_area_pos;
tree_area_pos = offset;
}
/*
 * The root hash is not the only hash in the tree:
 * reserve 0-filled space (plus any alignment padding) for the tree.
 */
result = append_zeros(bfc, tree_size + alignment);
if (result)
goto err;
sg.sg_hash_tree_size = cpu_to_le32(tree_size);
sg.sg_hash_tree_offset = cpu_to_le64(tree_area_pos);
}
memcpy(sg.sg_root_hash, root_hash.data, root_hash.len);
/* Write a hash tree metadata record pointing to the hash tree above. */
result = append_md_to_backing_file(bfc, &sg.sg_header);
err:
if (result) {
/* Error, rollback file changes */
truncate_backing_file(bfc, rollback_pos);
}
return result;
}
/*
 * Write a backing file header.
 * It must only be called on an empty file; otherwise -EEXIST is returned.
 * incfs_file_header.fh_first_md_offset is written as 0 here and is updated
 * by append_md_to_backing_file() once the first metadata record is added.
 *
 * @bfc:       backing file context.
 * @uuid:      file id stored in the header; must not be NULL.
 * @file_size: full size of the file's content, stored in fh_file_size.
 *
 * Returns 0 on success, -EFAULT for a NULL @bfc or @uuid, -EEXIST if the
 * file is not empty, or a write error.
 */
int incfs_write_fh_to_backing_file(struct backing_file_context *bfc,
				   incfs_uuid_t *uuid, u64 file_size)
{
	struct incfs_file_header fh = {};
	loff_t file_pos = 0;

	/* @uuid is dereferenced below, so reject NULL like a NULL @bfc. */
	if (!bfc || !uuid)
		return -EFAULT;

	fh.fh_magic = cpu_to_le64(INCFS_MAGIC_NUMBER);
	fh.fh_version = cpu_to_le64(INCFS_FORMAT_CURRENT_VER);
	fh.fh_header_size = cpu_to_le16(sizeof(fh));
	fh.fh_first_md_offset = cpu_to_le64(0);
	fh.fh_data_block_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE);

	fh.fh_file_size = cpu_to_le64(file_size);
	fh.fh_uuid = *uuid;

	LOCK_REQUIRED(bfc->bc_mutex);

	file_pos = incfs_get_end_offset(bfc->bc_file);
	if (file_pos != 0)
		return -EEXIST;

	return write_to_bf(bfc, &fh, sizeof(fh), file_pos, true);
}
/*
 * Append a data block to the backing file and point the blockmap entry for
 * @block_index (in the blockmap starting at @bm_base_off) at it.
 *
 * Returns 0 on success, -EFAULT for a NULL context, -EINVAL for a negative
 * index, a block of 64 KiB or more, or a blockmap entry that would lie at
 * or beyond the end of the file; otherwise a write error.
 */
int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc,
				     struct mem_range block, int block_index,
				     loff_t bm_base_off, u16 flags)
{
	struct incfs_blockmap_entry entry = {};
	loff_t entry_off =
		bm_base_off + sizeof(struct incfs_blockmap_entry) * block_index;
	loff_t block_off;
	int err;

	if (!bfc)
		return -EFAULT;
	if (block_index < 0 || block.len >= (1 << 16))
		return -EINVAL;

	LOCK_REQUIRED(bfc->bc_mutex);

	block_off = incfs_get_end_offset(bfc->bc_file);
	/* Blockmap entry is beyond the file's end. It is not normal. */
	if (block_off <= entry_off)
		return -EINVAL;

	/* The block itself goes to the end of the backing file. */
	err = write_to_bf(bfc, block.data, block.len, block_off, false);
	if (err)
		return err;

	/* Then the blockmap entry is redirected to the new data. */
	entry.me_data_offset_lo = cpu_to_le32((u32)block_off);
	entry.me_data_offset_hi = cpu_to_le16((u16)(block_off >> 32));
	entry.me_data_size = cpu_to_le16((u16)block.len);
	entry.me_flags = cpu_to_le16(flags);

	return write_to_bf(bfc, &entry, sizeof(entry), entry_off, false);
}
/*
 * Write one hash tree block into the hash area reserved earlier.
 *
 * @bfc:           backing file context.
 * @block:         hash block data.
 * @block_index:   index of the block within the hash area; must be >= 0.
 * @hash_area_off: offset of the hash area in the backing file.
 *
 * Returns 0 on success, -EFAULT for a NULL context, -EINVAL for a negative
 * index or a block that would extend past the end of the file, otherwise a
 * write error.
 */
int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc,
					struct mem_range block,
					int block_index, loff_t hash_area_off)
{
	loff_t data_offset = 0;
	loff_t file_end = 0;

	if (!bfc)
		return -EFAULT;

	/* A negative index would address bytes in front of the hash area. */
	if (block_index < 0)
		return -EINVAL;

	LOCK_REQUIRED(bfc->bc_mutex);

	/* Widen before multiplying so large indexes cannot overflow int. */
	data_offset = hash_area_off +
		      (loff_t)block_index * INCFS_DATA_FILE_BLOCK_SIZE;
	file_end = incfs_get_end_offset(bfc->bc_file);
	if (data_offset + block.len > file_end) {
		/* Block is located beyond the file's end. It is not normal. */
		return -EINVAL;
	}

	return write_to_bf(bfc, block.data, block.len, data_offset, false);
}
/*
 * Initialize a new image in a given backing file: truncate it to zero
 * length and write a fresh file header.
 *
 * Takes bc_mutex itself (interruptibly). Returns 0 on success, -EFAULT for
 * a missing context or file, the error of mutex_lock_interruptible() if
 * the wait was interrupted, or the error of the truncate/write step.
 */
int incfs_make_empty_backing_file(struct backing_file_context *bfc,
				  incfs_uuid_t *uuid, u64 file_size)
{
	int result = 0;

	if (!bfc || !bfc->bc_file)
		return -EFAULT;

	/*
	 * If the lock attempt fails the mutex is not held, so return
	 * directly instead of going through the unlock path.
	 */
	result = mutex_lock_interruptible(&bfc->bc_mutex);
	if (result)
		return result;

	result = truncate_backing_file(bfc, 0);
	if (result)
		goto out;

	result = incfs_write_fh_to_backing_file(bfc, uuid, file_size);
out:
	mutex_unlock(&bfc->bc_mutex);
	return result;
}
/*
 * Read the single blockmap entry for @block_index; a one-entry call to
 * incfs_read_blockmap_entries().
 */
int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index,
			loff_t bm_base_off,
			struct incfs_blockmap_entry *bm_entry)
{
	const int single_entry = 1;

	return incfs_read_blockmap_entries(bfc, bm_entry, block_index,
					   single_entry, bm_base_off);
}
/*
 * Read @blocks_number consecutive blockmap entries, starting with
 * @start_index, from the blockmap located at @bm_base_off.
 *
 * Returns 0 on success, -EFAULT for NULL pointers, -ENODATA for a negative
 * start index or a non-positive base offset, -EIO on a short read, or the
 * read error.
 */
int incfs_read_blockmap_entries(struct backing_file_context *bfc,
		struct incfs_blockmap_entry *entries,
		int start_index, int blocks_number,
		loff_t bm_base_off)
{
	const size_t want = sizeof(struct incfs_blockmap_entry)
				* blocks_number;
	loff_t first_entry_off =
		bm_base_off + sizeof(struct incfs_blockmap_entry) * start_index;
	int got;

	if (!bfc || !entries)
		return -EFAULT;
	if (start_index < 0 || bm_base_off <= 0)
		return -ENODATA;

	got = incfs_kread(bfc->bc_file, entries, want, first_entry_off);
	if (got < 0)
		return got;

	return got < want ? -EIO : 0;
}
/*
 * Read and validate the header at offset 0 of the backing file.
 *
 * On success stores the first metadata record offset in *@first_md_off and,
 * when the pointers are non-NULL, the file id in *@uuid and the content
 * size in *@file_size.
 *
 * Returns 0 on success, -EFAULT if @bfc or @first_md_off is NULL, -EBADMSG
 * on a short read, -EILSEQ if magic, version, block size or header size do
 * not match, or the read error.
 */
int incfs_read_file_header(struct backing_file_context *bfc,
			   loff_t *first_md_off, incfs_uuid_t *uuid,
			   u64 *file_size)
{
	struct incfs_file_header hdr = {};
	ssize_t got;

	if (!bfc || !first_md_off)
		return -EFAULT;

	LOCK_REQUIRED(bfc->bc_mutex);

	got = incfs_kread(bfc->bc_file, &hdr, sizeof(hdr), 0);
	if (got < 0)
		return got;
	if (got < sizeof(hdr))
		return -EBADMSG;

	if (le64_to_cpu(hdr.fh_magic) != INCFS_MAGIC_NUMBER ||
	    le64_to_cpu(hdr.fh_version) > INCFS_FORMAT_CURRENT_VER ||
	    le16_to_cpu(hdr.fh_data_block_size) != INCFS_DATA_FILE_BLOCK_SIZE ||
	    le16_to_cpu(hdr.fh_header_size) != sizeof(hdr))
		return -EILSEQ;

	/* first_md_off was checked for NULL on entry. */
	*first_md_off = le64_to_cpu(hdr.fh_first_md_offset);
	if (uuid)
		*uuid = hdr.fh_uuid;
	if (file_size)
		*file_size = le64_to_cpu(hdr.fh_file_size);

	return 0;
}
/*
* Read through metadata records from the backing file one by one
* and call provided metadata handlers.
*/
int incfs_read_next_metadata_record(struct backing_file_context *bfc,
struct metadata_handler *handler)
{
const ssize_t max_md_size = INCFS_MAX_METADATA_RECORD_SIZE;
ssize_t bytes_read = 0;
size_t md_record_size = 0;
loff_t next_record = 0;
loff_t prev_record = 0;
int res = 0;
struct incfs_md_header *md_hdr = NULL;
if (!bfc || !handler)
return -EFAULT;
LOCK_REQUIRED(bfc->bc_mutex);
if (handler->md_record_offset == 0)
return -EPERM;
memset(&handler->md_buffer, 0, max_md_size);
bytes_read = incfs_kread(bfc->bc_file, &handler->md_buffer,
max_md_size, handler->md_record_offset);
if (bytes_read < 0)
return bytes_read;
if (bytes_read < sizeof(*md_hdr))
return -EBADMSG;
md_hdr = &handler->md_buffer.md_header;
next_record = le64_to_cpu(md_hdr->h_next_md_offset);
prev_record = le64_to_cpu(md_hdr->h_prev_md_offset);
md_record_size = le16_to_cpu(md_hdr->h_record_size);
if (md_record_size > max_md_size) {
pr_warn("incfs: The record is too large. Size: %ld",
md_record_size);
return -EBADMSG;
}
if (bytes_read < md_record_size) {
pr_warn("incfs: The record hasn't been fully read.");
return -EBADMSG;
}
if (next_record <= handler->md_record_offset && next_record != 0) {
pr_warn("incfs: Next record (%lld) points back in file.",
next_record);
return -EBADMSG;
}
if (prev_record != handler->md_prev_record_offset) {
pr_warn("incfs: Metadata chain has been corrupted.");
return -EBADMSG;
}
if (le32_to_cpu(md_hdr->h_record_crc) != calc_md_crc(md_hdr)) {
pr_warn("incfs: Metadata CRC mismatch.");
return -EBADMSG;
}
switch (md_hdr->h_md_entry_type) {
case INCFS_MD_NONE:
break;
case INCFS_MD_BLOCK_MAP:
if (handler->handle_blockmap)
res = handler->handle_blockmap(
&handler->md_buffer.blockmap, handler);
break;
case INCFS_MD_FILE_ATTR:
if (handler->handle_file_attr)
res = handler->handle_file_attr(
&handler->md_buffer.file_attr, handler);
break;
case INCFS_MD_SIGNATURE:
if (handler->handle_signature)
res = handler->handle_signature(
&handler->md_buffer.signature, handler);
break;
default:
res = -ENOTSUPP;
break;
}
if (!res) {
if (next_record == 0) {
/*
* Zero offset for the next record means that the last
* metadata record has just been processed.
*/
bfc->bc_last_md_record_offset =
handler->md_record_offset;
}
handler->md_prev_record_offset = handler->md_record_offset;
handler->md_record_offset = next_record;
}
return res;
}
/*
 * Read @size bytes at @pos of @f into a kernel buffer.
 * Wraps kernel_read(), whose argument order differs before and after
 * kernel 4.14.
 */
ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
	return kernel_read(f, buf, size, &pos);
#else
	return kernel_read(f, pos, (char *)buf, size);
#endif
}
/*
 * Write @size bytes from a kernel buffer at @pos of @f.
 * Wraps kernel_write(), which takes the position by value before kernel
 * 4.14 and by pointer from 4.14 on.
 */
ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
	return kernel_write(f, buf, size, &pos);
#else
	return kernel_write(f, buf, size, pos);
#endif
}

349
fs/incfs/format.h Normal file
View File

@ -0,0 +1,349 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2018 Google LLC
*/
/*
* Overview
* --------
* The backbone of the incremental-fs ondisk format is an append only linked
* list of metadata blocks. Each metadata block contains an offset of the next
* one. These blocks describe files and directories on the
* file system. They also represent actions of adding and removing file names
* (hard links).
*
* Every time an incremental-fs instance is mounted, it reads through this list
* to recreate the filesystem's state in memory. The offset of the first record
* in the metadata list is stored in the file header (incfs_file_header) at the
* beginning of the backing file.
*
* Most of the backing file is taken by data areas and blockmaps.
* Since data blocks can be compressed and have different sizes,
* single per-file data area can't be pre-allocated. That's why blockmaps are
* needed in order to find a location and size of each data block in
* the backing file. Each time a file is created, a corresponding block map is
* allocated to store future offsets of data blocks.
*
* Whenever a data block is given by data loader to incremental-fs:
* - A data area with the given block is appended to the end of
* the backing file.
* - A record in the blockmap for the given block index is updated to reflect
* its location, size, and compression algorithm.
* Metadata records
* ----------------
* incfs_blockmap - metadata record that specifies size and location
* of a blockmap area for a given file. This area
* contains an array of incfs_blockmap_entry-s.
* incfs_file_signature - metadata record that specifies where file signature
* and its hash tree can be found in the backing file.
*
* incfs_file_attr - metadata record that specifies where additional file
* attributes blob can be found.
*
* Metadata header
* ---------------
* incfs_md_header - header of a metadata record. It is always embedded
* at the start of the other record structures and
* serves the purpose of metadata bookkeeping.
*
* +-----------------------------------------------+ ^
* | incfs_md_header | |
* | 1. type of body(BLOCKMAP, FILE_ATTR..) | |
* | 2. size of the whole record header + body | |
* | 3. CRC the whole record header + body | |
* | 4. offset of the previous md record |]------+
* | 5. offset of the next md record (md link) |]---+
* +-----------------------------------------------+ |
* | Metadata record body with useful data | |
* +-----------------------------------------------+ |
* +--->
*
* Other ondisk structures
* -----------------------
* incfs_file_header - backing file header
* incfs_blockmap_entry - a record in a blockmap area that describes size
* and location of a data block.
* Data blocks don't have any particular structure; they are written to the
* backing file in raw form as they come from a data loader.
*
* Backing file layout
* -------------------
*
*
* +-------------------------------------------+
* | incfs_file_header |]---+
* +-------------------------------------------+ |
* | metadata |<---+
* | incfs_file_signature |]---+
* +-------------------------------------------+ |
* ......................... |
* +-------------------------------------------+ | metadata
* +------->| blockmap area | | list links
* | | [incfs_blockmap_entry] | |
* | | [incfs_blockmap_entry] | |
* | | [incfs_blockmap_entry] | |
* | +--[| [incfs_blockmap_entry] | |
* | | | [incfs_blockmap_entry] | |
* | | | [incfs_blockmap_entry] | |
* | | +-------------------------------------------+ |
* | | ......................... |
* | | +-------------------------------------------+ |
* | | | metadata |<---+
* +----|--[| incfs_blockmap |]---+
* | +-------------------------------------------+ |
* | ......................... |
* | +-------------------------------------------+ |
* +-->| data block | |
* +-------------------------------------------+ |
* ......................... |
* +-------------------------------------------+ |
* | metadata |<---+
* | incfs_file_attr |
* +-------------------------------------------+
*/
#ifndef _INCFS_FORMAT_H
#define _INCFS_FORMAT_H
#include <linux/types.h>
#include <linux/kernel.h>
#include <uapi/linux/incrementalfs.h>
#include "internal.h"
#define INCFS_MAX_NAME_LEN 255
#define INCFS_FORMAT_V1 1
#define INCFS_FORMAT_CURRENT_VER INCFS_FORMAT_V1
/*
 * Record types stored in incfs_md_header.h_md_entry_type.
 * incfs_read_next_metadata_record() ignores INCFS_MD_NONE records and
 * rejects values not listed here with -ENOTSUPP.
 */
enum incfs_metadata_type {
INCFS_MD_NONE = 0,
INCFS_MD_BLOCK_MAP = 1,
INCFS_MD_FILE_ATTR = 2,
INCFS_MD_SIGNATURE = 3
};
/* Header included at the beginning of all metadata records on the disk. */
struct incfs_md_header {
/* Record type, a value from enum incfs_metadata_type. */
__u8 h_md_entry_type;
/*
 * Size of the metadata record.
 * (e.g. inode, dir entry etc) not just this struct.
 */
__le16 h_record_size;
/*
 * CRC32 of the metadata record.
 * (e.g. inode, dir entry etc) not just this struct.
 * Computed with this field and h_next_md_offset zeroed.
 */
__le32 h_record_crc;
/* Offset of the next metadata entry if any */
__le64 h_next_md_offset;
/* Offset of the previous metadata entry if any */
__le64 h_prev_md_offset;
} __packed;
/* Backing file header; stored at offset 0 of the backing file. */
struct incfs_file_header {
/* Magic number: INCFS_MAGIC_NUMBER */
__le64 fh_magic;
/* Format version: INCFS_FORMAT_CURRENT_VER */
__le64 fh_version;
/* sizeof(incfs_file_header) */
__le16 fh_header_size;
/* INCFS_DATA_FILE_BLOCK_SIZE */
__le16 fh_data_block_size;
/* Padding, also reserved for future use. */
__le32 fh_dummy;
/* Offset of the first metadata record; 0 while there are none. */
__le64 fh_first_md_offset;
/*
 * Put file specific information after this point
 */
/* Full size of the file's content */
__le64 fh_file_size;
/* File uuid */
incfs_uuid_t fh_uuid;
} __packed;
/* Bit flags stored in incfs_blockmap_entry.me_flags. */
enum incfs_block_map_entry_flags {
/* Presumably: the block's stored bytes are LZ4-compressed — TODO confirm. */
INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0),
};
/*
 * Block map entry pointing to an actual location of the data block.
 * The data offset is split into a 32-bit low part and a 16-bit high part.
 */
struct incfs_blockmap_entry {
/* Offset of the actual data block. Lower 32 bits */
__le32 me_data_offset_lo;
/* Offset of the actual data block. Higher 16 bits */
__le16 me_data_offset_hi;
/* How many bytes the data actually occupies in the backing file */
__le16 me_data_size;
/* Block flags from incfs_block_map_entry_flags */
__le16 me_flags;
} __packed;
/*
 * Metadata record for locations of file blocks. Type = INCFS_MD_BLOCK_MAP
 * It describes where the array of incfs_blockmap_entry lives and how many
 * entries it has.
 */
struct incfs_blockmap {
/* Common metadata record header. */
struct incfs_md_header m_header;
/* Base offset of the array of incfs_blockmap_entry */
__le64 m_base_offset;
/* Size of the map entry array in blocks */
__le32 m_block_count;
} __packed;
/* Metadata record for file attribute. Type = INCFS_MD_FILE_ATTR */
struct incfs_file_attr {
/* Common metadata record header. */
struct incfs_md_header fa_header;
/* Presumably the backing file offset of the attribute value — TODO confirm. */
__le64 fa_offset;
/* Presumably the size of the attribute value in bytes — TODO confirm. */
__le16 fa_size;
/* Presumably a CRC32 of the attribute value — TODO confirm. */
__le32 fa_crc;
} __packed;
/*
 * Metadata record for file signature. Type = INCFS_MD_SIGNATURE
 * Holds the root hash inline and the backing file locations of the hash
 * tree, the pkcs7 signature and the additional data.
 */
struct incfs_file_signature {
/* Common metadata record header. */
struct incfs_md_header sg_header;
__u8 sg_hash_alg; /* Value from incfs_hash_tree_algorithm */
__le32 sg_hash_tree_size; /* The size of the hash tree. */
__le64 sg_hash_tree_offset; /* Hash tree offset in the backing file */
__u8 sg_root_hash[INCFS_MAX_HASH_SIZE];
__le32 sg_sig_size; /* The size of the pkcs7 signature. */
__le64 sg_sig_offset; /* pkcs7 signature's offset in the backing file */
__le32 sg_add_data_size; /* The size of the additional data. */
__le64 sg_add_data_offset; /* Additional data's offset */
} __packed;
/*
 * State of the backing file.
 * Created by incfs_alloc_bfc() and released by incfs_free_bfc().
 */
struct backing_file_context {
/* Protects writes to bc_file */
struct mutex bc_mutex;
/* File object to read data from */
struct file *bc_file;
/*
 * Offset of the last known metadata record in the backing file.
 * 0 means there are no metadata records.
 */
loff_t bc_last_md_record_offset;
};
/*
 * Backing file locations of things required for signature validation.
 * This is an in-memory struct using native types (loff_t/u32), unlike the
 * little-endian fields of struct incfs_file_signature.
 */
struct ondisk_signature {
loff_t add_data_offset; /* Additional data's offset */
loff_t sig_offset; /* pkcs7 signature's offset in the backing file */
loff_t mtree_offset; /* Backing file offset of the hash tree. */
u32 add_data_size; /* The size of the additional data. */
u32 sig_size; /* The size of the pkcs7 signature. */
u32 mtree_size; /* The size of the hash tree. */
};
/*
 * Reader state and callbacks passed to incfs_read_next_metadata_record().
 */
struct metadata_handler {
/* Presumably the offset of the record to be read next — TODO confirm. */
loff_t md_record_offset;
/* Presumably the offset of the previously read record — TODO confirm. */
loff_t md_prev_record_offset;
/* Opaque pointer for the callbacks; its owner is not visible in this header. */
void *context;
/*
 * Storage able to hold any of the known record types; its size defines
 * INCFS_MAX_METADATA_RECORD_SIZE.
 */
union {
struct incfs_md_header md_header;
struct incfs_blockmap blockmap;
struct incfs_file_attr file_attr;
struct incfs_file_signature signature;
} md_buffer;
/* Per-type callbacks; presumably invoked for a record of the matching type — TODO confirm. */
int (*handle_blockmap)(struct incfs_blockmap *bm,
struct metadata_handler *handler);
int (*handle_file_attr)(struct incfs_file_attr *fa,
struct metadata_handler *handler);
int (*handle_signature)(struct incfs_file_signature *sig,
struct metadata_handler *handler);
};
/* Size of the largest metadata record: the size of metadata_handler.md_buffer. */
#define INCFS_MAX_METADATA_RECORD_SIZE \
FIELD_SIZEOF(struct metadata_handler, md_buffer)
/*
 * Declarations of the backing file format helpers. Their definitions are
 * not part of this header; the notes below restate only what the names and
 * signatures show.
 */
loff_t incfs_get_end_offset(struct file *f);
/* Backing file context management */
struct backing_file_context *incfs_alloc_bfc(struct file *backing_file);
void incfs_free_bfc(struct backing_file_context *bfc);
/* Writing stuff */
int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc,
u32 block_count, loff_t *map_base_off);
int incfs_write_fh_to_backing_file(struct backing_file_context *bfc,
incfs_uuid_t *uuid, u64 file_size);
int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc,
struct mem_range block,
int block_index, loff_t bm_base_off,
u16 flags);
int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc,
struct mem_range block,
int block_index, loff_t hash_area_off);
int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc,
struct mem_range value, struct incfs_file_attr *attr);
int incfs_write_signature_to_backing_file(struct backing_file_context *bfc,
u8 hash_alg, u32 tree_size,
struct mem_range root_hash, struct mem_range add_data,
struct mem_range sig);
int incfs_make_empty_backing_file(struct backing_file_context *bfc,
incfs_uuid_t *uuid, u64 file_size);
/* Reading stuff */
int incfs_read_file_header(struct backing_file_context *bfc,
loff_t *first_md_off, incfs_uuid_t *uuid,
u64 *file_size);
int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index,
loff_t bm_base_off,
struct incfs_blockmap_entry *bm_entry);
int incfs_read_blockmap_entries(struct backing_file_context *bfc,
struct incfs_blockmap_entry *entries,
int start_index, int blocks_number,
loff_t bm_base_off);
int incfs_read_next_metadata_record(struct backing_file_context *bfc,
struct metadata_handler *handler);
/* Positional read/write helpers; presumably thin wrappers over kernel file I/O — TODO confirm. */
ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos);
ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos);
#endif /* _INCFS_FORMAT_H */

217
fs/incfs/integrity.c Normal file
View File

@ -0,0 +1,217 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2019 Google LLC
*/
#include <crypto/sha.h>
#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/version.h>
#include <crypto/pkcs7.h>
#include "integrity.h"
int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob,
struct mem_range root_hash, struct mem_range add_data)
{
struct pkcs7_message *pkcs7 = NULL;
const void *data = NULL;
size_t data_len = 0;
const char *p;
int err;
pkcs7 = pkcs7_parse_message(pkcs7_blob.data, pkcs7_blob.len);
if (IS_ERR(pkcs7)) {
pr_debug("PKCS#7 parsing error. ptr=%p size=%ld err=%ld\n",
pkcs7_blob.data, pkcs7_blob.len, -PTR_ERR(pkcs7));
return PTR_ERR(pkcs7);
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
err = pkcs7_get_content_data(pkcs7, &data, &data_len, false);
#else
err = pkcs7_get_content_data(pkcs7, &data, &data_len, NULL);
#endif
if (err || data_len == 0 || data == NULL) {
pr_debug("PKCS#7 message does not contain data\n");
err = -EBADMSG;
goto out;
}
if (root_hash.len == 0) {
pr_debug("Root hash is empty.\n");
err = -EBADMSG;
goto out;
}
if (data_len != root_hash.len + add_data.len) {
pr_debug("PKCS#7 data size doesn't match arguments.\n");
err = -EKEYREJECTED;
goto out;
}
p = data;
if (memcmp(p, root_hash.data, root_hash.len) != 0) {
pr_debug("Root hash mismatch.\n");
err = -EKEYREJECTED;
goto out;
}
p += root_hash.len;
if (memcmp(p, add_data.data, add_data.len) != 0) {
pr_debug("Additional data mismatch.\n");
err = -EKEYREJECTED;
goto out;
}
err = pkcs7_verify(pkcs7, VERIFYING_UNSPECIFIED_SIGNATURE);
if (err)
pr_debug("PKCS#7 signature verification error: %d\n", -err);
/*
* RSA signature verification sometimes returns unexpected error codes
* when signature doesn't match.
*/
if (err == -ERANGE || err == -EINVAL)
err = -EBADMSG;
out:
pkcs7_free_message(pkcs7);
return err;
}
struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id)
{
static struct incfs_hash_alg sha256 = {
.name = "sha256",
.digest_size = SHA256_DIGEST_SIZE,
.id = INCFS_HASH_TREE_SHA256
};
struct incfs_hash_alg *result = NULL;
struct crypto_shash *shash;
if (id == INCFS_HASH_TREE_SHA256) {
BUILD_BUG_ON(INCFS_MAX_HASH_SIZE < SHA256_DIGEST_SIZE);
result = &sha256;
}
if (result == NULL)
return ERR_PTR(-ENOENT);
/* pairs with cmpxchg_release() below */
shash = smp_load_acquire(&result->shash);
if (shash)
return result;
shash = crypto_alloc_shash(result->name, 0, 0);
if (IS_ERR(shash)) {
int err = PTR_ERR(shash);
pr_err("Can't allocate hash alg %s, error code:%d",
result->name, err);
return ERR_PTR(err);
}
/* pairs with smp_load_acquire() above */
if (cmpxchg_release(&result->shash, NULL, shash) != NULL)
crypto_free_shash(shash);
return result;
}
/*
 * Allocate a hash tree descriptor and compute its geometry.
 *
 * @id:               hash algorithm of the tree.
 * @data_block_count: number of data blocks covered by the tree; must be > 0.
 * @root_hash:        root hash; at least digest_size bytes are required and
 *                    exactly digest_size bytes are copied.
 *
 * Each level holds the hashes of the level below, packed into blocks of
 * INCFS_DATA_FILE_BLOCK_SIZE bytes. Levels are laid out so that level 0 is
 * stored last in the hash area.
 *
 * Returns the new tree (release it with incfs_free_mtree()), or an ERR_PTR:
 * -EINVAL for bad arguments, the incfs_get_hash_alg() error, -ENOMEM, or
 * -E2BIG when the tree needs more than INCFS_MAX_MTREE_LEVELS levels or
 * more than INCFS_MAX_HASH_AREA_SIZE bytes.
 */
struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id,
				int data_block_count,
				struct mem_range root_hash)
{
	struct mtree *result = NULL;
	struct incfs_hash_alg *hash_alg = NULL;
	int hash_per_block;
	int lvl;
	int total_blocks = 0;
	int blocks_in_level[INCFS_MAX_MTREE_LEVELS];
	int blocks = data_block_count;

	if (data_block_count <= 0)
		return ERR_PTR(-EINVAL);

	hash_alg = incfs_get_hash_alg(id);
	if (IS_ERR(hash_alg))
		return ERR_CAST(hash_alg);

	if (root_hash.len < hash_alg->digest_size)
		return ERR_PTR(-EINVAL);

	result = kzalloc(sizeof(*result), GFP_NOFS);
	if (!result)
		return ERR_PTR(-ENOMEM);

	result->alg = hash_alg;
	hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / result->alg->digest_size;

	/* Calculating tree geometry. */
	/* First pass: calculate how many blocks in each tree level. */
	for (lvl = 0; blocks > 1; lvl++) {
		if (lvl >= INCFS_MAX_MTREE_LEVELS) {
			pr_err("incfs: too much data in mtree\n");
			goto err;
		}

		/*
		 * Round up without adding to 'blocks' first, so a block count
		 * close to INT_MAX cannot overflow.
		 */
		blocks = blocks / hash_per_block +
			 (blocks % hash_per_block ? 1 : 0);
		blocks_in_level[lvl] = blocks;
		total_blocks += blocks;
	}
	result->depth = lvl;

	/*
	 * Compare in units of blocks before multiplying: the byte size of an
	 * oversized tree does not fit into an int or into the u32
	 * hash_tree_area_size and could wrap around to a small value.
	 */
	if (total_blocks >
	    INCFS_MAX_HASH_AREA_SIZE / INCFS_DATA_FILE_BLOCK_SIZE)
		goto err;
	result->hash_tree_area_size = total_blocks * INCFS_DATA_FILE_BLOCK_SIZE;

	blocks = 0;
	/* Second pass: calculate offset of each level. 0th level goes last. */
	for (lvl = 0; lvl < result->depth; lvl++) {
		u32 suboffset;

		blocks += blocks_in_level[lvl];
		suboffset = (total_blocks - blocks)
					* INCFS_DATA_FILE_BLOCK_SIZE;

		result->hash_level_suboffset[lvl] = suboffset;
	}

	/* Root hash is stored separately from the rest of the tree. */
	memcpy(result->root_hash, root_hash.data, hash_alg->digest_size);
	return result;

err:
	kfree(result);
	return ERR_PTR(-E2BIG);
}
/*
 * Release a tree returned by incfs_alloc_mtree().
 * Only the struct itself is freed; the algorithm descriptor it points to
 * is not touched here.
 */
void incfs_free_mtree(struct mtree *tree)
{
kfree(tree);
}
/*
 * Hash a memory range with the given algorithm.
 *
 * @alg:    algorithm descriptor with an allocated transform.
 * @data:   bytes to hash.
 * @digest: output buffer; must hold at least alg->digest_size bytes.
 *
 * Returns the crypto_shash_digest() result, -EFAULT when a required pointer
 * is missing, or -EINVAL when the output buffer is too small.
 *
 * NOTE(review): the descriptor declaration names alg->shash before the NULL
 * checks run; whether that expression is evaluated depends on how the
 * running kernel defines SHASH_DESC_ON_STACK — confirm.
 */
int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data,
			struct mem_range digest)
{
	SHASH_DESC_ON_STACK(desc, alg->shash);

	if (!alg || !alg->shash)
		return -EFAULT;

	if (!data.data || !digest.data)
		return -EFAULT;

	if (digest.len < alg->digest_size)
		return -EINVAL;

	desc->tfm = alg->shash;
	return crypto_shash_digest(desc, data.data, data.len, digest.data);
}
/*
 * Free a signature_info together with the three buffers it references.
 * A NULL argument is accepted and ignored.
 */
void incfs_free_signature_info(struct signature_info *si)
{
	if (si) {
		kfree(si->signature.data);
		kfree(si->additional_data.data);
		kfree(si->root_hash.data);
		kfree(si);
	}
}

72
fs/incfs/integrity.h Normal file
View File

@ -0,0 +1,72 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2019 Google LLC
*/
#ifndef _INCFS_INTEGRITY_H
#define _INCFS_INTEGRITY_H
#include <linux/types.h>
#include <linux/kernel.h>
#include <crypto/hash.h>
#include <uapi/linux/incrementalfs.h>
#include "internal.h"
/* Maximum number of hash tree levels; sizes mtree.hash_level_suboffset. */
#define INCFS_MAX_MTREE_LEVELS 8
/* Upper bound, in bytes, on mtree.hash_tree_area_size (1280 MiB). */
#define INCFS_MAX_HASH_AREA_SIZE (1280 * 1024 * 1024)
/* Description of a hash algorithm usable for hash trees. */
struct incfs_hash_alg {
/* Algorithm name passed to crypto_alloc_shash(). */
const char *name;
/* Size of one digest in bytes. */
int digest_size;
enum incfs_hash_tree_algorithm id;
/* Crypto transform; allocated on first use by incfs_get_hash_alg(). */
struct crypto_shash *shash;
};
/*
 * Merkle tree structure.
 * Describes the geometry of a file's hash tree; the hash blocks themselves
 * are not stored in this struct.
 */
struct mtree {
/* Algorithm descriptor obtained from incfs_get_hash_alg(). */
struct incfs_hash_alg *alg;
/* Root hash; only the first alg->digest_size bytes are filled in. */
u8 root_hash[INCFS_MAX_HASH_SIZE];
/* Offset of each hash level in the hash area. */
u32 hash_level_suboffset[INCFS_MAX_MTREE_LEVELS];
/* Total size of all hash levels in bytes. */
u32 hash_tree_area_size;
/* Number of levels in hash_level_suboffset */
int depth;
};
/*
 * In-memory copy of a file's signature material.
 * The three data buffers are released with kfree() by
 * incfs_free_signature_info().
 */
struct signature_info {
struct mem_range root_hash;
struct mem_range additional_data;
struct mem_range signature;
enum incfs_hash_tree_algorithm hash_alg;
};
/* Returns the descriptor for a hash algorithm id, or an ERR_PTR. */
struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id);
/* Builds the hash tree geometry for data_block_count blocks, or returns an ERR_PTR. */
struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id,
int data_block_count,
struct mem_range root_hash);
void incfs_free_mtree(struct mtree *tree);
/*
 * NOTE(review): no definition of the next two functions appears in the
 * integrity.c shown with this header — confirm they are implemented or
 * drop the declarations.
 */
size_t incfs_get_mtree_depth(enum incfs_hash_tree_algorithm alg, loff_t size);
size_t incfs_get_mtree_hash_count(enum incfs_hash_tree_algorithm alg,
loff_t size);
/* Writes the digest of 'data' into 'digest'; returns 0 or a negative errno. */
int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data,
struct mem_range digest);
/* Checks that pkcs7_blob signs root_hash followed by add_data. */
int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob,
struct mem_range root_hash, struct mem_range add_data);
void incfs_free_signature_info(struct signature_info *si);
#endif /* _INCFS_INTEGRITY_H */

21
fs/incfs/internal.h Normal file
View File

@ -0,0 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2018 Google LLC
*/
#ifndef _INCFS_INTERNAL_H
#define _INCFS_INTERNAL_H
#include <linux/types.h>
/* A pointer/length pair describing a byte buffer; it carries no ownership information. */
struct mem_range {
u8 *data;
size_t len;
};
/* Build a mem_range from a pointer and a length; nothing is copied. */
static inline struct mem_range range(u8 *data, size_t len)
{
	struct mem_range r = {
		.data = data,
		.len = len,
	};

	return r;
}
/*
 * Emit a one-time warning when mutex_is_locked() reports that the given
 * mutex (passed as an lvalue, not a pointer) is unlocked.
 */
#define LOCK_REQUIRED(lock) WARN_ON_ONCE(!mutex_is_locked(&lock))
#endif /* _INCFS_INTERNAL_H */

103
fs/incfs/main.c Normal file
View File

@ -0,0 +1,103 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2018 Google LLC
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
#include <uapi/linux/incrementalfs.h>
#include "vfs.h"
/* Name of the sysfs directory created under the incfs kobject. */
#define INCFS_NODE_FEATURES "features"
/* File system type registered by init_incfs_module(). */
struct file_system_type incfs_fs_type = {
.owner = THIS_MODULE,
.name = INCFS_NAME,
.mount = incfs_mount_fs,
.kill_sb = incfs_kill_sb,
.fs_flags = 0
};
/* kobjects created by init_sysfs(): the incfs directory and its "features" child. */
static struct kobject *sysfs_root, *featurefs_root;
/* sysfs show callback for the "corefs" attribute: always reports "supported". */
static ssize_t corefs_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
return snprintf(buff, PAGE_SIZE, "supported\n");
}
/* Read-only attribute backed by corefs_show(). */
static struct kobj_attribute corefs_attr = __ATTR_RO(corefs);
/* NULL-terminated attribute list installed on the "features" kobject. */
static struct attribute *attributes[] = {
&corefs_attr.attr,
NULL,
};
static const struct attribute_group attr_group = {
.attrs = attributes,
};
/*
 * Create the incfs sysfs hierarchy: a kobject named INCFS_NAME under
 * fs_kobj, a "features" child, and the feature attribute group.
 *
 * On any failure everything created so far is released and both global
 * kobject pointers are reset to NULL, so nothing is leaked and a later
 * cleanup_sysfs() call is harmless.
 *
 * Returns 0 on success, -ENOMEM when a kobject cannot be created, or the
 * sysfs_create_group() error.
 */
static int __init init_sysfs(void)
{
	int res;

	sysfs_root = kobject_create_and_add(INCFS_NAME, fs_kobj);
	if (!sysfs_root)
		return -ENOMEM;

	featurefs_root = kobject_create_and_add(INCFS_NODE_FEATURES,
						sysfs_root);
	if (!featurefs_root) {
		res = -ENOMEM;
		goto err_put_root;
	}

	res = sysfs_create_group(featurefs_root, &attr_group);
	if (res)
		goto err_put_features;

	return 0;

err_put_features:
	kobject_put(featurefs_root);
	featurefs_root = NULL;
err_put_root:
	kobject_put(sysfs_root);
	sysfs_root = NULL;
	return res;
}
/*
 * Remove the sysfs objects created by init_sysfs(), child first, and reset
 * the global pointers. Objects that were never created are skipped.
 */
static void cleanup_sysfs(void)
{
	if (featurefs_root != NULL) {
		sysfs_remove_group(featurefs_root, &attr_group);
		kobject_put(featurefs_root);
	}
	featurefs_root = NULL;

	if (sysfs_root != NULL)
		kobject_put(sysfs_root);
	sysfs_root = NULL;
}
/*
 * Module entry point: set up sysfs, then register the file system type.
 * If registration fails the sysfs objects are removed again.
 * Returns 0 on success or the error of the step that failed.
 */
static int __init init_incfs_module(void)
{
	int err = init_sysfs();

	if (err)
		return err;

	err = register_filesystem(&incfs_fs_type);
	if (!err)
		return 0;

	cleanup_sysfs();
	return err;
}
/* Module exit point: removes the sysfs objects, then unregisters the file system type. */
static void __exit cleanup_incfs_module(void)
{
cleanup_sysfs();
unregister_filesystem(&incfs_fs_type);
}
/* Module entry/exit hooks and descriptive metadata. */
module_init(init_incfs_module);
module_exit(cleanup_incfs_module);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Eugene Zemtsov <ezemtsov@google.com>");
MODULE_DESCRIPTION("Incremental File System");

2202
fs/incfs/vfs.c Normal file

File diff suppressed because it is too large Load Diff

13
fs/incfs/vfs.h Normal file
View File

@ -0,0 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2018 Google LLC
*/
#ifndef _INCFS_VFS_H
#define _INCFS_VFS_H
/* Super block teardown and mount callbacks; wired into incfs_fs_type in main.c. */
void incfs_kill_sb(struct super_block *sb);
struct dentry *incfs_mount_fs(struct file_system_type *type, int flags,
const char *dev_name, void *data);
#endif

View File

@ -0,0 +1,244 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Userspace interface for Incremental FS.
*
* Incremental FS is special-purpose Linux virtual file system that allows
* execution of a program while its binary and resource files are still being
* lazily downloaded over the network, USB etc.
*
* Copyright 2019 Google LLC
*/
#ifndef _UAPI_LINUX_INCREMENTALFS_H
#define _UAPI_LINUX_INCREMENTALFS_H
#include <linux/limits.h>
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/xattr.h>
/* ===== constants ===== */
#define INCFS_NAME "incremental-fs"
#define INCFS_MAGIC_NUMBER (0x5346434e49ul)
#define INCFS_DATA_FILE_BLOCK_SIZE 4096
#define INCFS_HEADER_VER 1
// TODO: This value is assumed in incfs_copy_signature_info_from_user to be the
// actual signature length. Set back to 64 when fixed.
#define INCFS_MAX_HASH_SIZE 32
#define INCFS_MAX_FILE_ATTR_SIZE 512
#define INCFS_PENDING_READS_FILENAME ".pending_reads"
#define INCFS_LOG_FILENAME ".log"
#define INCFS_XATTR_ID_NAME (XATTR_USER_PREFIX "incfs.id")
#define INCFS_XATTR_SIZE_NAME (XATTR_USER_PREFIX "incfs.size")
#define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata")
#define INCFS_MAX_SIGNATURE_SIZE 8096
#define INCFS_IOCTL_BASE_CODE 'g'
/* ===== ioctl requests on the command dir ===== */
/* Create a new file; the argument is a struct incfs_new_file_args. */
#define INCFS_IOC_CREATE_FILE \
_IOWR(INCFS_IOCTL_BASE_CODE, 30, struct incfs_new_file_args)
/* Read file signature; the argument is a struct incfs_get_file_sig_args. */
#define INCFS_IOC_READ_FILE_SIGNATURE \
_IOWR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args)
/* Compression of a submitted block; used in incfs_new_data_block.compression. */
enum incfs_compression_alg {
COMPRESSION_NONE = 0,
COMPRESSION_LZ4 = 1
};
/* Kind of a submitted block; used in incfs_new_data_block.flags. */
enum incfs_block_flags {
INCFS_BLOCK_FLAGS_NONE = 0,
/* Presumably marks the block as a hash tree block rather than file data — TODO confirm. */
INCFS_BLOCK_FLAGS_HASH = 1,
};
/* 16-byte file identifier, declared with 8-byte alignment. */
typedef struct {
__u8 bytes[16];
} incfs_uuid_t __attribute__((aligned (8)));
/*
 * Description of a pending read. A pending read is a read call made by
 * a userspace program for which the filesystem currently doesn't have data.
 */
struct incfs_pending_read_info {
/* Id of a file that is being read from. */
incfs_uuid_t file_id;
/* A number of microseconds since system boot to the read. */
__aligned_u64 timestamp_us;
/* Index of a file block that is being read. */
__u32 block_index;
/* A serial number of this pending read. */
__u32 serial_number;
};
/*
 * A struct to be written into a control file to load a data or hash
 * block to a data file.
 */
struct incfs_new_data_block {
/* Index of a data block. */
__u32 block_index;
/* Length of data, in bytes, of the buffer referenced by 'data'. */
__u32 data_len;
/*
 * A pointer to an actual data for the block.
 *
 * Equivalent to: __u8 *data;
 */
__aligned_u64 data;
/*
 * Compression algorithm used to compress the data block.
 * Values from enum incfs_compression_alg.
 */
__u8 compression;
/* Values from enum incfs_block_flags */
__u8 flags;
/* Reserved fields; presumably expected to be zero — TODO confirm. */
__u16 reserved1;
__u32 reserved2;
__aligned_u64 reserved3;
};
/* Hash algorithm of a file's hash tree. */
enum incfs_hash_tree_algorithm {
/* Presumably: the file has no hash tree — TODO confirm. */
INCFS_HASH_TREE_NONE = 0,
INCFS_HASH_TREE_SHA256 = 1
};
/*
 * Signature material supplied when creating a file; referenced from
 * incfs_new_file_args.signature_info. Pointers are carried as 64-bit
 * integers.
 */
struct incfs_file_signature_info {
/*
 * A pointer to file's root hash.
 * Presumably only meaningful when hash_tree_alg != 0 — TODO confirm.
 * Actual hash size determined by hash_tree_alg.
 * Size of the buffer should be at least INCFS_MAX_HASH_SIZE
 *
 * Equivalent to: u8 *root_hash;
 */
__aligned_u64 root_hash;
/*
 * A pointer to additional data that was attached to the root hash
 * before signing.
 *
 * Equivalent to: u8 *additional_data;
 */
__aligned_u64 additional_data;
/* Size of additional data. */
__u32 additional_data_size;
__u32 reserved1;
/*
 * A pointer to pkcs7 signature DER blob.
 *
 * Equivalent to: u8 *signature;
 */
__aligned_u64 signature;
/* Size of pkcs7 signature DER blob */
__u32 signature_size;
__u32 reserved2;
/* Value from incfs_hash_tree_algorithm */
__u8 hash_tree_alg;
};
/*
 * Create a new file or directory.
 * Argument for INCFS_IOC_CREATE_FILE ioctl
 */
struct incfs_new_file_args {
/* Id of a file to create. */
incfs_uuid_t file_id;
/*
 * Total size of the new file. Ignored if S_ISDIR(mode).
 */
__aligned_u64 size;
/*
 * File mode. Permissions and dir flag.
 */
__u16 mode;
__u16 reserved1;
__u32 reserved2;
/*
 * A pointer to a null-terminated relative path to the file's parent
 * dir.
 * Max length: PATH_MAX
 *
 * Equivalent to: char *directory_path;
 */
__aligned_u64 directory_path;
/*
 * A pointer to a null-terminated file's name.
 * Max length: PATH_MAX
 *
 * Equivalent to: char *file_name;
 */
__aligned_u64 file_name;
/*
 * A pointer to a file attribute to be set on creation.
 *
 * Equivalent to: u8 *file_attr;
 */
__aligned_u64 file_attr;
/*
 * Length of the data buffer specified by file_attr.
 * Max value: INCFS_MAX_FILE_ATTR_SIZE
 */
__u32 file_attr_len;
__u32 reserved4;
/* struct incfs_file_signature_info *signature_info; */
__aligned_u64 signature_info;
__aligned_u64 reserved5;
__aligned_u64 reserved6;
};
/*
 * Request a digital signature blob for a given file.
 * Argument for INCFS_IOC_READ_FILE_SIGNATURE ioctl
 */
struct incfs_get_file_sig_args {
/*
 * A pointer to the data buffer to save a signature blob to.
 *
 * Equivalent to: u8 *file_signature;
 */
__aligned_u64 file_signature;
/* Size of the buffer at file_signature. */
__u32 file_signature_buf_size;
/*
 * Number of bytes saved to the file_signature buffer.
 * It is set after the ioctl is done.
 */
__u32 file_signature_len_out;
};
#endif /* _UAPI_LINUX_INCREMENTALFS_H */

View File

@ -0,0 +1,16 @@
# SPDX-License-Identifier: GPL-2.0
# Build rules for the incfs kselftest binary (incfs_test).
CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall
CFLAGS += -I../../../../../usr/include/
CFLAGS += -I../../../../include/uapi/
CFLAGS += -I../../../../lib

# Libraries go into LDLIBS, not CFLAGS: the link rule places LDLIBS after the
# sources, which linkers that resolve symbols in command-line order require.
LDLIBS += -lssl -lcrypto -llz4

EXTRA_SOURCES := utils.c
TEST_GEN_PROGS := incfs_test

include ../../lib.mk

$(OUTPUT)incfs_test: incfs_test.c $(EXTRA_SOURCES)

all: $(OUTPUT)incfs_test

clean:
	rm -rf $(OUTPUT)incfs_test *.o

View File

@ -0,0 +1 @@
CONFIG_INCREMENTAL_FS=y

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,377 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2018 Google LLC
*/
#include <stdio.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <errno.h>
#include <string.h>
#include <poll.h>
#include <openssl/bio.h>
#include <openssl/err.h>
#include <openssl/pem.h>
#include <openssl/pkcs7.h>
#include <openssl/sha.h>
#include <openssl/md5.h>
#include "utils.h"
/*
 * Mount incfs on mount_dir with backing_dir as the source, passing the
 * read timeout as the "read_timeout_ms" mount option.
 *
 * Returns the mount() result; on failure a message is printed with perror().
 */
int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms)
{
	char options[512];
	int rc;

	snprintf(options, ARRAY_SIZE(options), "read_timeout_ms=%u",
		 read_timeout_ms);

	rc = mount(backing_dir, mount_dir, INCFS_NAME, 0, options);
	if (rc != 0)
		perror("Error mounting fs.");

	return rc;
}
/*
 * Mount incfs on mount_dir with backing_dir as the source, using the
 * caller-supplied mount option string as is.
 *
 * Returns the mount() result; on failure a message is printed with perror().
 */
int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt)
{
	int rc = mount(backing_dir, mount_dir, INCFS_NAME, 0, opt);

	if (rc != 0)
		perror("Error mounting fs.");

	return rc;
}
/* Stub: ignores all arguments and always returns 0. */
int unlink_node(int fd, int parent_ino, char *filename)
{
return 0;
}
/*
 * Parse a PEM-encoded private key held in a NUL-terminated string.
 * Returns the key, or NULL if the memory BIO cannot be created or the
 * PEM data cannot be read.
 */
static EVP_PKEY *deserialize_private_key(const char *pem_key)
{
	EVP_PKEY *key;
	BIO *mem = BIO_new_mem_buf(pem_key, strlen(pem_key));

	if (!mem)
		return NULL;

	key = PEM_read_bio_PrivateKey(mem, NULL, NULL, NULL);
	BIO_free(mem);
	return key;
}
/*
 * Parse a PEM-encoded X.509 certificate held in a NUL-terminated string.
 * Returns the certificate, or NULL if the memory BIO cannot be created or
 * the PEM data cannot be read.
 */
static X509 *deserialize_cert(const char *pem_cert)
{
	X509 *x509;
	BIO *mem = BIO_new_mem_buf(pem_cert, strlen(pem_cert));

	if (!mem)
		return NULL;

	x509 = PEM_read_bio_X509(mem, NULL, NULL, NULL);
	BIO_free(mem);
	return x509;
}
/*
 * Sign a buffer and return the DER-encoded PKCS#7 signature.
 *
 * @data_to_sign, @data_size: bytes to sign.
 * @pkey_pem:     PEM-encoded private key (NUL-terminated string).
 * @cert_pem:     PEM-encoded certificate (NUL-terminated string).
 * @sig_ret:      on success receives a malloc()ed buffer with the
 *                signature; the caller must free() it.
 * @sig_size_ret: on success receives the signature size in bytes.
 *
 * Returns true on success. On failure false is returned and the output
 * parameters are left untouched. All OpenSSL objects created here,
 * including the parsed key and certificate, are released before returning.
 */
bool sign_pkcs7(const void *data_to_sign, size_t data_size,
		char *pkey_pem, char *cert_pem,
		void **sig_ret, size_t *sig_size_ret)
{
	/*
	 * PKCS#7 signing flags:
	 *
	 * - PKCS7_BINARY	signing binary data, so skip MIME translation
	 *
	 * - PKCS7_NOATTR	omit extra authenticated attributes, such as
	 *			SMIMECapabilities
	 *
	 * - PKCS7_PARTIAL	PKCS7_sign() creates a handle only, then
	 *			PKCS7_sign_add_signer() can add a signer later.
	 *			This is necessary to change the message digest
	 *			algorithm from the default of SHA-1.  Requires
	 *			OpenSSL 1.0.0 or later.
	 */
	int pkcs7_flags = PKCS7_BINARY | PKCS7_NOATTR | PKCS7_PARTIAL;
	void *sig = NULL;
	void *sig_copy;
	long sig_size;
	BIO *bio = NULL;
	PKCS7 *p7 = NULL;
	EVP_PKEY *pkey = NULL;
	X509 *cert = NULL;
	bool ok = false;

	const EVP_MD *md = EVP_sha256();

	pkey = deserialize_private_key(pkey_pem);
	if (!pkey) {
		printf("deserialize_private_key failed\n");
		goto out;
	}

	cert = deserialize_cert(cert_pem);
	if (!cert) {
		printf("deserialize_cert failed\n");
		goto out;
	}

	bio = BIO_new_mem_buf(data_to_sign, data_size);
	if (!bio)
		goto out;

	p7 = PKCS7_sign(NULL, NULL, NULL, bio, pkcs7_flags);
	if (!p7) {
		printf("failed to initialize PKCS#7 signature object\n");
		goto out;
	}

	if (!PKCS7_sign_add_signer(p7, cert, pkey, md, pkcs7_flags)) {
		printf("failed to add signer to PKCS#7 signature object\n");
		goto out;
	}

	if (PKCS7_final(p7, bio, pkcs7_flags) != 1) {
		printf("failed to finalize PKCS#7 signature\n");
		goto out;
	}

	/* Reuse 'bio' as the output sink for the DER encoding. */
	BIO_free(bio);
	bio = BIO_new(BIO_s_mem());
	if (!bio) {
		printf("out of memory\n");
		goto out;
	}

	if (i2d_PKCS7_bio(bio, p7) != 1) {
		printf("failed to DER-encode PKCS#7 signature object\n");
		goto out;
	}

	/* 'sig' points into memory owned by 'bio', so copy it out. */
	sig_size = BIO_get_mem_data(bio, &sig);
	if (sig_size <= 0 || !sig) {
		printf("failed to DER-encode PKCS#7 signature object\n");
		goto out;
	}

	sig_copy = malloc(sig_size);
	if (!sig_copy) {
		printf("out of memory\n");
		goto out;
	}
	memcpy(sig_copy, sig, sig_size);

	*sig_ret = sig_copy;
	*sig_size_ret = sig_size;
	ok = true;
out:
	PKCS7_free(p7);
	BIO_free(bio);
	/* The PKCS#7 object holds its own references to the key and cert. */
	X509_free(cert);
	EVP_PKEY_free(pkey);
	return ok;
}
/*
 * Create a regular file (mode 0555) with signature information through the
 * INCFS_IOC_CREATE_FILE ioctl.
 *
 * @fd:        descriptor the ioctl is issued on.
 * @dir:       parent directory path passed as directory_path.
 * @filename:  name of the new file; its MD5 is used as the file id.
 * @id_out:    receives the file id on success.
 * @size:      size of the new file.
 * @root_hash: root hash, or NULL for no hash tree.
 * @sig, @sig_size: signature blob and its size.
 * @add_data:  NUL-terminated additional data; NULL is treated as empty.
 *
 * Returns 0 on success or -errno from the ioctl.
 */
int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out,
	size_t size, const char *root_hash, char *sig, size_t sig_size,
	char *add_data)
{
	int mode = __S_IFREG | 0555;
	struct incfs_file_signature_info sig_info = {
		.hash_tree_alg = root_hash
					? INCFS_HASH_TREE_SHA256
					: 0,
		.root_hash = ptr_to_u64(root_hash),
		.additional_data = ptr_to_u64(add_data),
		/* Guard against NULL the same way emit_file() does for attr. */
		.additional_data_size = add_data ? strlen(add_data) : 0,
		.signature = ptr_to_u64(sig),
		.signature_size = sig_size,
	};

	struct incfs_new_file_args args = {
			.size = size,
			.mode = mode,
			.file_name = ptr_to_u64(filename),
			.directory_path = ptr_to_u64(dir),
			.signature_info = ptr_to_u64(&sig_info),
			.file_attr = 0,
			.file_attr_len = 0
	};

	md5(filename, strlen(filename), (char *)args.file_id.bytes);

	if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0)
		return -errno;

	*id_out = args.file_id;
	return 0;
}
/*
 * Create a regular file (mode 0555) without a hash tree through the
 * INCFS_IOC_CREATE_FILE ioctl.
 *
 * @fd:       descriptor the ioctl is issued on.
 * @dir:      parent directory path passed as directory_path.
 * @filename: name of the new file; its MD5 is used as the file id.
 * @id_out:   receives the file id on success.
 * @size:     size of the new file.
 * @attr:     NUL-terminated file attribute, or NULL for none.
 *
 * Returns 0 on success or -errno from the ioctl.
 */
int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out,
		size_t size, char *attr)
{
	struct incfs_file_signature_info no_sig = {
		.root_hash = ptr_to_u64(NULL),
		.hash_tree_alg = 0
	};
	struct incfs_new_file_args args = {
		.size = size,
		.mode = __S_IFREG | 0555,
		.directory_path = ptr_to_u64(dir),
		.file_name = ptr_to_u64(filename),
		.file_attr = ptr_to_u64(attr),
		.file_attr_len = attr ? strlen(attr) : 0,
		.signature_info = ptr_to_u64(&no_sig)
	};

	md5(filename, strlen(filename), (char *)args.file_id.bytes);

	if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) == 0) {
		*id_out = args.file_id;
		return 0;
	}

	return -errno;
}
/* Stub: ignores all arguments, leaves buf untouched and always returns 0. */
int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size)
{
return 0;
}
/*
 * Read a file's signature blob into buf with the
 * INCFS_IOC_READ_FILE_SIGNATURE ioctl.
 *
 * Returns the length reported by the ioctl in file_signature_len_out, or
 * -errno if the ioctl fails.
 */
int get_file_signature(int fd, unsigned char *buf, int buf_size)
{
	struct incfs_get_file_sig_args args = {
		.file_signature_buf_size = buf_size,
		.file_signature = ptr_to_u64(buf)
	};

	if (ioctl(fd, INCFS_IOC_READ_FILE_SIGNATURE, &args) != 0)
		return -errno;

	return args.file_signature_len_out;
}
/*
 * Return the size reported by stat() for the given path.
 * Any stat() failure is reported as -ENOENT, whatever errno says.
 */
loff_t get_file_size(char *name)
{
	struct stat info;

	if (stat(name, &info) != 0)
		return -ENOENT;

	return info.st_size;
}
/*
 * Open <mount_dir>/INCFS_PENDING_READS_FILENAME read-only.
 * Returns the descriptor, or the negative open() result after printing a
 * message with perror().
 */
int open_commands_file(char *mount_dir)
{
	char path[255];
	int fd;

	snprintf(path, ARRAY_SIZE(path),
			"%s/%s", mount_dir, INCFS_PENDING_READS_FILENAME);

	fd = open(path, O_RDONLY);
	if (fd >= 0)
		return fd;

	perror("Can't open commands file");
	return fd;
}
int open_log_file(char *mount_dir)
{
char cmd_file[255];
int cmd_fd;
snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/.log", mount_dir);
cmd_fd = open(cmd_file, O_RDWR);
if (cmd_fd < 0)
perror("Can't open log file");
return cmd_fd;
}
/*
 * Wait for pending reads and fetch them from the given descriptor.
 *
 * @fd:         descriptor to poll and read.
 * @timeout_ms: how long to wait for readable data; when <= 0 no poll() is
 *              done and read() is called directly.
 * @prs:        output array.
 * @prs_count:  capacity of prs, in elements.
 *
 * Returns the number of complete records read, 0 when the wait timed out
 * or the descriptor did not become readable, or -errno on poll()/read()
 * failure.
 */
int wait_for_pending_reads(int fd, int timeout_ms,
	struct incfs_pending_read_info *prs, int prs_count)
{
	ssize_t read_res = 0;

	if (timeout_ms > 0) {
		int poll_res = 0;
		struct pollfd pollfd = {
			.fd = fd,
			.events = POLLIN
		};

		poll_res = poll(&pollfd, 1, timeout_ms);
		if (poll_res < 0)
			return -errno;
		if (poll_res == 0)
			return 0;
		/* Test the POLLIN bit; OR-ing it in would always be non-zero. */
		if (!(pollfd.revents & POLLIN))
			return 0;
	}

	read_res = read(fd, prs, prs_count * sizeof(*prs));
	if (read_res < 0)
		return -errno;

	return read_res / sizeof(*prs);
}
/*
 * Join a directory and a file name with '/'.
 *
 * Returns a newly allocated string the caller must free(), or NULL when
 * formatting fails, the result does not fit into FILENAME_MAX bytes, or
 * strdup() fails. A truncated path is never returned, because it would
 * name a different file.
 */
char *concat_file_name(const char *dir, char *file)
{
	char full_name[FILENAME_MAX] = "";
	int len = snprintf(full_name, ARRAY_SIZE(full_name), "%s/%s", dir,
			   file);

	if (len < 0 || (size_t)len >= ARRAY_SIZE(full_name))
		return NULL;
	return strdup(full_name);
}
/*
 * Recursively delete a directory and everything inside it.
 *
 * Entries whose d_type is DT_DIR are removed recursively; everything else
 * is removed with unlink(). Processing stops at the first failure.
 *
 * Returns 0 when all entries were removed, or a negative errno: the
 * opendir()/unlink() error, or -ENOMEM when an entry's path cannot be
 * built. The final rmdir() of dir_path is best effort and its result is
 * not reported.
 */
int delete_dir_tree(const char *dir_path)
{
	DIR *dir = NULL;
	struct dirent *dp;
	int result = 0;

	dir = opendir(dir_path);
	if (!dir) {
		result = -errno;
		goto out;
	}

	while ((dp = readdir(dir))) {
		char *full_path;

		if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, ".."))
			continue;

		full_path = concat_file_name(dir_path, dp->d_name);
		if (!full_path) {
			result = -ENOMEM;
			goto out;
		}

		if (dp->d_type == DT_DIR)
			result = delete_dir_tree(full_path);
		else if (unlink(full_path) != 0)
			/* Capture errno before free() can change it. */
			result = -errno;

		free(full_path);
		if (result)
			goto out;
	}

out:
	if (dir)
		closedir(dir);
	if (!result)
		rmdir(dir_path);
	return result;
}
/*
 * Compute the SHA-256 digest of dsize bytes at data.
 * hash must have room for SHA256_DIGEST_SIZE (32) bytes.
 */
void sha256(char *data, size_t dsize, char *hash)
{
	/* One-shot form of the init/update/final sequence. */
	SHA256((const unsigned char *)data, dsize, (unsigned char *)hash);
}
/*
 * Compute the MD5 digest of dsize bytes at data.
 * hash must have room for the 16-byte digest.
 */
void md5(char *data, size_t dsize, char *hash)
{
	/* One-shot form of the init/update/final sequence. */
	MD5((const unsigned char *)data, dsize, (unsigned char *)hash);
}

View File

@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2019 Google LLC
*/
#include <stdbool.h>
#include <sys/stat.h>
#include "../../include/uapi/linux/incrementalfs.h"
/*
 * Number of elements in an array. Works only on real arrays, not on
 * pointers or array parameters. The argument is parenthesized before it is
 * indexed so any array-typed expression can be passed.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
/*
 * Convert a pointer to the 64-bit integer form used by the incfs uapi
 * structs. On targets without __LP64__ the pointer goes through __u32
 * first. The argument is parenthesized so the cast applies to the whole
 * expression, e.g. ptr_to_u64(buf + 1).
 */
#ifdef __LP64__
#define ptr_to_u64(p) ((__u64)(p))
#else
#define ptr_to_u64(p) ((__u64)(__u32)(p))
#endif
/* Size of a SHA-256 digest in bytes. */
#define SHA256_DIGEST_SIZE 32
/* Mount helpers: return the mount() result. */
int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms);
int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt);
/* Currently a stub in utils.c that always returns 0. */
int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size);
/* Returns the signature length or -errno. */
int get_file_signature(int fd, unsigned char *buf, int buf_size);
/* NOTE(review): no definition of emit_node is visible in utils.c — confirm it is still needed. */
int emit_node(int fd, char *filename, int *ino_out, int parent_ino,
size_t size, mode_t mode, char *attr);
/* File creation helpers: return 0 or -errno and store the new file id in id_out. */
int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out,
size_t size, char *attr);
int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out,
size_t size, const char *root_hash, char *sig, size_t sig_size,
char *add_data);
/* Currently a stub in utils.c that always returns 0. */
int unlink_node(int fd, int parent_ino, char *filename);
/* Returns st_size, or -ENOENT when stat() fails. */
loff_t get_file_size(char *name);
/* Open the special files inside a mount directory; return a descriptor or a negative value. */
int open_commands_file(char *mount_dir);
int open_log_file(char *mount_dir);
/* Returns the number of pending reads stored in prs, 0 on timeout, or -errno. */
int wait_for_pending_reads(int fd, int timeout_ms,
struct incfs_pending_read_info *prs, int prs_count);
/* Returns a newly allocated "dir/file" string that the caller must free(). */
char *concat_file_name(const char *dir, char *file);
/* Digest helpers: write the raw digest bytes into hash. */
void sha256(char *data, size_t dsize, char *hash);
void md5(char *data, size_t dsize, char *hash);
/* Returns true and a malloc()ed signature in *sig_ret on success. */
bool sign_pkcs7(const void *data_to_sign, size_t data_size,
char *pkey_pem, char *cert_pem,
void **sig_ret, size_t *sig_size_ret);
/* Recursively removes a directory; returns 0 or a non-zero error. */
int delete_dir_tree(const char *path);