Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md: use interruptible wait when duration is controlled by userspace.
  md/raid5: suspend shouldn't affect read requests.
  md: tidy up error paths in md_alloc
  md: fix error path when duplicate name is found on md device creation.
  md: avoid dereferencing NULL pointer when accessing suspend_* sysfs attributes.
  md: Use new topology calls to indicate alignment and I/O sizes
This commit is contained in:
@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
|
|||||||
rdev->sectors = sectors * mddev->chunk_sectors;
|
rdev->sectors = sectors * mddev->chunk_sectors;
|
||||||
}
|
}
|
||||||
|
|
||||||
blk_queue_stack_limits(mddev->queue,
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
rdev->bdev->bd_disk->queue);
|
rdev->data_offset << 9);
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
/* as we don't honour merge_bvec_fn, we must never risk
|
||||||
* violating it, so limit ->max_sector to one PAGE, as
|
* violating it, so limit ->max_sector to one PAGE, as
|
||||||
* a one page request is never in violation.
|
* a one page request is never in violation.
|
||||||
|
@ -3573,7 +3573,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
|
|||||||
char *e;
|
char *e;
|
||||||
unsigned long long new = simple_strtoull(buf, &e, 10);
|
unsigned long long new = simple_strtoull(buf, &e, 10);
|
||||||
|
|
||||||
if (mddev->pers->quiesce == NULL)
|
if (mddev->pers == NULL ||
|
||||||
|
mddev->pers->quiesce == NULL)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (buf == e || (*e && *e != '\n'))
|
if (buf == e || (*e && *e != '\n'))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@ -3601,7 +3602,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
|
|||||||
char *e;
|
char *e;
|
||||||
unsigned long long new = simple_strtoull(buf, &e, 10);
|
unsigned long long new = simple_strtoull(buf, &e, 10);
|
||||||
|
|
||||||
if (mddev->pers->quiesce == NULL)
|
if (mddev->pers == NULL ||
|
||||||
|
mddev->pers->quiesce == NULL)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (buf == e || (*e && *e != '\n'))
|
if (buf == e || (*e && *e != '\n'))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@ -3844,11 +3846,9 @@ static int md_alloc(dev_t dev, char *name)
|
|||||||
flush_scheduled_work();
|
flush_scheduled_work();
|
||||||
|
|
||||||
mutex_lock(&disks_mutex);
|
mutex_lock(&disks_mutex);
|
||||||
if (mddev->gendisk) {
|
error = -EEXIST;
|
||||||
mutex_unlock(&disks_mutex);
|
if (mddev->gendisk)
|
||||||
mddev_put(mddev);
|
goto abort;
|
||||||
return -EEXIST;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (name) {
|
if (name) {
|
||||||
/* Need to ensure that 'name' is not a duplicate.
|
/* Need to ensure that 'name' is not a duplicate.
|
||||||
@ -3860,17 +3860,15 @@ static int md_alloc(dev_t dev, char *name)
|
|||||||
if (mddev2->gendisk &&
|
if (mddev2->gendisk &&
|
||||||
strcmp(mddev2->gendisk->disk_name, name) == 0) {
|
strcmp(mddev2->gendisk->disk_name, name) == 0) {
|
||||||
spin_unlock(&all_mddevs_lock);
|
spin_unlock(&all_mddevs_lock);
|
||||||
return -EEXIST;
|
goto abort;
|
||||||
}
|
}
|
||||||
spin_unlock(&all_mddevs_lock);
|
spin_unlock(&all_mddevs_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
error = -ENOMEM;
|
||||||
mddev->queue = blk_alloc_queue(GFP_KERNEL);
|
mddev->queue = blk_alloc_queue(GFP_KERNEL);
|
||||||
if (!mddev->queue) {
|
if (!mddev->queue)
|
||||||
mutex_unlock(&disks_mutex);
|
goto abort;
|
||||||
mddev_put(mddev);
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
mddev->queue->queuedata = mddev;
|
mddev->queue->queuedata = mddev;
|
||||||
|
|
||||||
/* Can be unlocked because the queue is new: no concurrency */
|
/* Can be unlocked because the queue is new: no concurrency */
|
||||||
@ -3880,11 +3878,9 @@ static int md_alloc(dev_t dev, char *name)
|
|||||||
|
|
||||||
disk = alloc_disk(1 << shift);
|
disk = alloc_disk(1 << shift);
|
||||||
if (!disk) {
|
if (!disk) {
|
||||||
mutex_unlock(&disks_mutex);
|
|
||||||
blk_cleanup_queue(mddev->queue);
|
blk_cleanup_queue(mddev->queue);
|
||||||
mddev->queue = NULL;
|
mddev->queue = NULL;
|
||||||
mddev_put(mddev);
|
goto abort;
|
||||||
return -ENOMEM;
|
|
||||||
}
|
}
|
||||||
disk->major = MAJOR(mddev->unit);
|
disk->major = MAJOR(mddev->unit);
|
||||||
disk->first_minor = unit << shift;
|
disk->first_minor = unit << shift;
|
||||||
@ -3906,16 +3902,22 @@ static int md_alloc(dev_t dev, char *name)
|
|||||||
mddev->gendisk = disk;
|
mddev->gendisk = disk;
|
||||||
error = kobject_init_and_add(&mddev->kobj, &md_ktype,
|
error = kobject_init_and_add(&mddev->kobj, &md_ktype,
|
||||||
&disk_to_dev(disk)->kobj, "%s", "md");
|
&disk_to_dev(disk)->kobj, "%s", "md");
|
||||||
mutex_unlock(&disks_mutex);
|
if (error) {
|
||||||
if (error)
|
/* This isn't possible, but as kobject_init_and_add is marked
|
||||||
|
* __must_check, we must do something with the result
|
||||||
|
*/
|
||||||
printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
|
printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
|
||||||
disk->disk_name);
|
disk->disk_name);
|
||||||
else {
|
error = 0;
|
||||||
|
}
|
||||||
|
abort:
|
||||||
|
mutex_unlock(&disks_mutex);
|
||||||
|
if (!error) {
|
||||||
kobject_uevent(&mddev->kobj, KOBJ_ADD);
|
kobject_uevent(&mddev->kobj, KOBJ_ADD);
|
||||||
mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
|
mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
|
||||||
}
|
}
|
||||||
mddev_put(mddev);
|
mddev_put(mddev);
|
||||||
return 0;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct kobject *md_probe(dev_t dev, int *part, void *data)
|
static struct kobject *md_probe(dev_t dev, int *part, void *data)
|
||||||
@ -6334,10 +6336,16 @@ void md_do_sync(mddev_t *mddev)
|
|||||||
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (j >= mddev->resync_max)
|
while (j >= mddev->resync_max && !kthread_should_stop()) {
|
||||||
wait_event(mddev->recovery_wait,
|
/* As this condition is controlled by user-space,
|
||||||
mddev->resync_max > j
|
* we can block indefinitely, so use '_interruptible'
|
||||||
|| kthread_should_stop());
|
* to avoid triggering warnings.
|
||||||
|
*/
|
||||||
|
flush_signals(current); /* just in case */
|
||||||
|
wait_event_interruptible(mddev->recovery_wait,
|
||||||
|
mddev->resync_max > j
|
||||||
|
|| kthread_should_stop());
|
||||||
|
}
|
||||||
|
|
||||||
if (kthread_should_stop())
|
if (kthread_should_stop())
|
||||||
goto interrupted;
|
goto interrupted;
|
||||||
|
@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||||||
for (path = first; path <= last; path++)
|
for (path = first; path <= last; path++)
|
||||||
if ((p=conf->multipaths+path)->rdev == NULL) {
|
if ((p=conf->multipaths+path)->rdev == NULL) {
|
||||||
q = rdev->bdev->bd_disk->queue;
|
q = rdev->bdev->bd_disk->queue;
|
||||||
blk_queue_stack_limits(mddev->queue, q);
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
|
rdev->data_offset << 9);
|
||||||
|
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
/* as we don't honour merge_bvec_fn, we must never risk
|
||||||
* violating it, so limit ->max_sector to one PAGE, as
|
* violating it, so limit ->max_sector to one PAGE, as
|
||||||
@ -463,9 +464,9 @@ static int multipath_run (mddev_t *mddev)
|
|||||||
|
|
||||||
disk = conf->multipaths + disk_idx;
|
disk = conf->multipaths + disk_idx;
|
||||||
disk->rdev = rdev;
|
disk->rdev = rdev;
|
||||||
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
|
rdev->data_offset << 9);
|
||||||
|
|
||||||
blk_queue_stack_limits(mddev->queue,
|
|
||||||
rdev->bdev->bd_disk->queue);
|
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
/* as we don't honour merge_bvec_fn, we must never risk
|
||||||
* violating it, not that we ever expect a device with
|
* violating it, not that we ever expect a device with
|
||||||
* a merge_bvec_fn to be involved in multipath */
|
* a merge_bvec_fn to be involved in multipath */
|
||||||
|
@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev)
|
|||||||
}
|
}
|
||||||
dev[j] = rdev1;
|
dev[j] = rdev1;
|
||||||
|
|
||||||
blk_queue_stack_limits(mddev->queue,
|
disk_stack_limits(mddev->gendisk, rdev1->bdev,
|
||||||
rdev1->bdev->bd_disk->queue);
|
rdev1->data_offset << 9);
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
/* as we don't honour merge_bvec_fn, we must never risk
|
||||||
* violating it, so limit ->max_sector to one PAGE, as
|
* violating it, so limit ->max_sector to one PAGE, as
|
||||||
* a one page request is never in violation.
|
* a one page request is never in violation.
|
||||||
@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev)
|
|||||||
mddev->chunk_sectors << 9);
|
mddev->chunk_sectors << 9);
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
|
||||||
|
blk_queue_io_opt(mddev->queue,
|
||||||
|
(mddev->chunk_sectors << 9) * mddev->raid_disks);
|
||||||
|
|
||||||
printk(KERN_INFO "raid0: done.\n");
|
printk(KERN_INFO "raid0: done.\n");
|
||||||
mddev->private = conf;
|
mddev->private = conf;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||||||
for (mirror = first; mirror <= last; mirror++)
|
for (mirror = first; mirror <= last; mirror++)
|
||||||
if ( !(p=conf->mirrors+mirror)->rdev) {
|
if ( !(p=conf->mirrors+mirror)->rdev) {
|
||||||
|
|
||||||
blk_queue_stack_limits(mddev->queue,
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
rdev->bdev->bd_disk->queue);
|
rdev->data_offset << 9);
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
/* as we don't honour merge_bvec_fn, we must never risk
|
||||||
* violating it, so limit ->max_sector to one PAGE, as
|
* violating it, so limit ->max_sector to one PAGE, as
|
||||||
* a one page request is never in violation.
|
* a one page request is never in violation.
|
||||||
@ -1988,9 +1988,8 @@ static int run(mddev_t *mddev)
|
|||||||
disk = conf->mirrors + disk_idx;
|
disk = conf->mirrors + disk_idx;
|
||||||
|
|
||||||
disk->rdev = rdev;
|
disk->rdev = rdev;
|
||||||
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
blk_queue_stack_limits(mddev->queue,
|
rdev->data_offset << 9);
|
||||||
rdev->bdev->bd_disk->queue);
|
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
/* as we don't honour merge_bvec_fn, we must never risk
|
||||||
* violating it, so limit ->max_sector to one PAGE, as
|
* violating it, so limit ->max_sector to one PAGE, as
|
||||||
* a one page request is never in violation.
|
* a one page request is never in violation.
|
||||||
|
@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||||||
for ( ; mirror <= last ; mirror++)
|
for ( ; mirror <= last ; mirror++)
|
||||||
if ( !(p=conf->mirrors+mirror)->rdev) {
|
if ( !(p=conf->mirrors+mirror)->rdev) {
|
||||||
|
|
||||||
blk_queue_stack_limits(mddev->queue,
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
rdev->bdev->bd_disk->queue);
|
rdev->data_offset << 9);
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
/* as we don't honour merge_bvec_fn, we must never risk
|
||||||
* violating it, so limit ->max_sector to one PAGE, as
|
* violating it, so limit ->max_sector to one PAGE, as
|
||||||
* a one page request is never in violation.
|
* a one page request is never in violation.
|
||||||
@ -2044,7 +2044,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
|
|||||||
static int run(mddev_t *mddev)
|
static int run(mddev_t *mddev)
|
||||||
{
|
{
|
||||||
conf_t *conf;
|
conf_t *conf;
|
||||||
int i, disk_idx;
|
int i, disk_idx, chunk_size;
|
||||||
mirror_info_t *disk;
|
mirror_info_t *disk;
|
||||||
mdk_rdev_t *rdev;
|
mdk_rdev_t *rdev;
|
||||||
int nc, fc, fo;
|
int nc, fc, fo;
|
||||||
@ -2130,6 +2130,14 @@ static int run(mddev_t *mddev)
|
|||||||
spin_lock_init(&conf->device_lock);
|
spin_lock_init(&conf->device_lock);
|
||||||
mddev->queue->queue_lock = &conf->device_lock;
|
mddev->queue->queue_lock = &conf->device_lock;
|
||||||
|
|
||||||
|
chunk_size = mddev->chunk_sectors << 9;
|
||||||
|
blk_queue_io_min(mddev->queue, chunk_size);
|
||||||
|
if (conf->raid_disks % conf->near_copies)
|
||||||
|
blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks);
|
||||||
|
else
|
||||||
|
blk_queue_io_opt(mddev->queue, chunk_size *
|
||||||
|
(conf->raid_disks / conf->near_copies));
|
||||||
|
|
||||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||||
disk_idx = rdev->raid_disk;
|
disk_idx = rdev->raid_disk;
|
||||||
if (disk_idx >= mddev->raid_disks
|
if (disk_idx >= mddev->raid_disks
|
||||||
@ -2138,9 +2146,8 @@ static int run(mddev_t *mddev)
|
|||||||
disk = conf->mirrors + disk_idx;
|
disk = conf->mirrors + disk_idx;
|
||||||
|
|
||||||
disk->rdev = rdev;
|
disk->rdev = rdev;
|
||||||
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
blk_queue_stack_limits(mddev->queue,
|
rdev->data_offset << 9);
|
||||||
rdev->bdev->bd_disk->queue);
|
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
/* as we don't honour merge_bvec_fn, we must never risk
|
||||||
* violating it, so limit ->max_sector to one PAGE, as
|
* violating it, so limit ->max_sector to one PAGE, as
|
||||||
* a one page request is never in violation.
|
* a one page request is never in violation.
|
||||||
|
@ -3699,13 +3699,21 @@ static int make_request(struct request_queue *q, struct bio * bi)
|
|||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* FIXME what if we get a false positive because these
|
|
||||||
* are being updated.
|
if (bio_data_dir(bi) == WRITE &&
|
||||||
*/
|
logical_sector >= mddev->suspend_lo &&
|
||||||
if (logical_sector >= mddev->suspend_lo &&
|
|
||||||
logical_sector < mddev->suspend_hi) {
|
logical_sector < mddev->suspend_hi) {
|
||||||
release_stripe(sh);
|
release_stripe(sh);
|
||||||
schedule();
|
/* As the suspend_* range is controlled by
|
||||||
|
* userspace, we want an interruptible
|
||||||
|
* wait.
|
||||||
|
*/
|
||||||
|
flush_signals(current);
|
||||||
|
prepare_to_wait(&conf->wait_for_overlap,
|
||||||
|
&w, TASK_INTERRUPTIBLE);
|
||||||
|
if (logical_sector >= mddev->suspend_lo &&
|
||||||
|
logical_sector < mddev->suspend_hi)
|
||||||
|
schedule();
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4452,7 +4460,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
|
|||||||
static int run(mddev_t *mddev)
|
static int run(mddev_t *mddev)
|
||||||
{
|
{
|
||||||
raid5_conf_t *conf;
|
raid5_conf_t *conf;
|
||||||
int working_disks = 0;
|
int working_disks = 0, chunk_size;
|
||||||
mdk_rdev_t *rdev;
|
mdk_rdev_t *rdev;
|
||||||
|
|
||||||
if (mddev->recovery_cp != MaxSector)
|
if (mddev->recovery_cp != MaxSector)
|
||||||
@ -4607,6 +4615,14 @@ static int run(mddev_t *mddev)
|
|||||||
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
|
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
|
||||||
|
|
||||||
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
|
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
|
||||||
|
chunk_size = mddev->chunk_sectors << 9;
|
||||||
|
blk_queue_io_min(mddev->queue, chunk_size);
|
||||||
|
blk_queue_io_opt(mddev->queue, chunk_size *
|
||||||
|
(conf->raid_disks - conf->max_degraded));
|
||||||
|
|
||||||
|
list_for_each_entry(rdev, &mddev->disks, same_set)
|
||||||
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
|
rdev->data_offset << 9);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
abort:
|
abort:
|
||||||
|
Reference in New Issue
Block a user