===== drivers/block/scsi_ioctl.c 1.12 vs edited =====
--- 1.12/drivers/block/scsi_ioctl.c	Tue Oct 15 20:50:18 2002
+++ edited/drivers/block/scsi_ioctl.c	Sat Oct 19 02:01:41 2002
@@ -29,11 +29,14 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
+#define BLK_DEFAULT_TIMEOUT	(60 * HZ)
+
 int blk_do_rq(request_queue_t *q, struct request *rq)
 {
 	DECLARE_COMPLETION(wait);
@@ -74,30 +77,32 @@
 
 static int sg_get_timeout(request_queue_t *q)
 {
-	return HZ;
+	return q->sg_timeout;
 }
 
 static int sg_set_timeout(request_queue_t *q, int *p)
 {
-	int timeout;
-	int error = get_user(timeout, p);
-	return error;
-}
+	int timeout, err = get_user(timeout, p);
 
-static int reserved_size = 0;
+	if (!err)
+		q->sg_timeout = timeout;
+
+	return err;
+}
 
 static int sg_get_reserved_size(request_queue_t *q, int *p)
 {
-	return put_user(reserved_size, p);
+	return put_user(q->sg_reserved_size, p);
 }
 
 static int sg_set_reserved_size(request_queue_t *q, int *p)
 {
-	int size;
-	int error = get_user(size, p);
-	if (!error)
-		reserved_size = size;
-	return error;
+	int size, err = get_user(size, p);
+
+	if (!err)
+		q->sg_reserved_size = size;
+
+	return err;
 }
 
 static int sg_emulated_host(request_queue_t *q, int *p)
@@ -105,11 +110,14 @@
 	return put_user(1, p);
 }
 
-static int sg_io(request_queue_t *q, struct sg_io_hdr *uptr)
+static int sg_io(request_queue_t *q, struct block_device *bdev,
+		 struct sg_io_hdr *uptr)
 {
-	int err;
+	unsigned long uaddr, start_time;
+	int err, reading, writing;
 	struct sg_io_hdr hdr;
 	struct request *rq;
+	struct bio *bio;
 	void *buffer;
 
 	if (!access_ok(VERIFY_WRITE, uptr, sizeof(*uptr)))
@@ -117,47 +125,116 @@
 	if (copy_from_user(&hdr, uptr, sizeof(*uptr)))
 		return -EFAULT;
 
-	if ( hdr.cmd_len > sizeof(rq->cmd) )
+	if (hdr.cmd_len > sizeof(rq->cmd))
 		return -EINVAL;
 
+	if (!access_ok(VERIFY_READ, hdr.cmdp, hdr.cmd_len))
+		return -EFAULT;
+
+	if (hdr.dxfer_len > 65536)
+		return -EINVAL;
+
+	/*
+	 * we'll do that later
+	 */
+	if (hdr.iovec_count)
+		return -EOPNOTSUPP;
+
+	reading = writing = 0;
 	buffer = NULL;
+	bio = NULL;
 	if (hdr.dxfer_len) {
 		unsigned int bytes = (hdr.dxfer_len + 511) & ~511;
 
 		switch (hdr.dxfer_direction) {
 			default:
 				return -EINVAL;
+			case SG_DXFER_TO_FROM_DEV:
+				reading = 1;
+				/* fall through */
 			case SG_DXFER_TO_DEV:
+				writing = 1;
+				break;
 			case SG_DXFER_FROM_DEV:
-			case SG_DXFER_TO_FROM_DEV:
+				reading = 1;
 				break;
 		}
 
-		buffer = kmalloc(bytes, GFP_USER);
-		if (!buffer)
-			return -ENOMEM;
-		if (hdr.dxfer_direction == SG_DXFER_TO_DEV ||
-		    hdr.dxfer_direction == SG_DXFER_TO_FROM_DEV)
-			copy_from_user(buffer, hdr.dxferp, hdr.dxfer_len);
+
+		uaddr = (unsigned long) hdr.dxferp;
+		if (writing && !access_ok(VERIFY_WRITE, uaddr, bytes))
+			return -EFAULT;
+		if (reading && !access_ok(VERIFY_READ, uaddr, bytes))
+			return -EFAULT;
+
+		/*
+		 * first try to map it into a bio. reading from device will
+		 * be a write to vm.
+		 */
+		bio = bio_map_user(bdev, uaddr, hdr.dxfer_len, reading);
+		if (bio && bio->bi_size < hdr.dxfer_len) {
+			bio_endio(bio, bio->bi_size, 0);
+			bio = NULL;
+		}
+
+		/*
+		 * if bio setup failed, fall back to slow approach
+		 */
+		if (!bio) {
+			buffer = kmalloc(bytes, GFP_USER);
+			if (!buffer)
+				return -ENOMEM;
+			if (writing)
+				copy_from_user(buffer,hdr.dxferp,hdr.dxfer_len);
+		}
 	}
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	rq->timeout = 60*HZ;
+	rq->bio = rq->biotail = bio;
 	rq->data = buffer;
+
+	if (bio) {
+		if (writing)
+			bio->bi_rw |= (1 << BIO_RW);
+
+		rq->buffer = bio_data(bio);
+		rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
+		rq->current_nr_sectors = bio_cur_sectors(bio);
+		rq->hard_cur_sectors = rq->current_nr_sectors;
+		rq->nr_phys_segments = bio_phys_segments(q, bio);
+		rq->nr_hw_segments = bio_hw_segments(q, bio);
+	}
+
+	rq->rq_dev = to_kdev_t(bdev->bd_dev);
+	rq->rq_disk = bdev->bd_disk;
+
+	rq->timeout = hdr.timeout;
+	if (!rq->timeout)
+		rq->timeout = q->sg_timeout;
+	if (!rq->timeout)
+		rq->timeout = BLK_DEFAULT_TIMEOUT;
+
 	rq->data_len = hdr.dxfer_len;
-	rq->flags = REQ_BLOCK_PC;
+	rq->flags |= REQ_BLOCK_PC;
+	if (writing)
+		rq->flags |= REQ_RW;
+
 	memset(rq->cmd, 0, sizeof(rq->cmd));
 	copy_from_user(rq->cmd, hdr.cmdp, hdr.cmd_len);
+
+	start_time = jiffies;
 	err = blk_do_rq(q, rq);
+	hdr.duration = (jiffies - start_time) * 1000 / HZ;
 
 	blk_put_request(rq);
 
 	copy_to_user(uptr, &hdr, sizeof(*uptr));
+
 	if (buffer) {
-		if (hdr.dxfer_direction == SG_DXFER_FROM_DEV ||
-		    hdr.dxfer_direction == SG_DXFER_TO_FROM_DEV)
+		if (reading)
 			copy_to_user(hdr.dxferp, buffer, hdr.dxfer_len);
+
 		kfree(buffer);
 	}
+
 	return err;
 }
@@ -189,7 +266,7 @@
 		case SG_EMULATED_HOST:
 			return sg_emulated_host(q, (int *) arg);
 		case SG_IO:
-			return sg_io(q, (struct sg_io_hdr *) arg);
+			return sg_io(q, bdev, (struct sg_io_hdr *) arg);
 		case CDROMCLOSETRAY:
 			close = 1;
 		case CDROMEJECT:
@@ -197,7 +274,7 @@
 			rq->flags = REQ_BLOCK_PC;
 			rq->data = NULL;
 			rq->data_len = 0;
-			rq->timeout = 60*HZ;
+			rq->timeout = BLK_DEFAULT_TIMEOUT;
 			memset(rq->cmd, 0, sizeof(rq->cmd));
 			rq->cmd[0] = GPCMD_START_STOP_UNIT;
 			rq->cmd[4] = 0x02 + (close != 0);
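
For illustration (not part of the patch): a minimal userspace program driving the new SG_IO path on a block device. The device node is a placeholder, and the sense-buffer fields are not used since sg_io() above does not fill them yet. A buffer that is not sector-aligned is fine here; it simply takes the kmalloc() fallback path instead of bio_map_user().

/* sgio_inquiry.c - hedged sketch, illustration only */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(int argc, char *argv[])
{
	unsigned char inq[6] = { 0x12, 0, 0, 0, 96, 0 };	/* INQUIRY, 96 bytes */
	unsigned char buf[96];
	struct sg_io_hdr hdr;
	int fd;

	fd = open(argc > 1 ? argv[1] : "/dev/cdrom", O_RDONLY | O_NONBLOCK);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&hdr, 0, sizeof(hdr));
	memset(buf, 0, sizeof(buf));
	hdr.interface_id = 'S';
	hdr.dxfer_direction = SG_DXFER_FROM_DEV;
	hdr.cmdp = inq;
	hdr.cmd_len = sizeof(inq);
	hdr.dxferp = buf;
	hdr.dxfer_len = sizeof(buf);	/* must be <= 65536 per the check above */
	hdr.timeout = 0;		/* 0: fall back to q->sg_timeout, then BLK_DEFAULT_TIMEOUT */

	if (ioctl(fd, SG_IO, &hdr) < 0) {
		perror("SG_IO");
		close(fd);
		return 1;
	}

	printf("vendor/product: %.8s %.16s\n", buf + 8, buf + 16);
	close(fd);
	return 0;
}

With hdr.timeout left at zero, the request inherits q->sg_timeout and finally BLK_DEFAULT_TIMEOUT, matching the fallback chain in sg_io() above.
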
===== drivers/ide/ide-cd.c 1.27 vs edited =====
--- 1.27/drivers/ide/ide-cd.c	Fri Oct 18 20:02:55 2002
+++ edited/drivers/ide/ide-cd.c	Sat Oct 19 01:37:23 2002
@@ -608,7 +608,7 @@
 	if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
 		return ide_stopped;
 	/* retry only "normal" I/O: */
-	if ((rq->flags & REQ_DRIVE_CMD) || (rq->flags & REQ_DRIVE_TASK)) {
+	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, stat, err);
 		return ide_stopped;
@@ -635,7 +635,7 @@
 	return ide_stopped;
 }
 
-static void cdrom_end_request (ide_drive_t *drive, int uptodate)
+static int cdrom_end_request (ide_drive_t *drive, int uptodate)
 {
 	struct request *rq = HWGROUP(drive)->rq;
 
@@ -651,10 +651,11 @@
 		cdrom_analyze_sense_data(drive, failed, sense);
 	}
 
-	if (blk_fs_request(rq) && !rq->current_nr_sectors)
-		uptodate = 1;
+	if (!rq->current_nr_sectors)
+		if (blk_fs_request(rq) || blk_pc_request(rq))
+			uptodate = 1;
 
-	ide_end_request(drive, uptodate, rq->hard_cur_sectors);
+	return ide_end_request(drive, uptodate, rq->hard_cur_sectors);
 }
 
 /* Handle differences between SCSI and ATAPI packet commands */
@@ -779,8 +780,10 @@
 		   queue a request sense command.
 		*/
 		if ((stat & ERR_STAT) != 0)
 			cdrom_queue_request_sense(drive, NULL, NULL, NULL);
-	} else
-		blk_dump_rq_flags(rq, "ide-cd bad flags");
+	} else {
+		blk_dump_rq_flags(rq, "ide-cd: bad rq");
+		cdrom_end_request(drive, 0);
+	}
 
 	/* Retry, or handle the next request. */
 	*startstop = ide_stopped;
@@ -848,7 +851,7 @@
 	HWIF(drive)->OUTB(xferlen >> 8 , IDE_BCOUNTH_REG);
 	if (IDE_CONTROL_REG)
 		HWIF(drive)->OUTB(drive->ctl, IDE_CONTROL_REG);
-	
+
 	if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) {
 		if (HWGROUP(drive)->handler != NULL)
 			BUG();
@@ -876,9 +879,6 @@
 					  struct request *rq,
 					  ide_handler_t *handler)
 {
-	unsigned char *cmd_buf = rq->cmd;
-	int cmd_len = sizeof(rq->cmd);
-	unsigned int timeout = rq->timeout;
 	struct cdrom_info *info = drive->driver_data;
 	ide_startstop_t startstop;
 
@@ -901,10 +901,10 @@
 		BUG();
 
 	/* Arm the interrupt handler. */
-	ide_set_handler(drive, handler, timeout, cdrom_timer_expiry);
+	ide_set_handler(drive, handler, rq->timeout, cdrom_timer_expiry);
 
 	/* Send the command to the device. */
-	HWIF(drive)->atapi_output_bytes(drive, cmd_buf, cmd_len);
+	HWIF(drive)->atapi_output_bytes(drive, rq->cmd, sizeof(rq->cmd));
 
 	/* Start the DMA if need be */
 	if (info->dma)
@@ -1016,7 +1016,9 @@
 
 	struct request *rq = HWGROUP(drive)->rq;
 
-	/* Check for errors. */
+	/*
+	 * handle dma case
+	 */
 	if (dma) {
 		info->dma = 0;
 		if ((dma_error = HWIF(drive)->ide_dma_end(drive)))
@@ -1025,15 +1027,20 @@
 
 	if (cdrom_decode_status (&startstop, drive, 0, &stat))
 		return startstop;
-	
+
 	if (dma) {
 		if (!dma_error) {
-			ide_end_request(drive, 1, rq->nr_sectors);
+			if (ide_end_request(drive, 1, rq->nr_sectors))
+				printk("%s: bad end_request return\n", __FUNCTION__);
 			return ide_stopped;
 		} else
 			return DRIVER(drive)->error(drive, "dma error", stat);
 	}
 
+	/*
+	 * below is the pio data handling
+	 */
+
 	/* Read the interrupt reason and the transfer length. */
 	ireason = HWIF(drive)->INB(IDE_IREASON_REG);
 	lowcyl = HWIF(drive)->INB(IDE_BCOUNTL_REG);
@@ -1080,7 +1087,7 @@
 
 	/* First, figure out if we need to bit-bucket
 	   any of the leading sectors. */
-	nskip = MIN((int)(rq->current_nr_sectors - bio_sectors(rq->bio)), sectors_to_transfer);
+	nskip = MIN((int)(rq->current_nr_sectors - bio_cur_sectors(rq->bio)), sectors_to_transfer);
 
 	while (nskip > 0) {
 		/* We need to throw away a sector. */
@@ -1107,6 +1114,9 @@
 			cdrom_buffer_sectors(drive, rq->sector, sectors_to_transfer);
 			sectors_to_transfer = 0;
 		} else {
+			if (rq->bio)
+				rq->buffer = bio_data(rq->bio);
+
 			/* Transfer data to the buffers.
 			   Figure out how many sectors we can transfer
 			   to the current buffer. */
@@ -1180,7 +1190,7 @@
 	   represent the number of sectors to skip at the start of a transfer
 	   will fail.  I think that this will never happen, but let's be
 	   paranoid and check. */
-	if (rq->current_nr_sectors < bio_sectors(rq->bio) &&
+	if (rq->current_nr_sectors < bio_cur_sectors(rq->bio) &&
 	    (rq->sector % SECTORS_PER_FRAME) != 0) {
 		printk("%s: cdrom_read_from_buffer: buffer botch (%ld)\n",
 			drive->name, (long)rq->sector);
@@ -1218,7 +1228,7 @@
 	nskip = (sector % SECTORS_PER_FRAME);
 	if (nskip > 0) {
 		/* Sanity check...
 		 */
-		if (rq->current_nr_sectors != bio_sectors(rq->bio) &&
+		if (rq->current_nr_sectors != bio_cur_sectors(rq->bio) &&
		    (rq->sector % CD_FRAMESIZE != 0)) {
 			printk ("%s: cdrom_start_read_continuation: buffer botch (%u)\n",
 				drive->name, rq->current_nr_sectors);
@@ -1318,7 +1328,7 @@
 		rq->nr_sectors += n;
 		rq->sector -= n;
 	}
-	rq->hard_cur_sectors = rq->current_nr_sectors = bio_sectors(rq->bio);
+	rq->hard_cur_sectors = rq->current_nr_sectors = bio_cur_sectors(rq->bio);
 	rq->hard_nr_sectors = rq->nr_sectors;
 	rq->hard_sector = rq->sector;
 	rq->q->prep_rq_fn(rq->q, rq);
@@ -1606,6 +1616,9 @@
 
 	struct request *rq = HWGROUP(drive)->rq;
 
+	if (rq->rq_status == RQ_INACTIVE)
+		printk("%s: rq is gone\n", __FUNCTION__);
+
 	/* Check for errors. */
 	if (dma) {
 		info->dma = 0;
@@ -1619,7 +1632,7 @@
 		printk("ide-cd: write_intr decode_status bad\n");
 		return startstop;
 	}
-	
+
 	/*
 	 * using dma, transfer is complete now
 	 */
@@ -1671,6 +1684,9 @@
 			break;
 		}
 
+		if (rq->bio)
+			rq->buffer = bio_data(rq->bio);
+
 		/*
 		 * Figure out how many sectors we can transfer
 		 */
@@ -1755,6 +1771,7 @@
 static int pre_transform_command(struct request *req)
 {
 	u8 *c = req->cmd;
+
 	/* Transform 6-byte read/write commands to the 10-byte version. */
 	if (c[0] == READ_6 || c[0] == WRITE_6) {
 		c[8] = c[4];
@@ -1767,21 +1784,107 @@
 		return 0;
 	}
 
-	/* These also need fixup, not done yet */
-	if (c[0] == MODE_SENSE || c[0] == MODE_SELECT)
-		return -EINVAL;
+	if (!req->data_len || !req->data)
+		return 0;
 
+	if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) {
+		char *buffer = kmalloc(req->data_len + 4, GFP_ATOMIC);
+		char *obuf = req->data;
+		char old_cdb[6];
+		int new_len;
+
+		if (!buffer)
+			return -ENOMEM;
+
+		if (req->bio)
+			obuf = bio_data(req->bio);
+
+		memcpy(old_cdb, req->cmd, sizeof(old_cdb));
+		memset(buffer, 0, req->data_len + 4);
+		memset(req->cmd, 0, sizeof(req->cmd));
+		c[0] = old_cdb[0] | 0x40;
+		c[1] = old_cdb[1];
+		c[2] = old_cdb[2];
+		new_len = old_cdb[4] + 4;
+		c[8] = new_len & 0xff;
+		c[7] = (new_len >> 8) & 0xff;
+		c[9] = old_cdb[5];
+		if (c[0] == GPCMD_MODE_SELECT_10) {
+			buffer[1] = obuf[0];
+			buffer[2] = obuf[1];
+			buffer[3] = obuf[2];
+			buffer[7] = obuf[3];
+			memcpy(buffer + 8, obuf + 4, req->data_len - 4);
+		}
+
+		req->special = req->data;
+		req->data = buffer;
+		req->data_len += 4;
+		if (req->bio)
+			req->bio->bi_size += 4;
+	}
+
 	return 0;
 }
 
 static void post_transform_command(struct request *req)
 {
+	char *obuf = req->special;
+	char *ibuf = req->data;
+	u8 *c = req->cmd;
+
+	/*
+	 * set ansi-revision and response data as atapi
+	 */
+	if (c[0] == GPCMD_INQUIRY) {
+		ibuf[2] |= 2;
+		ibuf[3] = (ibuf[3] & 0xf0) | 2;
+		return;
+	}
+
+	if (req->bio)
+		obuf = bio_data(req->bio);
+
+	if (c[0] != GPCMD_MODE_SENSE_10 || !obuf)
+		return;
+
+	/*
+	 * command was transformed MODE_6
+	 */
+	obuf[0] = ibuf[1];
+	obuf[1] = ibuf[2];
+	obuf[2] = ibuf[3];
+	obuf[3] = ibuf[7];
+	memcpy(obuf + 4, ibuf + 8, req->data_len - 8);
+
+	kfree(ibuf);
+	req->special = NULL;
+	req->data = obuf;
+	req->data_len -= 4;
+	if (req->bio)
+		req->bio->bi_size -= 4;
+}
+
+static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
+{
+	struct cdrom_info *info = drive->driver_data;
+	struct request *rq = HWGROUP(drive)->rq;
+	ide_handler_t *handler;
+
+	if (!rq->timeout)
+		rq->timeout = WAIT_CMD;
+
+	if (info->cmd == READ)
+		handler = cdrom_read_intr;
+	else
+		handler = cdrom_write_intr;
+
+	return cdrom_transfer_packet_command(drive, rq, handler);
 }
 
 static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
-	ide_startstop_t startstop;
-	struct cdrom_info *info;
+	struct cdrom_info *info = drive->driver_data;
+	ide_handler_t *handler;
 
 	if (pre_transform_command(rq) < 0) {
 		cdrom_end_request(drive, 0);
@@ -1790,14 +1893,23 @@
 
 	rq->flags |= REQ_QUIET;
 
-	info = drive->driver_data;
-	info->dma = 0;
-	info->cmd = 0;
+	if (rq->bio) {
+		if (rq->data_len & 511) {
+			printk("%s: block pc not aligned, len=%d\n", drive->name, rq->data_len);
+			cdrom_end_request(drive, 0);
+			return ide_stopped;
+		}
+		info->dma = drive->using_dma;
+		info->cmd = rq_data_dir(rq);
+		handler = cdrom_do_newpc_cont;
+	} else {
+		info->dma = 0;
+		info->cmd = 0;
+		handler = cdrom_do_pc_continuation;
+	}
 
 	/* Start sending the command to the drive. */
-	startstop = cdrom_start_packet_command(drive, rq->data_len, cdrom_do_pc_continuation);
-
-	return startstop;
+	return cdrom_start_packet_command(drive, rq->data_len, handler);
 }
 
 /****************************************************************************
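
For reference (illustration only, not kernel code): the CDB rewrite that pre_transform_command() performs on MODE_SENSE/MODE_SELECT, extracted into a standalone program. The opcode trick works because MODE_SENSE (0x1a) | 0x40 = 0x5a = MODE_SENSE_10 and MODE_SELECT (0x15) | 0x40 = 0x55 = MODE_SELECT_10; the extra 4 bytes account for the 8-byte mode parameter header of the 10-byte commands versus the 4-byte header of the 6-byte ones.

#include <stdio.h>
#include <string.h>

/* rewrite a 6-byte MODE SENSE/SELECT CDB as its 10-byte equivalent,
 * mirroring pre_transform_command() above */
static void transform_mode_cdb(const unsigned char old_cdb[6],
			       unsigned char new_cdb[10])
{
	int new_len = old_cdb[4] + 4;	/* 8-byte header instead of 4 */

	memset(new_cdb, 0, 10);
	new_cdb[0] = old_cdb[0] | 0x40;	/* 6-byte opcode -> 10-byte opcode */
	new_cdb[1] = old_cdb[1];
	new_cdb[2] = old_cdb[2];	/* page code */
	new_cdb[7] = (new_len >> 8) & 0xff;
	new_cdb[8] = new_len & 0xff;	/* allocation length */
	new_cdb[9] = old_cdb[5];	/* control byte */
}

int main(void)
{
	unsigned char old_cdb[6] = { 0x1a, 0, 0x2a, 0, 128, 0 };
	unsigned char new_cdb[10];
	int i;

	transform_mode_cdb(old_cdb, new_cdb);
	for (i = 0; i < 10; i++)
		printf("%02x ", new_cdb[i]);
	printf("\n");
	return 0;
}
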
===== drivers/ide/ide-disk.c 1.27 vs edited =====
--- 1.27/drivers/ide/ide-disk.c	Tue Oct 15 22:54:07 2002
+++ edited/drivers/ide/ide-disk.c	Fri Oct 18 20:42:33 2002
@@ -1610,56 +1610,6 @@
 #endif
 }
 
-static int idedisk_suspend(struct device *dev, u32 state, u32 level)
-{
-	ide_drive_t *drive = dev->driver_data;
-
-	printk("Suspending device %p\n", dev->driver_data);
-
-	/* I hope that every freeze operation from the upper levels have
-	 * already been done...
-	 */
-
-	if (level != SUSPEND_SAVE_STATE)
-		return 0;
-	BUG_ON(in_interrupt());
-
-	printk("Waiting for commands to finish\n");
-
-	/* wait until all commands are finished */
-	/* FIXME: waiting for spinlocks should be done instead. */
-	if (!(HWGROUP(drive)))
-		printk("No hwgroup?\n");
-	while (HWGROUP(drive)->handler)
-		yield();
-
-	/* set the drive to standby */
-	printk(KERN_INFO "suspending: %s ", drive->name);
-	if (drive->driver) {
-		if (drive->driver->standby)
-			drive->driver->standby(drive);
-	}
-	drive->blocked = 1;
-
-	while (HWGROUP(drive)->handler)
-		yield();
-
-	return 0;
-}
-
-static int idedisk_resume(struct device *dev, u32 level)
-{
-	ide_drive_t *drive = dev->driver_data;
-
-	if (level != RESUME_RESTORE_STATE)
-		return 0;
-	if (!drive->blocked)
-		panic("ide: Resume but not suspended?\n");
-
-	drive->blocked = 0;
-	return 0;
-}
-
 /* This is just a hook for the overall driver tree.
 */
===== drivers/ide/ide.c 1.33 vs edited =====
--- 1.33/drivers/ide/ide.c	Fri Oct 18 21:44:11 2002
+++ edited/drivers/ide/ide.c	Sat Oct 19 01:45:30 2002
@@ -391,7 +391,11 @@
 
 	spin_lock_irqsave(&ide_lock, flags);
 	rq = HWGROUP(drive)->rq;
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	if (!(rq->flags & REQ_STARTED)) {
+		blk_dump_rq_flags(rq, "ide_end_request");
+		BUG();
+	}
+	//BUG_ON(!(rq->flags & REQ_STARTED));
 
 	if (!nr_sectors)
 		nr_sectors = rq->hard_cur_sectors;
@@ -878,13 +882,12 @@
 {
 	ide_startstop_t startstop;
 	unsigned long block;
-	ide_hwif_t *hwif = HWIF(drive);
 
 	BUG_ON(!(rq->flags & REQ_STARTED));
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
-		hwif->name, (unsigned long) rq);
+		HWIF(drive)->name, (unsigned long) rq);
 #endif
 
 	/* bail early if we've exceeded max_failures */
@@ -910,7 +913,7 @@
 		block = 1;  /* redirect MBR access to EZ-Drive partn table */
 
 #if (DISK_RECOVERY_TIME > 0)
-	while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME);
+	while ((read_timer() - HWIF(drive)->last_time) < DISK_RECOVERY_TIME);
 #endif
 
 	SELECT_DRIVE(drive);
@@ -1515,7 +1518,6 @@
 {
 	unsigned long flags;
 	ide_hwgroup_t *hwgroup = HWGROUP(drive);
-	unsigned int major = HWIF(drive)->major;
 	request_queue_t *q = &drive->queue;
 	struct list_head *queue_head = &q->queue_head;
 	DECLARE_COMPLETION(wait);
@@ -2581,8 +2583,11 @@
 static int ide_check_media_change(struct gendisk *disk)
 {
 	ide_drive_t *drive = disk->private_data;
-	if (drive->driver != NULL)
+	printk("%s: drive is %p\n", __FUNCTION__, drive);
+	if (drive->driver != NULL) {
+		printk("%s: calling %p media change\n", __FUNCTION__, DRIVER(drive)->media_change);
 		return DRIVER(drive)->media_change(drive);
+	}
 	return 0;
 }
 
@@ -3369,7 +3374,7 @@
 		list_del_init(&drive->list);
 		ata_attach(drive);
 	}
-	driver->gen_driver.name = driver->name;
+	driver->gen_driver.name = (char *) driver->name;
 	driver->gen_driver.bus = &ide_bus_type;
 	driver->gen_driver.remove = ide_drive_remove;
 	return driver_register(&driver->gen_driver);
===== drivers/md/linear.c 1.20 vs edited =====
--- 1.20/drivers/md/linear.c	Wed Oct 16 06:49:22 2002
+++ edited/drivers/md/linear.c	Fri Oct 18 20:28:57 2002
@@ -52,8 +52,7 @@
  *	@bio: the buffer head that's been built up so far
  *	@biovec: the request that could be merged to it.
  *
- *	Return 1 if the merge is not permitted (because the
- *	result would cross a device boundary), 0 otherwise.
+ *	FIXME: return amount we can take at this offset, not a bool
 */
 static int linear_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec)
 {
@@ -64,7 +63,10 @@
 	dev1 = which_dev(mddev, bio->bi_sector + ((bio->bi_size + biovec->bv_len - 1) >> 9));
 
-	return dev0 != dev1;
+	if (dev0 == dev1)
+		return biovec->bv_len;
+
+	return 0;
 }
 
 static int linear_run (mddev_t *mddev)
===== drivers/md/raid0.c 1.18 vs edited =====
--- 1.18/drivers/md/raid0.c	Tue Oct 15 12:03:07 2002
+++ edited/drivers/md/raid0.c	Fri Oct 18 20:28:57 2002
@@ -168,8 +168,7 @@
  *	@bio: the buffer head that's been built up so far
  *	@biovec: the request that could be merged to it.
 *
- *	Return 1 if the merge is not permitted (because the
- *	result would cross a chunk boundary), 0 otherwise.
+ *	Return amount of bytes we can accept at this offset
 */
 static int raid0_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec)
 {
@@ -182,7 +181,7 @@
 
 	block = bio->bi_sector >> 1;
 	bio_sz = (bio->bi_size + biovec->bv_len) >> 10;
 
-	return chunk_size < ((block & (chunk_size - 1)) + bio_sz);
+	return chunk_size - ((block & (chunk_size - 1)) + bio_sz);
 }
 
 static int raid0_run (mddev_t *mddev)
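
To make the new merge_bvec_fn contract concrete (sketch, not kernel code): the callback now answers "how many bytes can I still take at this offset" and bio_add_page() compares that against len. Below is a standalone version of the raid0 chunk-boundary arithmetic, converted to bytes; note that the in-kernel function above appears to return kilobyte units, which callers comparing against a byte count will want to keep in mind.

#include <stdio.h>

/* bytes of slack left in the chunk after adding a vec of bv_len bytes
 * to a bio of bi_size bytes starting at bi_sector; chunk_kb is the
 * chunk size in KB (hypothetical helper, for illustration) */
static int accept_bytes(unsigned long bi_sector, unsigned int bi_size,
			unsigned int bv_len, unsigned int chunk_kb)
{
	unsigned long block = bi_sector >> 1;		/* 512b sectors -> 1K blocks */
	unsigned int bio_kb = (bi_size + bv_len) >> 10;	/* as in raid0_mergeable_bvec() */

	return (chunk_kb - ((block & (chunk_kb - 1)) + bio_kb)) << 10;
}

int main(void)
{
	/* 64K chunks; a bio at the chunk start holds 56K and wants to add
	 * a 4K vec: 64 - (0 + 60) = 4K of slack remains, so it is accepted */
	printf("%d bytes\n", accept_bytes(0, 56 << 10, 4 << 10, 64));
	return 0;
}
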
===== fs/bio.c 1.30 vs edited =====
--- 1.30/fs/bio.c	Sun Oct 13 17:39:40 2002
+++ edited/fs/bio.c	Sat Oct 19 00:29:22 2002
@@ -149,12 +149,13 @@
 	bio_init(bio);
 
 	if (unlikely(!nr_iovecs))
-		goto out;
+		goto out_vec;
 
 	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx);
 	if (bvl) {
 		bio->bi_flags |= idx << BIO_POOL_OFFSET;
 		bio->bi_max_vecs = bvec_array[idx].nr_vecs;
+out_vec:
 		bio->bi_io_vec = bvl;
 		bio->bi_destructor = bio_destructor;
 out:
@@ -353,7 +354,7 @@
 	request_queue_t *q = bdev_get_queue(bdev);
 	int nr_pages;
 
-	nr_pages = q->max_sectors >> (PAGE_SHIFT - 9);
+	nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (nr_pages > q->max_phys_segments)
 		nr_pages = q->max_phys_segments;
 	if (nr_pages > q->max_hw_segments)
@@ -384,13 +385,13 @@
 	 * cloned bio must not modify vec list
 	 */
 	if (unlikely(bio_flagged(bio, BIO_CLONED)))
-		return 1;
+		return 0;
 
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
-		return 1;
+		return 0;
 
 	if (((bio->bi_size + len) >> 9) > q->max_sectors)
-		return 1;
+		return 0;
 
 	/*
	 * we might loose a segment or two here, but rather that than
@@ -403,7 +404,7 @@
 
 	if (fail_segments) {
 		if (retried_segments)
-			return 1;
+			return 0;
 
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 		retried_segments = 1;
@@ -424,21 +425,124 @@
 	 * depending on offset), it can specify a merge_bvec_fn in the
 	 * queue to get further control
 	 */
-	if (q->merge_bvec_fn && q->merge_bvec_fn(q, bio, bvec)) {
-		bvec->bv_page = NULL;
-		bvec->bv_len = 0;
-		bvec->bv_offset = 0;
-		return 1;
+	if (q->merge_bvec_fn) {
+		/*
+		 * merge_bvec_fn() returns number of bytes it can accept
+		 * at this offset
+		 */
+		if (q->merge_bvec_fn(q, bio, bvec) < len) {
+			bvec->bv_page = NULL;
+			bvec->bv_len = 0;
+			bvec->bv_offset = 0;
+			return 0;
+		}
 	}
 
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;
 	bio->bi_hw_segments++;
 	bio->bi_size += len;
+	return len;
+}
+
+static int bio_user_end_io(struct bio *bio, unsigned int bytes_done, int error)
+{
+	struct bio_vec *bvec;
+	int i;
+
+	if (bio->bi_size)
+		return 1;
+
+	bio_for_each_segment(bvec, bio, i)
+		page_cache_release(bvec->bv_page);
+
+	bio_put(bio);
 	return 0;
 }
 
 /**
+ *	bio_map_user	-	map user address into bio
+ *	@bdev: destination block device
+ *	@uaddr: start of user address
+ *	@len: length in bytes
+ *	@write_to_vm: bool indicating writing to pages or not
+ *
+ *	Map the user space address into a bio suitable for io to a block
+ *	device. Caller should check the size of the returned bio, we might
+ *	not have mapped the entire range specified.
+ */
+struct bio *bio_map_user(struct block_device *bdev, unsigned long uaddr,
+			 unsigned int len, int write_to_vm)
+{
+	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = uaddr >> PAGE_SHIFT;
+	const int nr_pages = end - start;
+	int ret, offset, i, align_mask;
+	struct page **pages;
+	struct bio *bio;
+
+	/*
+	 * transfer and buffer must be aligned to at least hardsector
+	 * size for now, in the future we can relax this restriction
+	 */
+	align_mask = bdev_hardsect_size(bdev) - 1;
+	if ((uaddr & align_mask) || (len & align_mask))
+		return NULL;
+
+	bio = bio_alloc(GFP_KERNEL, nr_pages);
+	if (!bio)
+		return NULL;
+
+	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out;
+
+	down_read(&current->mm->mmap_sem);
+	ret = get_user_pages(current, current->mm, uaddr, nr_pages,
+			     write_to_vm, 0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (ret < nr_pages)
+		goto out;
+
+	bio->bi_bdev = bdev;
+
+	offset = uaddr & ~PAGE_MASK;
+	for (i = 0; i < nr_pages; i++) {
+		unsigned int bytes = PAGE_SIZE - offset;
+
+		if (len <= 0)
+			break;
+
+		if (bytes > len)
+			bytes = len;
+
+		/*
+		 * sorry...
+		 */
+		if (bio_add_page(bio, pages[i], bytes, offset) < bytes)
+			break;
+
+		len -= bytes;
+		offset = 0;
+	}
+
+	/*
+	 * release the pages we didn't map into the bio, if any
+	 */
+	while (i < nr_pages)
+		page_cache_release(pages[i++]);
+
+	bio->bi_end_io = bio_user_end_io;
+	kfree(pages);
+	return bio;
+out:
+	kfree(pages);
+	bio_put(bio);
+	return NULL;
+}
+
+/**
  * bio_endio - end I/O on a bio
  * @bio:	bio
  * @bytes_done:	number of bytes completed
@@ -536,7 +640,7 @@
 	return 0;
 }
 
-module_init(init_bio);
+subsys_initcall(init_bio);
 
 EXPORT_SYMBOL(bio_alloc);
 EXPORT_SYMBOL(bio_put);
@@ -549,3 +653,4 @@
 EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
+EXPORT_SYMBOL(bio_map_user);
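
A standalone illustration (assuming 4K pages) of the page arithmetic at the top of bio_map_user(): an unaligned (uaddr, len) range can straddle one page more than len alone suggests, which is why nr_pages is computed from the rounded end and start page indexes rather than from len / PAGE_SIZE.

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12

/* number of pages spanned by a user range, as in bio_map_user() */
static int nr_pages_for_range(unsigned long uaddr, unsigned long len)
{
	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = uaddr >> PAGE_SHIFT;

	return end - start;
}

int main(void)
{
	/* 8K starting 512 bytes into a page spans three pages, not two */
	printf("%d pages\n", nr_pages_for_range(0x1200, 8192));
	return 0;
}
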
===== fs/direct-io.c 1.14 vs edited =====
--- 1.14/fs/direct-io.c	Sun Oct 13 00:45:44 2002
+++ edited/fs/direct-io.c	Fri Oct 18 20:28:57 2002
@@ -417,7 +417,7 @@
 
 	/* Take a ref against the page each time it is placed into a BIO */
 	page_cache_get(page);
-	if (bio_add_page(dio->bio, page, bv_len, bv_offset)) {
+	if (bio_add_page(dio->bio, page, bv_len, bv_offset) < bv_len) {
 		dio_bio_submit(dio);
 		ret = dio_new_bio(dio, blkno);
 		if (ret == 0) {
===== fs/mpage.c 1.25 vs edited =====
--- 1.25/fs/mpage.c	Wed Oct 16 02:30:10 2002
+++ edited/fs/mpage.c	Fri Oct 18 20:28:57 2002
@@ -176,6 +176,7 @@
 	unsigned first_hole = blocks_per_page;
 	struct block_device *bdev = NULL;
 	struct buffer_head bh;
+	int length;
 
 	if (page_has_buffers(page))
 		goto confused;
@@ -233,7 +234,8 @@
 		goto confused;
 	}
 
-	if (bio_add_page(bio, page, first_hole << blkbits, 0)) {
+	length = first_hole << blkbits;
+	if (bio_add_page(bio, page, length, 0) < length) {
 		bio = mpage_bio_submit(READ, bio);
 		goto alloc_new;
 	}
@@ -334,6 +336,7 @@
 	int boundary = 0;
 	sector_t boundary_block = 0;
 	struct block_device *boundary_bdev = NULL;
+	int length;
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -467,7 +470,8 @@
 		try_to_free_buffers(page);
 	}
 
-	if (bio_add_page(bio, page, first_unmapped << blkbits, 0)) {
+	length = first_unmapped << blkbits;
+	if (bio_add_page(bio, page, length, 0) < length) {
 		bio = mpage_bio_submit(WRITE, bio);
 		goto alloc_new;
 	}
===== fs/xfs/pagebuf/page_buf.c 1.15 vs edited =====
--- 1.15/fs/xfs/pagebuf/page_buf.c	Mon Oct 14 22:54:12 2002
+++ edited/fs/xfs/pagebuf/page_buf.c	Fri Oct 18 20:28:57 2002
@@ -1448,7 +1448,7 @@
 		if (nbytes > size)
 			nbytes = size;
 
-		if (bio_add_page(bio, pb->pb_pages[map_i], nbytes, offset))
+		if (bio_add_page(bio, pb->pb_pages[map_i], nbytes, offset) < nbytes)
 			break;
 
 		offset = 0;
===== include/linux/bio.h 1.22 vs edited =====
--- 1.22/include/linux/bio.h	Tue Oct 8 13:27:47 2002
+++ edited/include/linux/bio.h	Fri Oct 18 20:28:57 2002
@@ -131,6 +131,7 @@
 #define bio_page(bio)		bio_iovec((bio))->bv_page
 #define bio_offset(bio)		bio_iovec((bio))->bv_offset
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
+#define bio_cur_sectors(bio)	(bio_iovec(bio)->bv_len >> 9)
 #define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
 
@@ -214,6 +215,8 @@
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_get_nr_vecs(struct block_device *);
+extern struct bio *bio_map_user(struct block_device *, unsigned long,
+				unsigned int, int);
 
 #ifdef CONFIG_HIGHMEM
 /*
===== include/linux/blkdev.h 1.76 vs edited =====
--- 1.76/include/linux/blkdev.h	Fri Oct 18 19:50:43 2002
+++ edited/include/linux/blkdev.h	Fri Oct 18 20:28:57 2002
@@ -219,6 +219,12 @@
 	wait_queue_head_t	queue_wait;
 
 	struct blk_queue_tag	*queue_tags;
+
+	/*
+	 * sg stuff
+	 */
+	unsigned int		sg_timeout;
+	unsigned int		sg_reserved_size;
 };
 
 #define RQ_INACTIVE		(-1)
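
Finally, a userspace sketch (not part of the patch) of the new per-queue sg_timeout: SG_SET_TIMEOUT stores into q->sg_timeout, which sg_io() consults when the request carries no timeout of its own. The value is in jiffies, following the sg character driver convention; with sg_get_timeout() written as above, SG_GET_TIMEOUT hands the value back as the ioctl return value. The device node is a placeholder.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(int argc, char *argv[])
{
	int fd, ret, timeout = 30 * 100;	/* ~30 seconds at HZ=100 */

	fd = open(argc > 1 ? argv[1] : "/dev/cdrom", O_RDONLY | O_NONBLOCK);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (ioctl(fd, SG_SET_TIMEOUT, &timeout) < 0)
		perror("SG_SET_TIMEOUT");

	/* sg_get_timeout() returns the value directly, so it arrives
	 * as the ioctl return value rather than through a pointer */
	ret = ioctl(fd, SG_GET_TIMEOUT);
	if (ret >= 0)
		printf("queue sg timeout: %d jiffies\n", ret);

	close(fd);
	return 0;
}
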