2363 lines
66 KiB
Diff
2363 lines
66 KiB
Diff
|
diff -Nurb src/linux/linux.orig/Documentation/netswap.txt src/linux/linux/Documentation/netswap.txt
|
||
|
--- src/linux/linux.orig/Documentation/netswap.txt 1969-12-31 19:00:00.000000000 -0500
|
||
|
+++ src/linux/linux/Documentation/netswap.txt 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -0,0 +1,51 @@
|
||
|
+ Swapping over network
|
||
|
+
|
||
|
+Support for this is enabled via the CONFIG_NETSWAP option, which is
|
||
|
+automatically enabled when enabling swap files located on NFS volumes
|
||
|
+(CONFIG_SWAP_VIA_NFS).
|
||
|
+
|
||
|
+When swapping to files located on a network file system like NFS or
|
||
|
+CODA or others or to nbd (network block device, see `nbd.txt')
|
||
|
+partitions there is the problem that this requires additional memory,
|
||
|
+besides the page which is currently swapped in or out, probably at
|
||
|
+least two more pages for each page in question.
|
||
|
+
|
||
|
+This means that not only does there need to be free space left in the swap
|
||
|
+file or the swap partition, but in addition there must be enough free
|
||
|
+memory left in the system to perform the swap out of pages.
|
||
|
+
|
||
|
+This is particularly painful as receiving data over the network itself
|
||
|
+consumes memory, and this memory is allocated from an interrupt
|
||
|
+context (i.e. in the interrupt handler of the network card). That
|
||
|
+means that on a congested network there are chances that the machine
|
||
|
+runs out of memory, simply because the network device's interrupt
|
||
|
+routines allocate memory faster than it is freed by swapping via
|
||
|
+network.
|
||
|
+
|
||
|
+To cope with this problem, there is a new socket option `SO_SWAPPING'
|
||
|
+which has to be set on the `SOL_SOCKET' level with setsockopt() (see
|
||
|
+setsockopt(2)). When this option is set on any network socket, then
|
||
|
+the system will start to drop network packets it receives on any other
|
||
|
+socket when the number of free pages falls below a certain threshold.
|
||
|
+
|
||
|
+This threshold initially is 4 pages less than `freepages.min' (see
|
||
|
+`Documentation/sysctl/vm.txt') but can be tuned using the sysctl
|
||
|
+interface by writing to the file `/proc/sys/net/swapping/threshold'.
|
||
|
+
|
||
|
+There are two other files:
|
||
|
+
|
||
|
+`/proc/sys/net/swapping/dropped':
|
||
|
+ how many network packets have been dropped so far. This file is
|
||
|
+ writable, writing to it simply sets the counter to the given value
|
||
|
+ (useful for resetting the counter).
|
||
|
+
|
||
|
+`/proc/sys/net/swapping/sock_count':
|
||
|
+ How many network sockets have the `SO_SWAPPING' option set (read
|
||
|
+ only, of course).
|
||
|
+
|
||
|
+When using swap-files on NFS volumes, then the `SO_SWAPPING' option is
|
||
|
+set or cleared by swapon/swapoff system calls, so the user need not
|
||
|
+care about it.
|
||
|
+
|
||
|
+Swapping over the network is insecure unless the data is
|
||
|
+encrypted, which is not the case with NFS. It is also very slow.
|
||
|
diff -Nurb src/linux/linux.orig/Documentation/nfsswap.txt src/linux/linux/Documentation/nfsswap.txt
|
||
|
--- src/linux/linux.orig/Documentation/nfsswap.txt 1969-12-31 19:00:00.000000000 -0500
|
||
|
+++ src/linux/linux/Documentation/nfsswap.txt 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -0,0 +1,41 @@
|
||
|
+ Swapping to files on NFS volumes
|
||
|
+
|
||
|
+To do this you have to say `Y' or `M' to the CONFIG_SWAP_VIA_NFS
|
||
|
+configuration option. When compiling support for this as a module you
|
||
|
+should read `Documentation/modules.txt'. For auto-loading of the
|
||
|
+module during the `swapon' system call you have to place a line like
|
||
|
+
|
||
|
+alias swapfile-mod nfsswap
|
||
|
+
|
||
|
+in `/etc/modules.conf' (or `/etc/conf.modules', depending on your
|
||
|
+setup). NFS volumes holding swapfiles should be mounted with `rsize'
|
||
|
+and `wsize' set to something less than the size of a page, otherwise
|
||
|
+deadlocks caused by memory fragmentation can happen, i.e. mount the
|
||
|
+volume which is to hold the swapfiles with
|
||
|
+
|
||
|
+mount -t nfs -o rsize=2048,wsize=2048 NFS_SERVER_IP:/server_volume /mount_point
|
||
|
+
|
||
|
+or set the option in `/etc/fstab'. Read `Documentation/nfsroot.txt' to
|
||
|
+learn how to set mount options for the root file system, if your swap
|
||
|
+files are to be located on the root file system.
|
||
|
+
|
||
|
+Setting the `rsize' and `wsize' to anything less than PAGE_SIZE is a
|
||
|
+performance hit, so you probably want to have at least two volumes
|
||
|
+mounted, one for the swapfiles, one for the rest.
|
||
|
+
|
||
|
+You may want to read `Documentation/netswap.txt' as well.
|
||
|
+
|
||
|
+Swapfiles on NFS volumes can be treated like any other swapfile,
|
||
|
+i.e.
|
||
|
+
|
||
|
+dd if=/dev/zero of=/swapfiles/SWAPFILE bs=1k count=20480
|
||
|
+mkswap /swapfiles/SWAPFILE
|
||
|
+swapon /swapfiles/SWAPFILE
|
||
|
+
|
||
|
+will create a 20M swapfile and tell the system to use it. Actually,
|
||
|
+one could use lseek(2) to create an empty swapfile. This is different
|
||
|
+from swapfiles located on a local hard disk.
|
||
|
+
|
||
|
+Swapping over the network is insecure unless the data is
|
||
|
+encrypted, which is not the case with NFS. It is also very slow.
|
||
|
+
|
||
|
diff -Nurb src/linux/linux.orig/drivers/block/blkpg.c src/linux/linux/drivers/block/blkpg.c
|
||
|
--- src/linux/linux.orig/drivers/block/blkpg.c 2003-07-04 04:11:31.000000000 -0400
|
||
|
+++ src/linux/linux/drivers/block/blkpg.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -34,7 +34,7 @@
|
||
|
#include <linux/blk.h> /* for set_device_ro() */
|
||
|
#include <linux/blkpg.h>
|
||
|
#include <linux/genhd.h>
|
||
|
-#include <linux/swap.h> /* for is_swap_partition() */
|
||
|
+#include <linux/swap.h> /* for swap_run_test() */
|
||
|
#include <linux/module.h> /* for EXPORT_SYMBOL */
|
||
|
|
||
|
#include <asm/uaccess.h>
|
||
|
@@ -114,6 +114,29 @@
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+/* swap_run_test() applies this hook to all swapfiles until it returns
|
||
|
+ * "1". If it never returns "1", the result of swap_run_test() is "0",
|
||
|
+ * otherwise "1".
|
||
|
+ */
|
||
|
+static int is_swap_partition_hook(unsigned int flags, struct file *swap_file,
|
||
|
+ void *testdata)
|
||
|
+{
|
||
|
+ kdev_t swap_dev = S_ISBLK(swap_file->f_dentry->d_inode->i_mode)
|
||
|
+ ? swap_file->f_dentry->d_inode->i_rdev : 0;
|
||
|
+ kdev_t dev = *((kdev_t *)testdata);
|
||
|
+
|
||
|
+ if (flags & SWP_USED && dev == swap_dev) {
|
||
|
+ return 1;
|
||
|
+ } else {
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+static inline int is_swap_partition(kdev_t dev)
|
||
|
+{
|
||
|
+ return swap_run_test(is_swap_partition_hook, &dev);
|
||
|
+}
|
||
|
+
|
||
|
/*
|
||
|
* Delete a partition given by partition number
|
||
|
*
|
||
|
diff -Nurb src/linux/linux.orig/fs/Config.in src/linux/linux/fs/Config.in
|
||
|
--- src/linux/linux.orig/fs/Config.in 2004-05-31 02:02:43.000000000 -0400
|
||
|
+++ src/linux/linux/fs/Config.in 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -4,6 +4,12 @@
|
||
|
mainmenu_option next_comment
|
||
|
comment 'File systems'
|
||
|
|
||
|
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
|
||
|
+ tristate 'Swapping to block devices' CONFIG_BLKDEV_SWAP
|
||
|
+else
|
||
|
+ define_bool CONFIG_BLKDEV_SWAP y
|
||
|
+fi
|
||
|
+
|
||
|
bool 'Quota support' CONFIG_QUOTA
|
||
|
tristate 'Kernel automounter support' CONFIG_AUTOFS_FS
|
||
|
tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS
|
||
|
@@ -110,6 +116,12 @@
|
||
|
dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET
|
||
|
dep_mbool ' Provide NFSv3 client support' CONFIG_NFS_V3 $CONFIG_NFS_FS
|
||
|
dep_bool ' Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP
|
||
|
+ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
|
||
|
+ dep_tristate ' Swapping via NFS (EXPERIMENTAL)' CONFIG_SWAP_VIA_NFS $CONFIG_NFS_FS
|
||
|
+ if [ "$CONFIG_SWAP_VIA_NFS" = "y" -o "$CONFIG_SWAP_VIA_NFS" = "m" ]; then
|
||
|
+ define_bool CONFIG_NETSWAP y
|
||
|
+ fi
|
||
|
+ fi
|
||
|
|
||
|
dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET
|
||
|
dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD
|
||
|
diff -Nurb src/linux/linux.orig/fs/Makefile src/linux/linux/fs/Makefile
|
||
|
--- src/linux/linux.orig/fs/Makefile 2004-05-31 02:02:42.000000000 -0400
|
||
|
+++ src/linux/linux/fs/Makefile 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -8,7 +8,7 @@
|
||
|
O_TARGET := fs.o
|
||
|
|
||
|
export-objs := filesystems.o open.o dcache.o buffer.o
|
||
|
-mod-subdirs := nls
|
||
|
+mod-subdirs := nls nfs
|
||
|
|
||
|
obj-y := open.o read_write.o devices.o file_table.o buffer.o \
|
||
|
super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
|
||
|
@@ -70,6 +70,7 @@
|
||
|
subdir-$(CONFIG_JFS_FS) += jfs
|
||
|
subdir-$(CONFIG_SQUASHFS) += squashfs
|
||
|
|
||
|
+obj-$(CONFIG_BLKDEV_SWAP) += blkdev_swap.o
|
||
|
|
||
|
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
|
||
|
obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
|
||
|
diff -Nurb src/linux/linux.orig/fs/blkdev_swap.c src/linux/linux/fs/blkdev_swap.c
|
||
|
--- src/linux/linux.orig/fs/blkdev_swap.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
+++ src/linux/linux/fs/blkdev_swap.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -0,0 +1,309 @@
|
||
|
+/*
|
||
|
+ * Swapping to partitions or files located on partitions.
|
||
|
+ */
|
||
|
+
|
||
|
+#include <linux/config.h>
|
||
|
+#include <linux/module.h>
|
||
|
+#include <linux/init.h>
|
||
|
+#include <linux/slab.h>
|
||
|
+#include <linux/locks.h>
|
||
|
+#include <linux/blkdev.h>
|
||
|
+#include <linux/pagemap.h>
|
||
|
+#include <linux/swap.h>
|
||
|
+#include <linux/fs.h>
|
||
|
+
|
||
|
+#ifdef DEBUG_BLKDEV_SWAP /* define to route dprintk() to printk() */
|
||
|
+# define dprintk(fmt...) printk(fmt)
|
||
|
+#else /* debugging off: dprintk() expands to an empty statement */
|
||
|
+# define dprintk(fmt...) do { /* */ } while (0)
|
||
|
+#endif /* DEBUG_BLKDEV_SWAP */
|
||
|
+
|
||
|
+#define BLKDEV_SWAP_ID "blkdev"
|
||
|
+#define BLKDEV_FILE_SWAP_ID "blkdev file"
|
||
|
+
|
||
|
+/*
|
||
|
+ * Helper function, copied here from buffer.c
|
||
|
+ */
|
||
|
+
|
||
|
+/*
|
||
|
+ * Start I/O on a page.
|
||
|
+ * This function expects the page to be locked and may return
|
||
|
+ * before I/O is complete. You then have to check page->locked
|
||
|
+ * and page->uptodate.
|
||
|
+ *
|
||
|
+ * brw_page() is SMP-safe, although it's being called with the
|
||
|
+ * kernel lock held - but the code is ready.
|
||
|
+ *
|
||
|
+ * FIXME: we need a swapper_inode->get_block function to remove
|
||
|
+ * some of the bmap kludges and interface ugliness here.
|
||
|
+ */
|
||
|
+int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
|
||
|
+{
|
||
|
+ struct buffer_head *head, *bh;
|
||
|
+
|
||
|
+ if (!PageLocked(page))
|
||
|
+ panic("brw_page: page not locked for I/O");
|
||
|
+
|
||
|
+ if (!page->buffers)
|
||
|
+ create_empty_buffers(page, dev, size);
|
||
|
+ head = bh = page->buffers;
|
||
|
+
|
||
|
+ /* Stage 1: lock all the buffers */
|
||
|
+ do {
|
||
|
+ lock_buffer(bh);
|
||
|
+ bh->b_blocknr = *(b++);
|
||
|
+ set_bit(BH_Mapped, &bh->b_state);
|
||
|
+ set_buffer_async_io(bh);
|
||
|
+ bh = bh->b_this_page;
|
||
|
+ } while (bh != head);
|
||
|
+
|
||
|
+ /* Stage 2: start the IO */
|
||
|
+ do {
|
||
|
+ struct buffer_head *next = bh->b_this_page;
|
||
|
+ submit_bh(rw, bh);
|
||
|
+ bh = next;
|
||
|
+ } while (bh != head);
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+/*
|
||
|
+ * We implement two methods: swapping to partitions, and swapping to files
|
||
|
+ * located on partitions.
|
||
|
+ */
|
||
|
+
|
||
|
+struct blkdev_swap_data {
|
||
|
+ kdev_t dev;
|
||
|
+};
|
||
|
+
|
||
|
+struct test_data {
|
||
|
+ struct file * filp;
|
||
|
+ kdev_t dev;
|
||
|
+};
|
||
|
+
|
||
|
+static int is_blkdev_swapping(unsigned int flags,
|
||
|
+ struct file * swapf,
|
||
|
+ void *data)
|
||
|
+{
|
||
|
+ struct test_data *testdata = (struct test_data *) data;
|
||
|
+ struct file * filp = testdata->filp;
|
||
|
+ kdev_t dev = testdata->dev;
|
||
|
+
|
||
|
+ /* Only check filp's that don't match the one already opened
|
||
|
+ * for us by sys_swapon(). Otherwise, we will always flag a
|
||
|
+ * busy swap file.
|
||
|
+ */
|
||
|
+
|
||
|
+ if (swapf != filp) {
|
||
|
+ if (dev == swapf->f_dentry->d_inode->i_rdev)
|
||
|
+ return 1;
|
||
|
+ }
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int blkdev_swap_open(struct file * filp, void **dptr)
|
||
|
+{
|
||
|
+ int swapfilesize;
|
||
|
+ kdev_t dev;
|
||
|
+ struct blkdev_swap_data *data;
|
||
|
+ int error;
|
||
|
+ struct test_data testdata;
|
||
|
+
|
||
|
+ MOD_INC_USE_COUNT; /* dropped at bad_swap or in blkdev_swap_release() */
|
||
|
+
|
||
|
+ if (!S_ISBLK(filp->f_dentry->d_inode->i_mode)) {
|
||
|
+ dprintk("blkdev_swap_open: can't handle this swap file: %s\n",
|
||
|
+ filp->f_dentry->d_name.name);
|
||
|
+ error = 0; /* not for us */
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+
|
||
|
+ dev = filp->f_dentry->d_inode->i_rdev;
|
||
|
+ set_blocksize(dev, PAGE_SIZE);
|
||
|
+ error = -ENODEV;
|
||
|
+ if (!dev ||
|
||
|
+ (blk_size[MAJOR(dev)] && !blk_size[MAJOR(dev)][MINOR(dev)])) {
|
||
|
+ printk("blkdev_swap_open: blkdev weirdness for %s\n",
|
||
|
+ filp->f_dentry->d_name.name);
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Check to make sure that we aren't already swapping. */
|
||
|
+ error = -EBUSY;
|
||
|
+ testdata.filp = filp;
|
||
|
+ testdata.dev = dev;
|
||
|
+ if (swap_run_test(is_blkdev_swapping, &testdata)) {
|
||
|
+ printk("blkdev_swap_open: already swapping to %s\n",
|
||
|
+ filp->f_dentry->d_name.name);
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+
|
||
|
+ swapfilesize = 0; /* stays 0 when the driver has no blk_size[] table */
|
||
|
+ if (blk_size[MAJOR(dev)])
|
||
|
+ swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
|
||
|
+ >> (PAGE_SHIFT - 10);
|
||
|
+
|
||
|
+ if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
|
||
|
+ printk("blkdev_swap_open: can't allocate data for %s\n",
|
||
|
+ filp->f_dentry->d_name.name);
|
||
|
+ error = -ENOMEM;
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+ data->dev = dev;
|
||
|
+ *dptr = data; /* freed by blkdev_swap_release() */
|
||
|
+
|
||
|
+ dprintk("blkdev_swap_open: returning %d\n", swapfilesize);
|
||
|
+ return swapfilesize;
|
||
|
+
|
||
|
+ bad_swap:
|
||
|
+ MOD_DEC_USE_COUNT;
|
||
|
+ return error; /* 0: not a block device (not ours); < 0: -errno */
|
||
|
+}
|
||
|
+
|
||
|
+static int blkdev_swap_release(struct file * filp, void *data)
|
||
|
+{
|
||
|
+ dprintk("blkdev_swap_release: releasing swap device %s\n",
|
||
|
+ filp->f_dentry->d_name.name);
|
||
|
+ kfree(data);
|
||
|
+ MOD_DEC_USE_COUNT;
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int blkdev_rw_page(int rw, struct page *page, unsigned long offset,
|
||
|
+ void *ptr)
|
||
|
+{
|
||
|
+ int block = offset; /* brw_page() reads int block numbers, not longs */
|
||
|
+ brw_page(rw, page, ((struct blkdev_swap_data *)ptr)->dev, &block, PAGE_SIZE);
|
||
|
+ return 1; /* unconditional: brw_page()'s return value is not checked */
|
||
|
+}
|
||
|
+
|
||
|
+static struct swap_ops blkdev_swap_ops = {
|
||
|
+ blkdev_swap_open,
|
||
|
+ blkdev_swap_release,
|
||
|
+ blkdev_rw_page
|
||
|
+};
|
||
|
+
|
||
|
+struct blkdevfile_swap_data {
|
||
|
+ struct inode *swapf;
|
||
|
+};
|
||
|
+
|
||
|
+static int is_blkdevfile_swapping(unsigned int flags,
|
||
|
+ struct file * swapf,
|
||
|
+ void * data)
|
||
|
+{
|
||
|
+ struct file * filp = (struct file *) data;
|
||
|
+
|
||
|
+ /* Only check filp's that don't match the one already opened
|
||
|
+ * for us by sys_swapon(). Otherwise, we will always flag a
|
||
|
+ * busy swap file.
|
||
|
+ */
|
||
|
+
|
||
|
+ if (swapf != filp) {
|
||
|
+ if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode)
|
||
|
+ return 1;
|
||
|
+ }
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int blkdevfile_swap_open(struct file *swapf, void **dptr)
|
||
|
+{
|
||
|
+ int error = 0;
|
||
|
+ int swapfilesize;
|
||
|
+ struct blkdevfile_swap_data *data;
|
||
|
+
|
||
|
+ MOD_INC_USE_COUNT; /* dropped at bad_swap or in blkdevfile_swap_release() */
|
||
|
+
|
||
|
+ /* first check whether this is a regular file located on a local
|
||
|
+ * hard disk
|
||
|
+ */
|
||
|
+ if (!S_ISREG(swapf->f_dentry->d_inode->i_mode)) {
|
||
|
+ dprintk("blkdevfile_swap_open: "
|
||
|
+ "can't handle this swap file: %s\n",
|
||
|
+ swapf->f_dentry->d_name.name);
|
||
|
+ error = 0; /* not for us */
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+ if (!swapf->f_dentry->d_inode->i_mapping->a_ops->bmap) {
|
||
|
+ dprintk("blkdevfile_swap_open: no bmap for file: %s\n",
|
||
|
+ swapf->f_dentry->d_name.name);
|
||
|
+ error = 0; /* not for us */
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (swap_run_test(is_blkdevfile_swapping, swapf)) {
|
||
|
+ dprintk("blkdevfile_swap_open: already swapping to %s\n",
|
||
|
+ swapf->f_dentry->d_name.name);
|
||
|
+ error = -EBUSY;
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+ swapfilesize = swapf->f_dentry->d_inode->i_size >> PAGE_SHIFT; /* in pages */
|
||
|
+ if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
|
||
|
+ error = -ENOMEM;
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+ data->swapf = swapf->f_dentry->d_inode;
|
||
|
+ *dptr = data; /* freed by blkdevfile_swap_release() */
|
||
|
+ return swapfilesize;
|
||
|
+
|
||
|
+ bad_swap:
|
||
|
+ MOD_DEC_USE_COUNT;
|
||
|
+ return error; /* 0: not ours; < 0: -errno */
|
||
|
+}
|
||
|
+
|
||
|
+static int blkdevfile_swap_release(struct file *swapf, void *data)
|
||
|
+{
|
||
|
+ kfree(data);
|
||
|
+ MOD_DEC_USE_COUNT;
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int blkdevfile_rw_page(int rw, struct page *page, unsigned long offset,
|
||
|
+ void *ptr)
|
||
|
+{
|
||
|
+ struct blkdevfile_swap_data *data = (struct blkdevfile_swap_data *)ptr;
|
||
|
+ struct inode * swapf = data->swapf;
|
||
|
+ int i, j;
|
||
|
+ unsigned int block = offset
|
||
|
+ << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
|
||
|
+ kdev_t dev = swapf->i_dev;
|
||
|
+ int block_size;
|
||
|
+ int zones[PAGE_SIZE/512];
|
||
|
+ int zones_used;
|
||
|
+
|
||
|
+ block_size = swapf->i_sb->s_blocksize;
|
||
|
+ for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
|
||
|
+ if (!(zones[i] = bmap(swapf,block++))) {
|
||
|
+ printk("blkdevfile_rw_page: bad swap file\n");
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+ zones_used = i;
|
||
|
+
|
||
|
+ /* block_size == PAGE_SIZE/zones_used */
|
||
|
+ brw_page(rw, page, dev, zones, block_size);
|
||
|
+ return 1;
|
||
|
+}
|
||
|
+
|
||
|
+static struct swap_ops blkdevfile_swap_ops = {
|
||
|
+ blkdevfile_swap_open,
|
||
|
+ blkdevfile_swap_release,
|
||
|
+ blkdevfile_rw_page
|
||
|
+ };
|
||
|
+
|
||
|
+int __init blkdev_swap_init(void)
|
||
|
+{
|
||
|
+ (void)register_swap_method(BLKDEV_SWAP_ID, &blkdev_swap_ops);
|
||
|
+ (void)register_swap_method(BLKDEV_FILE_SWAP_ID, &blkdevfile_swap_ops);
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+void __exit blkdev_swap_exit(void)
|
||
|
+{
|
||
|
+ unregister_swap_method(BLKDEV_SWAP_ID);
|
||
|
+ unregister_swap_method(BLKDEV_FILE_SWAP_ID);
|
||
|
+}
|
||
|
+
|
||
|
+module_init(blkdev_swap_init)
|
||
|
+module_exit(blkdev_swap_exit)
|
||
|
+
|
||
|
+MODULE_LICENSE("GPL");
|
||
|
+MODULE_AUTHOR("Many. Stuffed into a module by cH (Claus-Justus Heine)");
|
||
|
+MODULE_DESCRIPTION("Swapping to partitions and files on local hard-disks");
|
||
|
diff -Nurb src/linux/linux.orig/fs/buffer.c src/linux/linux/fs/buffer.c
|
||
|
--- src/linux/linux.orig/fs/buffer.c 2003-07-04 04:12:05.000000000 -0400
|
||
|
+++ src/linux/linux/fs/buffer.c 2004-05-31 02:21:05.000000000 -0400
|
||
|
@@ -743,7 +743,7 @@
|
||
|
bh->b_private = private;
|
||
|
}
|
||
|
|
||
|
-static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
|
||
|
+void end_buffer_io_async(struct buffer_head * bh, int uptodate)
|
||
|
{
|
||
|
static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
|
||
|
unsigned long flags;
|
||
|
@@ -2344,35 +2344,6 @@
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
-int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
|
||
|
-{
|
||
|
- struct buffer_head *head, *bh;
|
||
|
-
|
||
|
- if (!PageLocked(page))
|
||
|
- panic("brw_page: page not locked for I/O");
|
||
|
-
|
||
|
- if (!page->buffers)
|
||
|
- create_empty_buffers(page, dev, size);
|
||
|
- head = bh = page->buffers;
|
||
|
-
|
||
|
- /* Stage 1: lock all the buffers */
|
||
|
- do {
|
||
|
- lock_buffer(bh);
|
||
|
- bh->b_blocknr = *(b++);
|
||
|
- set_bit(BH_Mapped, &bh->b_state);
|
||
|
- set_buffer_async_io(bh);
|
||
|
- bh = bh->b_this_page;
|
||
|
- } while (bh != head);
|
||
|
-
|
||
|
- /* Stage 2: start the IO */
|
||
|
- do {
|
||
|
- struct buffer_head *next = bh->b_this_page;
|
||
|
- submit_bh(rw, bh);
|
||
|
- bh = next;
|
||
|
- } while (bh != head);
|
||
|
- return 0;
|
||
|
-}
|
||
|
-
|
||
|
int block_symlink(struct inode *inode, const char *symname, int len)
|
||
|
{
|
||
|
struct address_space *mapping = inode->i_mapping;
|
||
|
diff -Nurb src/linux/linux.orig/fs/nfs/Makefile src/linux/linux/fs/nfs/Makefile
|
||
|
--- src/linux/linux.orig/fs/nfs/Makefile 2003-07-04 04:12:07.000000000 -0400
|
||
|
+++ src/linux/linux/fs/nfs/Makefile 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -15,6 +15,14 @@
|
||
|
obj-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
|
||
|
obj-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
|
||
|
|
||
|
-obj-m := $(O_TARGET)
|
||
|
+obj-$(CONFIG_SWAP_VIA_NFS) += nfsswap.o
|
||
|
+ifeq ($(CONFIG_SWAP_VIA_NFS),m)
|
||
|
+export-objs := nfs_syms.o
|
||
|
+obj-y += nfs_syms.o
|
||
|
+endif
|
||
|
+
|
||
|
+ifeq ($(CONFIG_NFS_FS),m)
|
||
|
+obj-m += $(O_TARGET)
|
||
|
+endif
|
||
|
|
||
|
include $(TOPDIR)/Rules.make
|
||
|
diff -Nurb src/linux/linux.orig/fs/nfs/file.c src/linux/linux/fs/nfs/file.c
|
||
|
--- src/linux/linux.orig/fs/nfs/file.c 2003-07-04 04:12:07.000000000 -0400
|
||
|
+++ src/linux/linux/fs/nfs/file.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -58,11 +58,6 @@
|
||
|
setattr: nfs_notify_change,
|
||
|
};
|
||
|
|
||
|
-/* Hack for future NFS swap support */
|
||
|
-#ifndef IS_SWAPFILE
|
||
|
-# define IS_SWAPFILE(inode) (0)
|
||
|
-#endif
|
||
|
-
|
||
|
/*
|
||
|
* Flush all dirty pages, and check for write errors.
|
||
|
*
|
||
|
@@ -217,8 +212,6 @@
|
||
|
inode->i_ino, (unsigned long) count, (unsigned long) *ppos);
|
||
|
|
||
|
result = -EBUSY;
|
||
|
- if (IS_SWAPFILE(inode))
|
||
|
- goto out_swapfile;
|
||
|
result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
||
|
if (result)
|
||
|
goto out;
|
||
|
@@ -230,10 +223,6 @@
|
||
|
result = generic_file_write(file, buf, count, ppos);
|
||
|
out:
|
||
|
return result;
|
||
|
-
|
||
|
-out_swapfile:
|
||
|
- printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
|
||
|
- goto out;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
diff -Nurb src/linux/linux.orig/fs/nfs/nfs_syms.c src/linux/linux/fs/nfs/nfs_syms.c
|
||
|
--- src/linux/linux.orig/fs/nfs/nfs_syms.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
+++ src/linux/linux/fs/nfs/nfs_syms.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -0,0 +1,10 @@
|
||
|
+#include <linux/config.h>
|
||
|
+#define __NO_VERSION__
|
||
|
+#include <linux/module.h>
|
||
|
+#include <linux/types.h>
|
||
|
+#include <linux/sunrpc/clnt.h>
|
||
|
+#include <linux/nfs_fs.h>
|
||
|
+
|
||
|
+EXPORT_SYMBOL(__nfs_refresh_inode);
|
||
|
+EXPORT_SYMBOL(nfs_write_attributes);
|
||
|
+
|
||
|
diff -Nurb src/linux/linux.orig/fs/nfs/nfsswap.c src/linux/linux/fs/nfs/nfsswap.c
|
||
|
--- src/linux/linux.orig/fs/nfs/nfsswap.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
+++ src/linux/linux/fs/nfs/nfsswap.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -0,0 +1,350 @@
|
||
|
+/*
|
||
|
+ * Swapping to files located on NFS mounted volumes
|
||
|
+ * Copyright (c) 2000 Claus-Justus Heine
|
||
|
+ *
|
||
|
+ */
|
||
|
+
|
||
|
+#include <linux/config.h>
|
||
|
+#include <linux/module.h>
|
||
|
+#include <linux/init.h>
|
||
|
+#include <linux/types.h>
|
||
|
+#include <linux/slab.h>
|
||
|
+#include <linux/swap.h>
|
||
|
+#include <linux/pagemap.h>
|
||
|
+#include <linux/file.h>
|
||
|
+#include <linux/fs.h>
|
||
|
+#include <linux/socket.h>
|
||
|
+#include <linux/smp_lock.h>
|
||
|
+#include <net/netswapping.h>
|
||
|
+#include <net/sock.h>
|
||
|
+
|
||
|
+#include <linux/sunrpc/clnt.h>
|
||
|
+#include <linux/nfs_fs.h>
|
||
|
+#include <linux/nfs_fs_sb.h>
|
||
|
+#include <asm/uaccess.h>
|
||
|
+
|
||
|
+#define NFSDBG_FACILITY NFSDBG_SWAP
|
||
|
+
|
||
|
+#define NFS_SWAP_ID "nfs file"
|
||
|
+
|
||
|
+/* we cache some values here. In principle, we only need the file.
|
||
|
+ */
|
||
|
+struct nfs_swap_data {
|
||
|
+ struct file *file;
|
||
|
+ struct inode *inode;
|
||
|
+ struct nfs_server *server;
|
||
|
+ struct socket *socket;
|
||
|
+};
|
||
|
+
|
||
|
+/* Nearly a clone of nfs_readpage_sync() in read.c, but "struct page" does not
|
||
|
+ * contain information about the file offset when swapping. So.
|
||
|
+ */
|
||
|
+static int nfs_read_swap_page(struct page *page,
|
||
|
+ struct nfs_server *server,
|
||
|
+ struct inode *inode,
|
||
|
+ struct file *file)
|
||
|
+{
|
||
|
+ unsigned int rsize = server->rsize;
|
||
|
+ unsigned int count = PAGE_SIZE;
|
||
|
+ unsigned int offset = 0; /* always at start of page */
|
||
|
+ int result, eof;
|
||
|
+ struct rpc_cred *cred;
|
||
|
+ struct nfs_fattr fattr;
|
||
|
+
|
||
|
+ cred = nfs_file_cred(file);
|
||
|
+
|
||
|
+ do {
|
||
|
+ if (count < rsize)
|
||
|
+ rsize = count;
|
||
|
+
|
||
|
+ lock_kernel();
|
||
|
+ result = NFS_PROTO(inode)->read(inode, cred,
|
||
|
+ &fattr,
|
||
|
+ NFS_RPC_SWAPFLAGS,
|
||
|
+ offset, rsize, page, &eof);
|
||
|
+ nfs_refresh_inode(inode, &fattr);
|
||
|
+ unlock_kernel();
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Even if we had a partial success we can't mark the page
|
||
|
+ * cache valid.
|
||
|
+ */
|
||
|
+ if (result < 0) {
|
||
|
+ if (result == -EISDIR)
|
||
|
+ result = -EINVAL;
|
||
|
+ goto io_error;
|
||
|
+ }
|
||
|
+ count -= result;
|
||
|
+ offset += result;
|
||
|
+ if (result < rsize) /* NFSv2ism */
|
||
|
+ break;
|
||
|
+ } while (count);
|
||
|
+
|
||
|
+ if (count) {
|
||
|
+ char *kaddr = kmap(page);
|
||
|
+ memset(kaddr + offset, 0, count);
|
||
|
+ kunmap(page);
|
||
|
+ }
|
||
|
+ flush_dcache_page(page);
|
||
|
+ result = 0;
|
||
|
+
|
||
|
+io_error:
|
||
|
+ return result;
|
||
|
+}
|
||
|
+
|
||
|
+/* Like nfs_writepage_sync(), but when swapping page->index does not encode
|
||
|
+ * the offset in the swap file alone.
|
||
|
+ *
|
||
|
+ */
|
||
|
+static int nfs_write_swap_page(struct page *page,
|
||
|
+ struct nfs_server *server,
|
||
|
+ struct inode *inode,
|
||
|
+ struct file *file)
|
||
|
+{
|
||
|
+ struct rpc_cred *cred;
|
||
|
+ unsigned int wsize = server->wsize;
|
||
|
+ unsigned int count = PAGE_SIZE;
|
||
|
+ unsigned int offset = 0;
|
||
|
+ int result;
|
||
|
+ struct nfs_writeverf verf;
|
||
|
+ struct nfs_fattr fattr;
|
||
|
+
|
||
|
+ cred = nfs_file_cred(file);
|
||
|
+
|
||
|
+ do {
|
||
|
+ if (count < wsize)
|
||
|
+ wsize = count;
|
||
|
+
|
||
|
+ lock_kernel();
|
||
|
+ result = NFS_PROTO(inode)->write(inode, cred, &fattr,
|
||
|
+ NFS_RW_SWAP|NFS_RW_SYNC,
|
||
|
+ offset, wsize, page, &verf);
|
||
|
+ nfs_write_attributes(inode, &fattr);
|
||
|
+ unlock_kernel();
|
||
|
+
|
||
|
+ if (result < 0) {
|
||
|
+ goto io_error;
|
||
|
+ }
|
||
|
+ if (result != wsize)
|
||
|
+ printk("NFS: short write, wsize=%u, result=%d\n",
|
||
|
+ wsize, result);
|
||
|
+ offset += wsize;
|
||
|
+ count -= wsize;
|
||
|
+ /*
|
||
|
+ * If we've extended the file, update the inode
|
||
|
+ * now so we don't invalidate the cache.
|
||
|
+ */
|
||
|
+ if (offset > inode->i_size)
|
||
|
+ inode->i_size = offset;
|
||
|
+ } while (count);
|
||
|
+
|
||
|
+ result = 0;
|
||
|
+
|
||
|
+io_error:
|
||
|
+
|
||
|
+ return result;
|
||
|
+}
|
||
|
+
|
||
|
+/* Unluckily (for us) from 2.4.19 -> 2.4.20 the nfs-proc's were
|
||
|
+ * changed and now expect a proper file-mapping page, where index
|
||
|
+ * encodes the offset alone.
|
||
|
+ *
|
||
|
+ * What we do: we save the original value of page->index, initialize
|
||
|
+ * page->index to what the NFS/sun-rpc subsystem expects and restore
|
||
|
+ * the index later.
|
||
|
+ */
|
||
|
+static int nfs_rw_swap_page(int rw, struct page *page,
|
||
|
+ unsigned long offset, void *dptr)
|
||
|
+{
|
||
|
+ int error;
|
||
|
+ struct nfs_swap_data *data = dptr;
|
||
|
+ unsigned long alloc_flag = current->flags & PF_MEMALLOC;
|
||
|
+ unsigned long page_index;
|
||
|
+
|
||
|
+ if (!PageLocked(page))
|
||
|
+ panic("nfs_rw_swap_page: page not locked for I/O");
|
||
|
+
|
||
|
+ /* prevent memory deadlocks */
|
||
|
+ if (!(current->flags & PF_MEMALLOC)) {
|
||
|
+ dprintk("nfs_rw_swap_page: Setting PF_MEMALLOC\n");
|
||
|
+ }
|
||
|
+ current->flags |= PF_MEMALLOC;
|
||
|
+
|
||
|
+ /* now tweak the page->index field ... */
|
||
|
+ page_index = page->index;
|
||
|
+ page->index = ((loff_t)offset*(loff_t)PAGE_SIZE) >> PAGE_CACHE_SHIFT;
|
||
|
+
|
||
|
+ if (rw == WRITE) {
|
||
|
+ error = nfs_write_swap_page(page,
|
||
|
+ data->server,
|
||
|
+ data->inode,
|
||
|
+ data->file);
|
||
|
+ } else {
|
||
|
+ error = nfs_read_swap_page(page,
|
||
|
+ data->server,
|
||
|
+ data->inode,
|
||
|
+ data->file);
|
||
|
+ }
|
||
|
+
|
||
|
+ if (!alloc_flag) {
|
||
|
+ current->flags &= ~PF_MEMALLOC;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* now restore the page->index field ... */
|
||
|
+ page->index = page_index;
|
||
|
+
|
||
|
+ if (error) {
|
||
|
+ /* Must mark the page invalid after I/O error */
|
||
|
+ SetPageError(page);
|
||
|
+ ClearPageUptodate(page);
|
||
|
+ } else {
|
||
|
+ ClearPageError(page);
|
||
|
+ SetPageUptodate(page);
|
||
|
+ }
|
||
|
+
|
||
|
+ if (!error) { /* in case of an error rw_swap_page() likes to unlock
|
||
|
+ * itself.
|
||
|
+ */
|
||
|
+ UnlockPage(page);
|
||
|
+ }
|
||
|
+
|
||
|
+ return error < 0 ? 0 : 1;
|
||
|
+}
|
||
|
+
|
||
|
+static int is_nfsfile_swapping(unsigned int flags,
|
||
|
+ struct file * swapf,
|
||
|
+ void * data)
|
||
|
+{
|
||
|
+ struct file * filp = (struct file *) data;
|
||
|
+
|
||
|
+ /* Only check filp's that don't match the one already opened
|
||
|
+ * for us by sys_swapon(). Otherwise, we will always flag a
|
||
|
+ * busy swap file.
|
||
|
+ */
|
||
|
+
|
||
|
+ if (swapf != filp) {
|
||
|
+ if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode)
|
||
|
+ return 1;
|
||
|
+ }
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int nfs_swap_open(struct file *swapf, void **dptr)
|
||
|
+{
|
||
|
+ int error = 0;
|
||
|
+ int swapfilesize;
|
||
|
+ struct nfs_swap_data *data;
|
||
|
+ int on = 1;
|
||
|
+ mm_segment_t fs;
|
||
|
+ struct inode *inode = swapf->f_dentry->d_inode;
|
||
|
+
|
||
|
+ MOD_INC_USE_COUNT;
|
||
|
+
|
||
|
+ if (!S_ISREG(inode->i_mode)) {
|
||
|
+ dprintk("nfs_swap_open: can't handle this swap file: %s\n",
|
||
|
+ swapf->f_dentry->d_name.name);
|
||
|
+ error = 0; /* not for us */
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+ /* determine whether this file really is located on an NFS mounted
|
||
|
+ * volume
|
||
|
+ */
|
||
|
+ if (!inode->i_sb || inode->i_sb->s_magic != NFS_SUPER_MAGIC) {
|
||
|
+ dprintk("nfs_swap_open: %s is not an NFS file.\n",
|
||
|
+ swapf->f_dentry->d_name.name);
|
||
|
+ error = 0; /* not for us */
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (swap_run_test(is_nfsfile_swapping, swapf)) {
|
||
|
+ dprintk("nfs_swap_open: already swapping to %s\n",
|
||
|
+ swapf->f_dentry->d_name.name);
|
||
|
+ error = -EBUSY;
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+ swapfilesize = inode->i_size >> PAGE_SHIFT;
|
||
|
+ if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
|
||
|
+ error = -ENOMEM;
|
||
|
+ goto bad_swap;
|
||
|
+ }
|
||
|
+ data->file = swapf;
|
||
|
+ data->inode = inode;
|
||
|
+ data->server = NFS_SERVER(inode);
|
||
|
+ data->socket = data->server->client->cl_xprt->sock;
|
||
|
+
|
||
|
+ /* set socket option SO_SWAPPING */
|
||
|
+ fs = get_fs();
|
||
|
+ set_fs(KERNEL_DS);
|
||
|
+ error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING,
|
||
|
+ (char *)&on, sizeof(on));
|
||
|
+ set_fs(fs);
|
||
|
+ if (error) {
|
||
|
+ dprintk("nfs_swap_open: error setting SO_SWAPPING\n");
|
||
|
+ goto bad_swap_2;
|
||
|
+ }
|
||
|
+
|
||
|
+ *dptr = data;
|
||
|
+ return swapfilesize;
|
||
|
+
|
||
|
+ bad_swap_2:
|
||
|
+ kfree(data);
|
||
|
+ bad_swap:
|
||
|
+ MOD_DEC_USE_COUNT;
|
||
|
+ return error;
|
||
|
+}
|
||
|
+
|
||
|
+static int nfs_swap_release(struct file *swapf, void *dptr)
|
||
|
+{
|
||
|
+ struct nfs_swap_data *data = (struct nfs_swap_data *)dptr;
|
||
|
+ int off = 0; /* option value: clear SO_SWAPPING */
|
||
|
+ mm_segment_t fs;
|
||
|
+ int error;
|
||
|
+
|
||
|
+#if 1 /* sanity check: cached values must still match the swap file */
|
||
|
+ if (swapf != data->file ||
|
||
|
+ swapf->f_dentry->d_inode != data->inode ||
|
||
|
+ !swapf->f_dentry->d_inode->i_sb ||
|
||
|
+ swapf->f_dentry->d_inode->i_sb->s_magic != NFS_SUPER_MAGIC ||
|
||
|
+ NFS_SERVER(swapf->f_dentry->d_inode) != data->server ||
|
||
|
+ data->socket != data->server->client->cl_xprt->sock) {
|
||
|
+ panic("nfs_swap_release: nfs swap data messed up");
|
||
|
+ }
|
||
|
+#endif
|
||
|
+
|
||
|
+ /* remove socket option SO_SWAPPING */
|
||
|
+ fs = get_fs();
|
||
|
+ set_fs(KERNEL_DS); /* the option value lives in kernel space */
|
||
|
+ error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING,
|
||
|
+ (char *)&off, sizeof(off));
|
||
|
+ set_fs(fs);
|
||
|
+ if (error) {
|
||
|
+ dprintk("nfs_swap_release: error clearing SO_SWAPPING\n");
|
||
|
+ }
|
||
|
+ kfree(data); /* released even if clearing the option failed */
|
||
|
+ MOD_DEC_USE_COUNT;
|
||
|
+ return error;
|
||
|
+}
|
||
|
+
|
||
|
+static struct swap_ops nfs_swap_ops = {
|
||
|
+ open: nfs_swap_open,
|
||
|
+ release: nfs_swap_release,
|
||
|
+ rw_page: nfs_rw_swap_page
|
||
|
+};
|
||
|
+
|
||
|
+int __init nfs_swap_init(void)
|
||
|
+{
|
||
|
+ (void)register_swap_method(NFS_SWAP_ID, &nfs_swap_ops);
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+void __exit nfs_swap_exit(void)
|
||
|
+{
|
||
|
+ unregister_swap_method(NFS_SWAP_ID);
|
||
|
+}
|
||
|
+
|
||
|
+module_init(nfs_swap_init)
|
||
|
+module_exit(nfs_swap_exit)
|
||
|
+
|
||
|
+MODULE_LICENSE("GPL");
|
||
|
+MODULE_AUTHOR("(c) 1996-2002 cH (Claus-Justus Heine)");
|
||
|
+MODULE_DESCRIPTION("Swapping to files located on volumes mounted via NFS");
|
||
|
diff -Nurb src/linux/linux.orig/fs/nfs/read.c src/linux/linux/fs/nfs/read.c
|
||
|
--- src/linux/linux.orig/fs/nfs/read.c 2003-07-04 04:12:08.000000000 -0400
|
||
|
+++ src/linux/linux/fs/nfs/read.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -50,11 +50,6 @@
|
||
|
*/
|
||
|
static void nfs_readpage_result(struct rpc_task *task);
|
||
|
|
||
|
-/* Hack for future NFS swap support */
|
||
|
-#ifndef IS_SWAPFILE
|
||
|
-# define IS_SWAPFILE(inode) (0)
|
||
|
-#endif
|
||
|
-
|
||
|
static kmem_cache_t *nfs_rdata_cachep;
|
||
|
|
||
|
static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
|
||
|
@@ -92,7 +87,6 @@
|
||
|
int rsize = NFS_SERVER(inode)->rsize;
|
||
|
int result;
|
||
|
int count = PAGE_CACHE_SIZE;
|
||
|
- int flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
|
||
|
int eof;
|
||
|
|
||
|
dprintk("NFS: nfs_readpage_sync(%p)\n", page);
|
||
|
@@ -114,7 +108,7 @@
|
||
|
offset, rsize, page);
|
||
|
|
||
|
lock_kernel();
|
||
|
- result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags,
|
||
|
+ result = NFS_PROTO(inode)->read(inode, cred, &fattr, 0,
|
||
|
offset, rsize, page, &eof);
|
||
|
nfs_refresh_inode(inode, &fattr);
|
||
|
unlock_kernel();
|
||
|
@@ -246,7 +240,7 @@
|
||
|
task = &data->task;
|
||
|
|
||
|
/* N.B. Do we need to test? Never called for swapfile inode */
|
||
|
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
|
||
|
+ flags = RPC_TASK_ASYNC;
|
||
|
|
||
|
nfs_read_rpcsetup(head, data);
|
||
|
|
||
|
@@ -476,8 +470,6 @@
|
||
|
}
|
||
|
|
||
|
error = nfs_readpage_sync(file, inode, page);
|
||
|
- if (error < 0 && IS_SWAPFILE(inode))
|
||
|
- printk("Aiee.. nfs swap-in of page failed!\n");
|
||
|
out:
|
||
|
return error;
|
||
|
|
||
|
diff -Nurb src/linux/linux.orig/fs/nfs/write.c src/linux/linux/fs/nfs/write.c
|
||
|
--- src/linux/linux.orig/fs/nfs/write.c 2003-07-04 04:12:08.000000000 -0400
|
||
|
+++ src/linux/linux/fs/nfs/write.c 2004-05-31 02:20:47.000000000 -0400
|
||
|
@@ -3,7 +3,6 @@
|
||
|
#include <linux/config.h>
|
||
|
#include <linux/types.h>
|
||
|
#include <linux/slab.h>
|
||
|
-#include <linux/swap.h>
|
||
|
#include <linux/pagemap.h>
|
||
|
#include <linux/file.h>
|
||
|
|
||
|
@@ -46,11 +45,6 @@
|
||
|
static void nfs_commit_done(struct rpc_task *);
|
||
|
#endif
|
||
|
|
||
|
-/* Hack for future NFS swap support */
|
||
|
-#ifndef IS_SWAPFILE
|
||
|
-# define IS_SWAPFILE(inode) (0)
|
||
|
-#endif
|
||
|
-
|
||
|
static kmem_cache_t *nfs_wdata_cachep;
|
||
|
|
||
|
static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
|
||
|
@@ -82,7 +76,7 @@
|
||
|
* For the moment, we just call nfs_refresh_inode().
|
||
|
*/
|
||
|
static __inline__ int
|
||
|
-nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
|
||
|
+__nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
|
||
|
{
|
||
|
if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) {
|
||
|
fattr->pre_size = NFS_CACHE_ISIZE(inode);
|
||
|
@@ -93,6 +87,11 @@
|
||
|
return nfs_refresh_inode(inode, fattr);
|
||
|
}
|
||
|
|
||
|
+int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
|
||
|
+{
|
||
|
+ return __nfs_write_attributes(inode, fattr);
|
||
|
+}
|
||
|
+
|
||
|
/*
|
||
|
* Write a page synchronously.
|
||
|
* Offset is the data offset within the page.
|
||
|
@@ -104,8 +103,7 @@
|
||
|
struct rpc_cred *cred = NULL;
|
||
|
loff_t base;
|
||
|
unsigned int wsize = NFS_SERVER(inode)->wsize;
|
||
|
- int result, refresh = 0, written = 0, flags;
|
||
|
- u8 *buffer;
|
||
|
+ int result, refresh = 0, written = 0;
|
||
|
struct nfs_fattr fattr;
|
||
|
struct nfs_writeverf verf;
|
||
|
|
||
|
@@ -121,15 +119,14 @@
|
||
|
|
||
|
base = page_offset(page) + offset;
|
||
|
|
||
|
- flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
|
||
|
-
|
||
|
do {
|
||
|
- if (count < wsize && !IS_SWAPFILE(inode))
|
||
|
+ if (count < wsize)
|
||
|
wsize = count;
|
||
|
|
||
|
- result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
|
||
|
+ result = NFS_PROTO(inode)->write(inode, cred, &fattr,
|
||
|
+ NFS_RW_SYNC,
|
||
|
offset, wsize, page, &verf);
|
||
|
- nfs_write_attributes(inode, &fattr);
|
||
|
+ __nfs_write_attributes(inode, &fattr);
|
||
|
|
||
|
if (result < 0) {
|
||
|
/* Must mark the page invalid after I/O error */
|
||
|
@@ -140,7 +137,6 @@
|
||
|
printk("NFS: short write, wsize=%u, result=%d\n",
|
||
|
wsize, result);
|
||
|
refresh = 1;
|
||
|
- buffer += wsize;
|
||
|
base += wsize;
|
||
|
offset += wsize;
|
||
|
written += wsize;
|
||
|
@@ -979,7 +975,7 @@
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
- nfs_write_attributes(inode, resp->fattr);
|
||
|
+ __nfs_write_attributes(inode, resp->fattr);
|
||
|
while (!list_empty(&data->pages)) {
|
||
|
req = nfs_list_entry(data->pages.next);
|
||
|
nfs_list_remove_request(req);
|
||
|
@@ -1133,7 +1129,7 @@
|
||
|
if (nfs_async_handle_jukebox(task))
|
||
|
return;
|
||
|
|
||
|
- nfs_write_attributes(inode, resp->fattr);
|
||
|
+ __nfs_write_attributes(inode, resp->fattr);
|
||
|
while (!list_empty(&data->pages)) {
|
||
|
req = nfs_list_entry(data->pages.next);
|
||
|
nfs_list_remove_request(req);
|
||
|
diff -Nurb src/linux/linux.orig/include/linux/fs.h src/linux/linux/include/linux/fs.h
|
||
|
--- src/linux/linux.orig/include/linux/fs.h 2004-05-31 02:06:19.000000000 -0400
|
||
|
+++ src/linux/linux/include/linux/fs.h 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -1500,6 +1500,10 @@
|
||
|
extern int inode_change_ok(struct inode *, struct iattr *);
|
||
|
extern int inode_setattr(struct inode *, struct iattr *);
|
||
|
|
||
|
+/* for swapping to block devices */
|
||
|
+void create_empty_buffers(struct page *page, kdev_t dev, unsigned long blocksize);
|
||
|
+void end_buffer_io_async(struct buffer_head * bh, int uptodate);
|
||
|
+
|
||
|
/*
|
||
|
* Common dentry functions for inclusion in the VFS
|
||
|
* or in other stackable file systems. Some of these
|
||
|
diff -Nurb src/linux/linux.orig/include/linux/nfs_fs.h src/linux/linux/include/linux/nfs_fs.h
|
||
|
--- src/linux/linux.orig/include/linux/nfs_fs.h 2004-05-31 02:06:28.000000000 -0400
|
||
|
+++ src/linux/linux/include/linux/nfs_fs.h 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -40,8 +40,8 @@
|
||
|
*/
|
||
|
#define NFS_MAX_DIRCACHE 16
|
||
|
|
||
|
-#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768
|
||
|
-#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096
|
||
|
+#define NFS_MAX_FILE_IO_BUFFER_SIZE (8*PAGE_SIZE)
|
||
|
+#define NFS_DEF_FILE_IO_BUFFER_SIZE PAGE_SIZE
|
||
|
|
||
|
/*
|
||
|
* The upper limit on timeouts for the exponential backoff algorithm.
|
||
|
@@ -205,6 +205,8 @@
|
||
|
extern int nfs_writepage(struct page *);
|
||
|
extern int nfs_flush_incompatible(struct file *file, struct page *page);
|
||
|
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
|
||
|
+extern int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr);
|
||
|
+
|
||
|
/*
|
||
|
* Try to write back everything synchronously (but check the
|
||
|
* return value!)
|
||
|
@@ -375,6 +377,7 @@
|
||
|
#define NFSDBG_XDR 0x0020
|
||
|
#define NFSDBG_FILE 0x0040
|
||
|
#define NFSDBG_ROOT 0x0080
|
||
|
+#define NFSDBG_SWAP 0x0100
|
||
|
#define NFSDBG_ALL 0xFFFF
|
||
|
|
||
|
#ifdef __KERNEL__
|
||
|
diff -Nurb src/linux/linux.orig/include/linux/slab.h src/linux/linux/include/linux/slab.h
|
||
|
--- src/linux/linux.orig/include/linux/slab.h 2004-05-31 02:06:19.000000000 -0400
|
||
|
+++ src/linux/linux/include/linux/slab.h 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -39,6 +39,7 @@
|
||
|
#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */
|
||
|
#define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
|
||
|
#define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */
|
||
|
+#define SLAB_LOW_GFP_ORDER 0x00010000UL /* use as low a gfp order as possible */
|
||
|
|
||
|
/* flags passed to a constructor func */
|
||
|
#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
|
||
|
diff -Nurb src/linux/linux.orig/include/linux/swap.h src/linux/linux/include/linux/swap.h
|
||
|
--- src/linux/linux.orig/include/linux/swap.h 2004-05-31 02:06:19.000000000 -0400
|
||
|
+++ src/linux/linux/include/linux/swap.h 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -58,15 +58,29 @@
|
||
|
#define SWAP_MAP_MAX 0x7fff
|
||
|
#define SWAP_MAP_BAD 0x8000
|
||
|
|
||
|
+struct swap_ops {
|
||
|
+ int (*open)(struct file *swapf, void **data);
|
||
|
+ int (*release)(struct file *swapf, void *data);
|
||
|
+ int (*rw_page)(int rw,
|
||
|
+ struct page *page, unsigned long offset, void *data);
|
||
|
+};
|
||
|
+
|
||
|
+struct swap_method {
|
||
|
+ struct swap_method *next;
|
||
|
+ char * name;
|
||
|
+ struct swap_ops *ops;
|
||
|
+ int use_count;
|
||
|
+};
|
||
|
+
|
||
|
/*
|
||
|
* The in-memory structure used to track swap areas.
|
||
|
*/
|
||
|
struct swap_info_struct {
|
||
|
unsigned int flags;
|
||
|
- kdev_t swap_device;
|
||
|
+ struct file *swap_file;
|
||
|
+ struct swap_method *method;
|
||
|
+ void *data;
|
||
|
spinlock_t sdev_lock;
|
||
|
- struct dentry * swap_file;
|
||
|
- struct vfsmount *swap_vfsmnt;
|
||
|
unsigned short * swap_map;
|
||
|
unsigned int lowest_bit;
|
||
|
unsigned int highest_bit;
|
||
|
@@ -141,11 +155,15 @@
|
||
|
extern int total_swap_pages;
|
||
|
extern unsigned int nr_swapfiles;
|
||
|
extern struct swap_info_struct swap_info[];
|
||
|
-extern int is_swap_partition(kdev_t);
|
||
|
+extern int register_swap_method(char *name, struct swap_ops *ops);
|
||
|
+extern int unregister_swap_method(char *name);
|
||
|
+extern int swap_run_test(int (*test_fct)(unsigned int flags,
|
||
|
+ struct file *swap_file,
|
||
|
+ void *testdata), void *testdata);
|
||
|
extern void si_swapinfo(struct sysinfo *);
|
||
|
extern swp_entry_t get_swap_page(void);
|
||
|
-extern void get_swaphandle_info(swp_entry_t, unsigned long *, kdev_t *,
|
||
|
- struct inode **);
|
||
|
+struct swap_method *get_swaphandle_info(swp_entry_t entry,
|
||
|
+ unsigned long *offset, void **data);
|
||
|
extern int swap_duplicate(swp_entry_t);
|
||
|
extern int swap_count(struct page *);
|
||
|
extern int valid_swaphandles(swp_entry_t, unsigned long *);
|
||
|
diff -Nurb src/linux/linux.orig/include/net/netswapping.h src/linux/linux/include/net/netswapping.h
|
||
|
--- src/linux/linux.orig/include/net/netswapping.h 1969-12-31 19:00:00.000000000 -0500
|
||
|
+++ src/linux/linux/include/net/netswapping.h 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -0,0 +1,47 @@
|
||
|
+#ifndef _LINUX_NETSWAPPING_H
|
||
|
+#define _LINUX_NETSWAPPING_H
|
||
|
+
|
||
|
+#include <linux/swap.h>
|
||
|
+#include <linux/init.h>
|
||
|
+
|
||
|
+/* It is a mess. Socket options are defined in asm-ARCH/socket.h */
|
||
|
+
|
||
|
+#define SO_SWAPPING 0x00100000 /* hopefully not used by anybody else */
|
||
|
+
|
||
|
+#ifdef __KERNEL__
|
||
|
+
|
||
|
+#define CTL_NETSWAP 0x00100000
|
||
|
+
|
||
|
+enum {
|
||
|
+ NET_SWAP_DROPPED = 1,
|
||
|
+ NET_SWAP_DROP_THRESHOLD = 2,
|
||
|
+ NET_SWAP_SOCK_COUNT = 3
|
||
|
+};
|
||
|
+
|
||
|
+extern unsigned int netswap_free_pages_min;
|
||
|
+extern int netswap_sock_count;
|
||
|
+extern unsigned int netswap_dropped;
|
||
|
+
|
||
|
+/* this is "#defined" and not inline because sock.h includes us, but we need
|
||
|
+ * the "struct sock" definition.
|
||
|
+ */
|
||
|
+#define netswap_low_memory(sk, skb) \
|
||
|
+({ \
|
||
|
+ int _ret = 0; \
|
||
|
+ \
|
||
|
+ if (netswap_sock_count > 0 && /* anybody swapping via network? */ \
|
||
|
+ !(sk)->swapping && /* but we are not needed for swapping */ \
|
||
|
+ nr_free_pages() < netswap_free_pages_min) { /* so drop us */ \
|
||
|
+ printk("netswap_low_memory: " \
|
||
|
+ "dropping skb 0x%p@0x%p\n", skb, sk); \
|
||
|
+ netswap_dropped ++; \
|
||
|
+ _ret = 1; \
|
||
|
+ } \
|
||
|
+ _ret; \
|
||
|
+})
|
||
|
+
|
||
|
+extern int __init netswap_init(void);
|
||
|
+
|
||
|
+#endif
|
||
|
+
|
||
|
+#endif
|
||
|
diff -Nurb src/linux/linux.orig/include/net/sock.h src/linux/linux/include/net/sock.h
|
||
|
--- src/linux/linux.orig/include/net/sock.h 2004-05-31 02:07:17.000000000 -0400
|
||
|
+++ src/linux/linux/include/net/sock.h 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -103,6 +103,10 @@
|
||
|
#include <linux/filter.h>
|
||
|
#endif
|
||
|
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+#include <net/netswapping.h>
|
||
|
+#endif
|
||
|
+
|
||
|
#include <asm/atomic.h>
|
||
|
#include <net/dst.h>
|
||
|
|
||
|
@@ -536,6 +540,12 @@
|
||
|
no_check,
|
||
|
broadcast,
|
||
|
bsdism;
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+ /* Increased by SO_SWAPPING with arg != 0, decreased by
|
||
|
+ * SO_SWAPPING with arg 0
|
||
|
+ */
|
||
|
+ int swapping;
|
||
|
+#endif
|
||
|
unsigned char debug;
|
||
|
unsigned char rcvtstamp;
|
||
|
unsigned char use_write_queue;
|
||
|
@@ -1165,6 +1175,11 @@
|
||
|
return err; /* Toss packet */
|
||
|
}
|
||
|
#endif /* CONFIG_FILTER */
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+ /* a macro (not an inline function) defined in net/netswapping.h */
|
||
|
+ if (netswap_low_memory(sk, skb))
|
||
|
+ return -ENOMEM;
|
||
|
+#endif /* CONFIG_NETSWAP */
|
||
|
|
||
|
skb->dev = NULL;
|
||
|
skb_set_owner_r(skb, sk);
|
||
|
diff -Nurb src/linux/linux.orig/kernel/ksyms.c src/linux/linux/kernel/ksyms.c
|
||
|
--- src/linux/linux.orig/kernel/ksyms.c 2004-05-31 02:02:43.000000000 -0400
|
||
|
+++ src/linux/linux/kernel/ksyms.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -41,6 +41,7 @@
|
||
|
#include <linux/mm.h>
|
||
|
#include <linux/capability.h>
|
||
|
#include <linux/highuid.h>
|
||
|
+#include <linux/swapctl.h>
|
||
|
#include <linux/brlock.h>
|
||
|
#include <linux/fs.h>
|
||
|
#include <linux/tty.h>
|
||
|
@@ -127,6 +128,11 @@
|
||
|
EXPORT_SYMBOL(kmap_prot);
|
||
|
EXPORT_SYMBOL(kmap_pte);
|
||
|
#endif
|
||
|
+EXPORT_SYMBOL(nr_free_pages);
|
||
|
+/* EXPORT_SYMBOL(freepages); */
|
||
|
+EXPORT_SYMBOL(register_swap_method);
|
||
|
+EXPORT_SYMBOL(unregister_swap_method);
|
||
|
+EXPORT_SYMBOL(swap_run_test);
|
||
|
|
||
|
/* filesystem internal functions */
|
||
|
EXPORT_SYMBOL(def_blk_fops);
|
||
|
@@ -531,7 +537,7 @@
|
||
|
EXPORT_SYMBOL(make_bad_inode);
|
||
|
EXPORT_SYMBOL(is_bad_inode);
|
||
|
EXPORT_SYMBOL(event);
|
||
|
-EXPORT_SYMBOL(brw_page);
|
||
|
+EXPORT_SYMBOL(end_buffer_io_async);
|
||
|
EXPORT_SYMBOL(__inode_dir_notify);
|
||
|
|
||
|
#ifdef CONFIG_UID16
|
||
|
diff -Nurb src/linux/linux.orig/mm/page_io.c src/linux/linux/mm/page_io.c
|
||
|
--- src/linux/linux.orig/mm/page_io.c 2003-07-04 04:12:29.000000000 -0400
|
||
|
+++ src/linux/linux/mm/page_io.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -36,11 +36,8 @@
|
||
|
static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page)
|
||
|
{
|
||
|
unsigned long offset;
|
||
|
- int zones[PAGE_SIZE/512];
|
||
|
- int zones_used;
|
||
|
- kdev_t dev = 0;
|
||
|
- int block_size;
|
||
|
- struct inode *swapf = 0;
|
||
|
+ struct swap_method *method;
|
||
|
+ void *data;
|
||
|
|
||
|
if (rw == READ) {
|
||
|
ClearPageUptodate(page);
|
||
|
@@ -48,30 +45,11 @@
|
||
|
} else
|
||
|
kstat.pswpout++;
|
||
|
|
||
|
- get_swaphandle_info(entry, &offset, &dev, &swapf);
|
||
|
- if (dev) {
|
||
|
- zones[0] = offset;
|
||
|
- zones_used = 1;
|
||
|
- block_size = PAGE_SIZE;
|
||
|
- } else if (swapf) {
|
||
|
- int i, j;
|
||
|
- unsigned int block = offset
|
||
|
- << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
|
||
|
-
|
||
|
- block_size = swapf->i_sb->s_blocksize;
|
||
|
- for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
|
||
|
- if (!(zones[i] = bmap(swapf,block++))) {
|
||
|
- printk("rw_swap_page: bad swap file\n");
|
||
|
- return 0;
|
||
|
- }
|
||
|
- zones_used = i;
|
||
|
- dev = swapf->i_dev;
|
||
|
- } else {
|
||
|
+ method = get_swaphandle_info(entry, &offset, &data);
|
||
|
+ if (!method || !method->ops->rw_page(rw, page, offset, data)) {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
- /* block_size == PAGE_SIZE/zones_used */
|
||
|
- brw_page(rw, page, dev, zones, block_size);
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
diff -Nurb src/linux/linux.orig/mm/slab.c src/linux/linux/mm/slab.c
|
||
|
--- src/linux/linux.orig/mm/slab.c 2003-07-04 04:12:29.000000000 -0400
|
||
|
+++ src/linux/linux/mm/slab.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -111,10 +111,12 @@
|
||
|
# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
|
||
|
SLAB_POISON | SLAB_HWCACHE_ALIGN | \
|
||
|
SLAB_NO_REAP | SLAB_CACHE_DMA | \
|
||
|
- SLAB_MUST_HWCACHE_ALIGN)
|
||
|
+ SLAB_MUST_HWCACHE_ALIGN | \
|
||
|
+ SLAB_LOW_GFP_ORDER)
|
||
|
#else
|
||
|
# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
|
||
|
- SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
|
||
|
+ SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
|
||
|
+ SLAB_LOW_GFP_ORDER)
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
@@ -247,8 +249,13 @@
|
||
|
};
|
||
|
|
||
|
/* internal c_flags */
|
||
|
-#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
|
||
|
-#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */
|
||
|
+#define CFLGS_OFF_SLAB 0x020000UL /* slab management in own cache */
|
||
|
+#define CFLGS_OPTIMIZE 0x040000UL /* optimized slab lookup */
|
||
|
+#define CFLGS_MASK (CFLGS_OFF_SLAB | CFLGS_OPTIMIZE)
|
||
|
+
|
||
|
+#if (CFLGS_MASK & CREATE_MASK)
|
||
|
+# error BUG: internal and external SLAB flags overlap
|
||
|
+#endif
|
||
|
|
||
|
/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
|
||
|
#define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */
|
||
|
@@ -452,7 +459,12 @@
|
||
|
snprintf(name, sizeof(name), "size-%Zd",sizes->cs_size);
|
||
|
if (!(sizes->cs_cachep =
|
||
|
kmem_cache_create(name, sizes->cs_size,
|
||
|
- 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
|
||
|
+ 0,
|
||
|
+#if CONFIG_NETSWAP
|
||
|
+ SLAB_LOW_GFP_ORDER| /* sorry */
|
||
|
+#endif
|
||
|
+ SLAB_HWCACHE_ALIGN,
|
||
|
+ NULL, NULL))) {
|
||
|
BUG();
|
||
|
}
|
||
|
|
||
|
@@ -731,6 +743,8 @@
|
||
|
break;
|
||
|
if (!cachep->num)
|
||
|
goto next;
|
||
|
+ if (cachep->gfporder == 0 && (flags & SLAB_LOW_GFP_ORDER))
|
||
|
+ break;
|
||
|
if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
|
||
|
/* Oops, this num of objs will cause problems. */
|
||
|
cachep->gfporder--;
|
||
|
diff -Nurb src/linux/linux.orig/mm/swapfile.c src/linux/linux/mm/swapfile.c
|
||
|
--- src/linux/linux.orig/mm/swapfile.c 2003-07-04 04:12:29.000000000 -0400
|
||
|
+++ src/linux/linux/mm/swapfile.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -11,12 +11,17 @@
|
||
|
#include <linux/swap.h>
|
||
|
#include <linux/swapctl.h>
|
||
|
#include <linux/blkdev.h> /* for blk_size */
|
||
|
+#include <linux/file.h>
|
||
|
#include <linux/vmalloc.h>
|
||
|
#include <linux/pagemap.h>
|
||
|
#include <linux/shm.h>
|
||
|
|
||
|
#include <asm/pgtable.h>
|
||
|
|
||
|
+#ifdef CONFIG_KMOD
|
||
|
+#include <linux/kmod.h>
|
||
|
+#endif
|
||
|
+
|
||
|
spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
|
||
|
unsigned int nr_swapfiles;
|
||
|
int total_swap_pages;
|
||
|
@@ -31,8 +36,78 @@
|
||
|
|
||
|
struct swap_info_struct swap_info[MAX_SWAPFILES];
|
||
|
|
||
|
+static struct swap_method *swap_methods = NULL;
|
||
|
+
|
||
|
#define SWAPFILE_CLUSTER 256
|
||
|
|
||
|
+int register_swap_method(char *name, struct swap_ops *ops)
|
||
|
+{
|
||
|
+ struct swap_method *pos;
|
||
|
+ struct swap_method *new;
|
||
|
+ int result = 0;
|
||
|
+
|
||
|
+ lock_kernel();
|
||
|
+
|
||
|
+ for (pos = swap_methods; pos; pos = pos->next) {
|
||
|
+ if (strcmp(pos->name, name) == 0) {
|
||
|
+ printk(KERN_ERR "register_swap_method: "
|
||
|
+ "method %s already registered\n", name);
|
||
|
+ result = -EBUSY;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ if (!(new = kmalloc(sizeof(*new), GFP_KERNEL))) {
|
||
|
+ printk(KERN_ERR "register_swap_method: "
|
||
|
+ "no memory for new method \"%s\"\n", name);
|
||
|
+ result = -ENOMEM;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
+ new->name = name;
|
||
|
+ new->ops = ops;
|
||
|
+ new->use_count = 0;
|
||
|
+
|
||
|
+ /* ok, insert at top of list */
|
||
|
+ printk("register_swap_method: method %s\n", name);
|
||
|
+ new->next = swap_methods;
|
||
|
+ swap_methods = new;
|
||
|
+ out:
|
||
|
+ unlock_kernel();
|
||
|
+ return result;
|
||
|
+}
|
||
|
+
|
||
|
+int unregister_swap_method(char *name)
|
||
|
+{
|
||
|
+ struct swap_method **method, *next;
|
||
|
+ int result = 0;
|
||
|
+
|
||
|
+ lock_kernel();
|
||
|
+
|
||
|
+ for (method = &swap_methods; *method; method = &(*method)->next) {
|
||
|
+ if (strcmp((*method)->name, name) == 0) {
|
||
|
+ if ((*method)->use_count > 0) {
|
||
|
+ printk(KERN_ERR "unregister_swap_method: "
|
||
|
+ "method \"%s\" is in use\n", name);
|
||
|
+ result = -EBUSY;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
+ next = (*method)->next;
|
||
|
+ kfree(*method);
|
||
|
+ *method = next;
|
||
|
+ printk("unregister_swap_method: method %s\n", name);
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ /* not found */
|
||
|
+ printk("unregister_swap_method: no such method %s\n", name);
|
||
|
+ result = -ENOENT;
|
||
|
+ out:
|
||
|
+ unlock_kernel();
|
||
|
+ return result;
|
||
|
+}
|
||
|
+
|
||
|
static inline int scan_swap_map(struct swap_info_struct *si)
|
||
|
{
|
||
|
unsigned long offset;
|
||
|
@@ -711,13 +786,14 @@
|
||
|
struct nameidata nd;
|
||
|
int i, type, prev;
|
||
|
int err;
|
||
|
+ struct file *swap_file;
|
||
|
|
||
|
if (!capable(CAP_SYS_ADMIN))
|
||
|
return -EPERM;
|
||
|
|
||
|
err = user_path_walk(specialfile, &nd);
|
||
|
if (err)
|
||
|
- goto out;
|
||
|
+ return err;
|
||
|
|
||
|
lock_kernel();
|
||
|
prev = -1;
|
||
|
@@ -725,15 +801,20 @@
|
||
|
for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
|
||
|
p = swap_info + type;
|
||
|
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
|
||
|
- if (p->swap_file == nd.dentry)
|
||
|
+ if (p->swap_file &&
|
||
|
+ p->swap_file->f_dentry == nd.dentry)
|
||
|
break;
|
||
|
}
|
||
|
prev = type;
|
||
|
}
|
||
|
err = -EINVAL;
|
||
|
+ /* p->swap_file contains all needed info, no need to keep nd, so
|
||
|
+ * release it now.
|
||
|
+ */
|
||
|
+ path_release(&nd);
|
||
|
if (type < 0) {
|
||
|
swap_list_unlock();
|
||
|
- goto out_dput;
|
||
|
+ goto out;
|
||
|
}
|
||
|
|
||
|
if (prev < 0) {
|
||
|
@@ -767,32 +848,30 @@
|
||
|
total_swap_pages += p->pages;
|
||
|
p->flags = SWP_WRITEOK;
|
||
|
swap_list_unlock();
|
||
|
- goto out_dput;
|
||
|
+ goto out;
|
||
|
}
|
||
|
- if (p->swap_device)
|
||
|
- blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP);
|
||
|
- path_release(&nd);
|
||
|
|
||
|
+ if (p->method->ops->release)
|
||
|
+ p->method->ops->release(p->swap_file, p->data);
|
||
|
swap_list_lock();
|
||
|
swap_device_lock(p);
|
||
|
- nd.mnt = p->swap_vfsmnt;
|
||
|
- nd.dentry = p->swap_file;
|
||
|
- p->swap_vfsmnt = NULL;
|
||
|
+ p->method->use_count --;
|
||
|
+ p->method = NULL;
|
||
|
+ p->data = NULL;
|
||
|
+ swap_file = p->swap_file;
|
||
|
p->swap_file = NULL;
|
||
|
- p->swap_device = 0;
|
||
|
p->max = 0;
|
||
|
swap_map = p->swap_map;
|
||
|
p->swap_map = NULL;
|
||
|
p->flags = 0;
|
||
|
swap_device_unlock(p);
|
||
|
swap_list_unlock();
|
||
|
+ filp_close(swap_file, NULL);
|
||
|
vfree(swap_map);
|
||
|
err = 0;
|
||
|
|
||
|
-out_dput:
|
||
|
- unlock_kernel();
|
||
|
- path_release(&nd);
|
||
|
out:
|
||
|
+ unlock_kernel();
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
@@ -805,18 +884,17 @@
|
||
|
if (!page)
|
||
|
return -ENOMEM;
|
||
|
|
||
|
- len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
|
||
|
+ len += sprintf(buf, "%-32s%-16s%-8s%-8sPriority\n",
|
||
|
+ "Filename", "Type", "Size", "Used");
|
||
|
for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
|
||
|
if ((ptr->flags & SWP_USED) && ptr->swap_map) {
|
||
|
- char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt,
|
||
|
+ char * path = d_path(ptr->swap_file->f_dentry,
|
||
|
+ ptr->swap_file->f_vfsmnt,
|
||
|
page, PAGE_SIZE);
|
||
|
|
||
|
len += sprintf(buf + len, "%-31s ", path);
|
||
|
|
||
|
- if (!ptr->swap_device)
|
||
|
- len += sprintf(buf + len, "file\t\t");
|
||
|
- else
|
||
|
- len += sprintf(buf + len, "partition\t");
|
||
|
+ len += sprintf(buf + len, "%-15s ", ptr->method->name);
|
||
|
|
||
|
usedswap = 0;
|
||
|
for (j = 0; j < ptr->max; ++j)
|
||
|
@@ -827,7 +905,7 @@
|
||
|
default:
|
||
|
usedswap++;
|
||
|
}
|
||
|
- len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10),
|
||
|
+ len += sprintf(buf + len, "%-8d%-8d%d\n", ptr->pages << (PAGE_SHIFT - 10),
|
||
|
usedswap << (PAGE_SHIFT - 10), ptr->prio);
|
||
|
}
|
||
|
}
|
||
|
@@ -835,18 +913,55 @@
|
||
|
return len;
|
||
|
}
|
||
|
|
||
|
-int is_swap_partition(kdev_t dev) {
|
||
|
+/* apply a test function to all active swap objects. E.g. for checking
|
||
|
+ * whether a partition is used for swapping
|
||
|
+ */
|
||
|
+int swap_run_test(int (*test_fct)(unsigned int flags,
|
||
|
+ struct file * swap_file,
|
||
|
+ void *testdata), void *testdata)
|
||
|
+{
|
||
|
struct swap_info_struct *ptr = swap_info;
|
||
|
int i;
|
||
|
|
||
|
for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
|
||
|
- if (ptr->flags & SWP_USED)
|
||
|
- if (ptr->swap_device == dev)
|
||
|
+ if (ptr->swap_file &&
|
||
|
+ test_fct(ptr->flags, ptr->swap_file, testdata))
|
||
|
return 1;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+/* Walk through the list of known swap methods until somebody wants to
|
||
|
+ * handle this file. Pick the first one which claims to be able to
|
||
|
+ * swap to this kind of file.
|
||
|
+ *
|
||
|
+ * return value: < 0: error, 0: not found, > 0: swapfilesize
|
||
|
+ */
|
||
|
+int find_swap_method(struct file *swap_file,
|
||
|
+ struct swap_info_struct *p)
|
||
|
+{
|
||
|
+ int swapfilesize = 0;
|
||
|
+ struct swap_method *method;
|
||
|
+
|
||
|
+ p->method = NULL;
|
||
|
+ for (method = swap_methods; method; method = method->next) {
|
||
|
+ swapfilesize = method->ops->open(swap_file, &p->data);
|
||
|
+ if (swapfilesize == 0) {
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+ if (swapfilesize > 0) {
|
||
|
+ p->method = method;
|
||
|
+ p->method->use_count ++;
|
||
|
+ p->swap_file = swap_file;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ if (swapfilesize < 0) {
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ return swapfilesize;
|
||
|
+}
|
||
|
+
|
||
|
/*
|
||
|
* Written 01/25/92 by Simmule Turner, heavily changed by Linus.
|
||
|
*
|
||
|
@@ -855,8 +970,6 @@
|
||
|
asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
|
||
|
{
|
||
|
struct swap_info_struct * p;
|
||
|
- struct nameidata nd;
|
||
|
- struct inode * swap_inode;
|
||
|
unsigned int type;
|
||
|
int i, j, prev;
|
||
|
int error;
|
||
|
@@ -866,8 +979,9 @@
|
||
|
int nr_good_pages = 0;
|
||
|
unsigned long maxpages = 1;
|
||
|
int swapfilesize;
|
||
|
- struct block_device *bdev = NULL;
|
||
|
unsigned short *swap_map;
|
||
|
+ char * tmp_specialfile;
|
||
|
+ struct file *swap_file;
|
||
|
|
||
|
if (!capable(CAP_SYS_ADMIN))
|
||
|
return -EPERM;
|
||
|
@@ -886,8 +1000,7 @@
|
||
|
nr_swapfiles = type+1;
|
||
|
p->flags = SWP_USED;
|
||
|
p->swap_file = NULL;
|
||
|
- p->swap_vfsmnt = NULL;
|
||
|
- p->swap_device = 0;
|
||
|
+ p->method = NULL;
|
||
|
p->swap_map = NULL;
|
||
|
p->lowest_bit = 0;
|
||
|
p->highest_bit = 0;
|
||
|
@@ -901,53 +1014,56 @@
|
||
|
p->prio = --least_priority;
|
||
|
}
|
||
|
swap_list_unlock();
|
||
|
- error = user_path_walk(specialfile, &nd);
|
||
|
- if (error)
|
||
|
+
|
||
|
+ /* Open the swap using filp_open. Bail out on any errors. */
|
||
|
+ tmp_specialfile = getname(specialfile);
|
||
|
+ if (IS_ERR(tmp_specialfile)) {
|
||
|
+ error = PTR_ERR(tmp_specialfile);
|
||
|
goto bad_swap_2;
|
||
|
+ }
|
||
|
+ p->swap_file = filp_open(tmp_specialfile, O_RDWR, 0600);
|
||
|
+ putname(tmp_specialfile);
|
||
|
+ if (IS_ERR(p->swap_file)) {
|
||
|
+ error = PTR_ERR(p->swap_file);
|
||
|
+ goto bad_swap_1;
|
||
|
+ }
|
||
|
|
||
|
- p->swap_file = nd.dentry;
|
||
|
- p->swap_vfsmnt = nd.mnt;
|
||
|
- swap_inode = nd.dentry->d_inode;
|
||
|
error = -EINVAL;
|
||
|
|
||
|
- if (S_ISBLK(swap_inode->i_mode)) {
|
||
|
- kdev_t dev = swap_inode->i_rdev;
|
||
|
- struct block_device_operations *bdops;
|
||
|
- devfs_handle_t de;
|
||
|
-
|
||
|
- p->swap_device = dev;
|
||
|
- set_blocksize(dev, PAGE_SIZE);
|
||
|
-
|
||
|
- bd_acquire(swap_inode);
|
||
|
- bdev = swap_inode->i_bdev;
|
||
|
- de = devfs_get_handle_from_inode(swap_inode);
|
||
|
- bdops = devfs_get_ops(de); /* Increments module use count */
|
||
|
- if (bdops) bdev->bd_op = bdops;
|
||
|
-
|
||
|
- error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP);
|
||
|
- devfs_put_ops(de);/*Decrement module use count now we're safe*/
|
||
|
- if (error)
|
||
|
- goto bad_swap_2;
|
||
|
- set_blocksize(dev, PAGE_SIZE);
|
||
|
- error = -ENODEV;
|
||
|
- if (!dev || (blk_size[MAJOR(dev)] &&
|
||
|
- !blk_size[MAJOR(dev)][MINOR(dev)]))
|
||
|
- goto bad_swap;
|
||
|
- swapfilesize = 0;
|
||
|
- if (blk_size[MAJOR(dev)])
|
||
|
- swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
|
||
|
- >> (PAGE_SHIFT - 10);
|
||
|
- } else if (S_ISREG(swap_inode->i_mode))
|
||
|
- swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
|
||
|
- else
|
||
|
- goto bad_swap;
|
||
|
+ swapfilesize = find_swap_method(p->swap_file, p);
|
||
|
+ if (swapfilesize < 0) {
|
||
|
+ error = swapfilesize;
|
||
|
+ goto bad_swap_1;
|
||
|
+ }
|
||
|
+#ifdef CONFIG_KMOD
|
||
|
+ if (swapfilesize == 0) {
|
||
|
+ (void)request_module("swapfile-mod");
|
||
|
+
|
||
|
+ swapfilesize = find_swap_method(p->swap_file, p);
|
||
|
+ if (swapfilesize < 0) {
|
||
|
+ error = swapfilesize;
|
||
|
+ goto bad_swap_1;
|
||
|
+ }
|
||
|
+ }
|
||
|
+#endif
|
||
|
+ if (swapfilesize == 0) {
|
||
|
+ printk("Don't know how to swap to this kind of file\n");
|
||
|
+ goto bad_swap_1; /* free swap map */
|
||
|
+ }
|
||
|
+
|
||
|
+ /* After this point, the swap-file has been opened by the swap
|
||
|
+ * method. We must make sure to use the bad_swap label for any
|
||
|
+ * errors.
|
||
|
+ */
|
||
|
|
||
|
error = -EBUSY;
|
||
|
for (i = 0 ; i < nr_swapfiles ; i++) {
|
||
|
struct swap_info_struct *q = &swap_info[i];
|
||
|
if (i == type || !q->swap_file)
|
||
|
continue;
|
||
|
- if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
|
||
|
+ if (p->swap_file->f_dentry->d_inode->i_mapping
|
||
|
+ ==
|
||
|
+ q->swap_file->f_dentry->d_inode->i_mapping)
|
||
|
goto bad_swap;
|
||
|
}
|
||
|
|
||
|
@@ -1083,17 +1199,27 @@
|
||
|
swap_list_unlock();
|
||
|
error = 0;
|
||
|
goto out;
|
||
|
+
|
||
|
bad_swap:
|
||
|
- if (bdev)
|
||
|
- blkdev_put(bdev, BDEV_SWAP);
|
||
|
+ if (p->method->ops->release)
|
||
|
+ p->method->ops->release(p->swap_file, p->data);
|
||
|
+ swap_list_lock();
|
||
|
+ p->method->use_count --;
|
||
|
+ p->method = NULL;
|
||
|
+ p->data = NULL;
|
||
|
+ swap_list_unlock();
|
||
|
+
|
||
|
+bad_swap_1:
|
||
|
+ swap_list_lock();
|
||
|
+ swap_file = p->swap_file;
|
||
|
+ p->swap_file = NULL;
|
||
|
+ swap_list_unlock();
|
||
|
+ if (!IS_ERR(swap_file)) /* a failed filp_open() jumps here with an ERR_PTR */
|
||
|
+ filp_close(swap_file, NULL);
|
||
|
bad_swap_2:
|
||
|
+
|
||
|
swap_list_lock();
|
||
|
swap_map = p->swap_map;
|
||
|
- nd.mnt = p->swap_vfsmnt;
|
||
|
- nd.dentry = p->swap_file;
|
||
|
- p->swap_device = 0;
|
||
|
- p->swap_file = NULL;
|
||
|
- p->swap_vfsmnt = NULL;
|
||
|
p->swap_map = NULL;
|
||
|
p->flags = 0;
|
||
|
if (!(swap_flags & SWAP_FLAG_PREFER))
|
||
|
@@ -1101,7 +1227,7 @@
|
||
|
swap_list_unlock();
|
||
|
if (swap_map)
|
||
|
vfree(swap_map);
|
||
|
- path_release(&nd);
|
||
|
+
|
||
|
out:
|
||
|
if (swap_header)
|
||
|
free_page((long) swap_header);
|
||
|
@@ -1217,8 +1343,8 @@
|
||
|
/*
|
||
|
* Prior swap_duplicate protects against swap device deletion.
|
||
|
*/
|
||
|
-void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
|
||
|
- kdev_t *dev, struct inode **swapf)
|
||
|
+struct swap_method *get_swaphandle_info(swp_entry_t entry,
|
||
|
+ unsigned long *offset, void **data)
|
||
|
{
|
||
|
unsigned long type;
|
||
|
struct swap_info_struct *p;
|
||
|
@@ -1226,32 +1352,26 @@
|
||
|
type = SWP_TYPE(entry);
|
||
|
if (type >= nr_swapfiles) {
|
||
|
printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val);
|
||
|
- return;
|
||
|
+ return NULL;
|
||
|
}
|
||
|
|
||
|
p = &swap_info[type];
|
||
|
*offset = SWP_OFFSET(entry);
|
||
|
if (*offset >= p->max && *offset != 0) {
|
||
|
printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val);
|
||
|
- return;
|
||
|
+ return NULL;
|
||
|
}
|
||
|
if (p->swap_map && !p->swap_map[*offset]) {
|
||
|
printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val);
|
||
|
- return;
|
||
|
+ return NULL;
|
||
|
}
|
||
|
if (!(p->flags & SWP_USED)) {
|
||
|
printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val);
|
||
|
- return;
|
||
|
+ return NULL;
|
||
|
}
|
||
|
|
||
|
- if (p->swap_device) {
|
||
|
- *dev = p->swap_device;
|
||
|
- } else if (p->swap_file) {
|
||
|
- *swapf = p->swap_file->d_inode;
|
||
|
- } else {
|
||
|
- printk(KERN_ERR "rw_swap_page: no swap file or device\n");
|
||
|
- }
|
||
|
- return;
|
||
|
+ *data = p->data;
|
||
|
+ return p->method;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
diff -Nurb src/linux/linux.orig/net/Config.in src/linux/linux/net/Config.in
|
||
|
--- src/linux/linux.orig/net/Config.in 2003-07-04 04:12:29.000000000 -0400
|
||
|
+++ src/linux/linux/net/Config.in 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -16,6 +16,9 @@
|
||
|
fi
|
||
|
bool 'Socket Filtering' CONFIG_FILTER
|
||
|
tristate 'Unix domain sockets' CONFIG_UNIX
|
||
|
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
|
||
|
+ bool 'Swapping via network sockets (EXPERIMENTAL)' CONFIG_NETSWAP
|
||
|
+fi
|
||
|
bool 'TCP/IP networking' CONFIG_INET
|
||
|
if [ "$CONFIG_INET" = "y" ]; then
|
||
|
source net/ipv4/Config.in
|
||
|
diff -Nurb src/linux/linux.orig/net/Makefile src/linux/linux/net/Makefile
|
||
|
--- src/linux/linux.orig/net/Makefile 2003-07-04 04:12:29.000000000 -0400
|
||
|
+++ src/linux/linux/net/Makefile 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -51,6 +51,7 @@
|
||
|
ifeq ($(CONFIG_NET),y)
|
||
|
obj-$(CONFIG_MODULES) += netsyms.o
|
||
|
obj-$(CONFIG_SYSCTL) += sysctl_net.o
|
||
|
+obj-$(CONFIG_NETSWAP) += netswapping.o
|
||
|
endif
|
||
|
|
||
|
include $(TOPDIR)/Rules.make
|
||
|
diff -Nurb src/linux/linux.orig/net/core/sock.c src/linux/linux/net/core/sock.c
|
||
|
--- src/linux/linux.orig/net/core/sock.c 2003-10-14 04:09:32.000000000 -0400
|
||
|
+++ src/linux/linux/net/core/sock.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -402,6 +402,21 @@
|
||
|
ret = -ENONET;
|
||
|
break;
|
||
|
#endif
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+ case SO_SWAPPING:
|
||
|
+ if (valbool) {
|
||
|
+ if (!sk->swapping) {
|
||
|
+ netswap_sock_count ++;
|
||
|
+ }
|
||
|
+ sk->swapping ++;
|
||
|
+ } else if (sk->swapping > 0) {
|
||
|
+ sk->swapping --;
|
||
|
+ if (!sk->swapping) {
|
||
|
+ netswap_sock_count --;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ break;
|
||
|
+#endif
|
||
|
/* We implement the SO_SNDLOWAT etc to
|
||
|
not be settable (1003.1g 5.3) */
|
||
|
default:
|
||
|
@@ -552,6 +567,12 @@
|
||
|
goto lenout;
|
||
|
}
|
||
|
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+ case SO_SWAPPING:
|
||
|
+ v.val = sk->swapping;
|
||
|
+ break;
|
||
|
+#endif
|
||
|
+
|
||
|
/* Dubious BSD thing... Probably nobody even uses it, but
|
||
|
* the UNIX standard wants it for whatever reason... -DaveM
|
||
|
*/
|
||
|
diff -Nurb src/linux/linux.orig/net/ipv4/tcp_ipv4.c src/linux/linux/net/ipv4/tcp_ipv4.c
|
||
|
--- src/linux/linux.orig/net/ipv4/tcp_ipv4.c 2003-10-14 04:09:33.000000000 -0400
|
||
|
+++ src/linux/linux/net/ipv4/tcp_ipv4.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -1657,6 +1657,12 @@
|
||
|
if (filter && sk_filter(skb, filter))
|
||
|
goto discard;
|
||
|
#endif /* CONFIG_FILTER */
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+ /* tcp doesn't use sock_queue_rcv_skb() ... */
|
||
|
+ /* an inline function defined in net/netswapping.h */
|
||
|
+ if (netswap_low_memory(sk, skb))
|
||
|
+ goto discard;
|
||
|
+#endif /* CONFIG_NETSWAP */
|
||
|
|
||
|
IP_INC_STATS_BH(IpInDelivers);
|
||
|
|
||
|
diff -Nurb src/linux/linux.orig/net/ipv6/tcp_ipv6.c src/linux/linux/net/ipv6/tcp_ipv6.c
|
||
|
--- src/linux/linux.orig/net/ipv6/tcp_ipv6.c 2003-10-14 04:09:34.000000000 -0400
|
||
|
+++ src/linux/linux/net/ipv6/tcp_ipv6.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -1424,6 +1424,12 @@
|
||
|
if (filter && sk_filter(skb, filter))
|
||
|
goto discard;
|
||
|
#endif /* CONFIG_FILTER */
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+ /* tcp doesn't use sock_queue_rcv_skb() ... */
|
||
|
+ /* an inline function defined in net/netswapping.h */
|
||
|
+ if (netswap_low_memory(sk, skb))
|
||
|
+ goto discard;
|
||
|
+#endif /* CONFIG_NETSWAP */
|
||
|
|
||
|
/*
|
||
|
* socket locking is here for SMP purposes as backlog rcv
|
||
|
diff -Nurb src/linux/linux.orig/net/netswapping.c src/linux/linux/net/netswapping.c
|
||
|
--- src/linux/linux.orig/net/netswapping.c 1969-12-31 19:00:00.000000000 -0500
|
||
|
+++ src/linux/linux/net/netswapping.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -0,0 +1,76 @@
|
||
|
+/*
|
||
|
+ * linux/net/netswapping.c
|
||
|
+ *
|
||
|
+ * Support paging over network connections (inet only)
|
||
|
+ *
|
||
|
+ * (c) 2000 Claus-Justus Heine <heine@instmath.rwth-aachen.de>
|
||
|
+ */
|
||
|
+
|
||
|
+#include <linux/slab.h>
|
||
|
+#include <linux/swap.h>
|
||
|
+#include <linux/swapctl.h>
|
||
|
+#include <linux/skbuff.h>
|
||
|
+#include <linux/module.h>
|
||
|
+#include <linux/sysctl.h>
|
||
|
+#include <linux/init.h>
|
||
|
+#include <net/netswapping.h>
|
||
|
+#include <net/sock.h>
|
||
|
+#include <asm/uaccess.h>
|
||
|
+
|
||
|
+unsigned int netswap_dropped; /* statistics */
|
||
|
+unsigned int netswap_free_pages_min;
|
||
|
+int netswap_sock_count; /* how many sockets have swapping option set */
|
||
|
+
|
||
|
+#ifdef CONFIG_SYSCTL
|
||
|
+
|
||
|
+static ctl_table netswap_table[] = {
|
||
|
+ {NET_SWAP_DROPPED, "dropped",
|
||
|
+ &netswap_dropped, sizeof(int), 0644, NULL, &proc_dointvec },
|
||
|
+ {NET_SWAP_DROP_THRESHOLD, "threshold",
|
||
|
+ &netswap_free_pages_min, sizeof(int), 0644, NULL, &proc_dointvec },
|
||
|
+ {NET_SWAP_SOCK_COUNT, "sock_count",
|
||
|
+ &netswap_sock_count, sizeof(int), 0444, NULL, &proc_dointvec },
|
||
|
+ {0},
|
||
|
+};
|
||
|
+
|
||
|
+static struct ctl_table_header *netswap_sysctl_header;
|
||
|
+
|
||
|
+static ctl_table netswap_net_table[] = {
|
||
|
+ {CTL_NETSWAP, "swapping", NULL, 0, 0555, netswap_table},
|
||
|
+ {0}
|
||
|
+};
|
||
|
+
|
||
|
+static ctl_table netswap_root_table[] = {
|
||
|
+ {CTL_NET, "net", NULL, 0, 0555, netswap_net_table},
|
||
|
+ {0}
|
||
|
+};
|
||
|
+
|
||
|
+#endif
|
||
|
+
|
||
|
+int __init netswap_init(void)
|
||
|
+{
|
||
|
+ /* drop packets when below this threshold */
|
||
|
+ netswap_free_pages_min = 32 /* freepages.min */;
|
||
|
+#ifdef CONFIG_SYSCTL
|
||
|
+ netswap_sysctl_header = register_sysctl_table(netswap_root_table, 0);
|
||
|
+#endif
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+void __exit netswap_exit(void)
|
||
|
+{
|
||
|
+#ifdef CONFIG_SYSCTL
|
||
|
+ unregister_sysctl_table(netswap_sysctl_header);
|
||
|
+#endif
|
||
|
+}
|
||
|
+
|
||
|
+/* linux/init.h -- VERY nice :-)
|
||
|
+ *
|
||
|
+ * On the other hand, we have no control over the order the initcalls
|
||
|
+ * are performed ...
|
||
|
+ *
|
||
|
+ * Actually, we are not compiled as module ...
|
||
|
+ */
|
||
|
+
|
||
|
+module_init(netswap_init)
|
||
|
+module_exit(netswap_exit)
|
||
|
diff -Nurb src/linux/linux.orig/net/netsyms.c src/linux/linux/net/netsyms.c
|
||
|
--- src/linux/linux.orig/net/netsyms.c 2004-05-31 02:02:49.000000000 -0400
|
||
|
+++ src/linux/linux/net/netsyms.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -601,4 +601,10 @@
|
||
|
EXPORT_SYMBOL(wireless_send_event);
|
||
|
#endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
|
||
|
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+EXPORT_SYMBOL(netswap_sock_count);
|
||
|
+EXPORT_SYMBOL(netswap_free_pages_min);
|
||
|
+EXPORT_SYMBOL(netswap_dropped);
|
||
|
+#endif
|
||
|
+
|
||
|
#endif /* CONFIG_NET */
|
||
|
diff -Nurb src/linux/linux.orig/net/packet/af_packet.c src/linux/linux/net/packet/af_packet.c
|
||
|
--- src/linux/linux.orig/net/packet/af_packet.c 2003-10-14 04:09:35.000000000 -0400
|
||
|
+++ src/linux/linux/net/packet/af_packet.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -449,6 +449,12 @@
|
||
|
snaplen = res;
|
||
|
}
|
||
|
#endif /* CONFIG_FILTER */
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+ /* packet doesn't use sock_queue_rcv_skb() ... */
|
||
|
+ /* an inline function defined in net/netswapping.h */
|
||
|
+ if (netswap_low_memory(sk, skb))
|
||
|
+ goto drop_n_restore;
|
||
|
+#endif /* CONFIG_NETSWAP */
|
||
|
|
||
|
if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
|
||
|
goto drop_n_acct;
|
||
|
@@ -496,7 +502,7 @@
|
||
|
po->stats.tp_drops++;
|
||
|
spin_unlock(&sk->receive_queue.lock);
|
||
|
|
||
|
-#ifdef CONFIG_FILTER
|
||
|
+#if defined(CONFIG_FILTER) || defined(CONFIG_NETSWAP)
|
||
|
drop_n_restore:
|
||
|
#endif
|
||
|
if (skb_head != skb->data && skb_shared(skb)) {
|
||
|
@@ -557,6 +563,12 @@
|
||
|
snaplen = res;
|
||
|
}
|
||
|
#endif
|
||
|
+#ifdef CONFIG_NETSWAP
|
||
|
+ /* packet doesn't use sock_queue_rcv_skb() ... */
|
||
|
+ /* an inline function defined in net/netswapping.h */
|
||
|
+ if (netswap_low_memory(sk, skb))
|
||
|
+ goto drop_n_restore;
|
||
|
+#endif /* CONFIG_NETSWAP */
|
||
|
|
||
|
if (sk->type == SOCK_DGRAM) {
|
||
|
macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
|
||
|
diff -Nurb src/linux/linux.orig/net/sunrpc/sched.c src/linux/linux/net/sunrpc/sched.c
|
||
|
--- src/linux/linux.orig/net/sunrpc/sched.c 2003-07-04 04:12:33.000000000 -0400
|
||
|
+++ src/linux/linux/net/sunrpc/sched.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -79,10 +79,11 @@
|
||
|
*/
|
||
|
static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
|
||
|
|
||
|
+#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
|
||
|
/*
|
||
|
* This is the last-ditch buffer for NFS swap requests
|
||
|
*/
|
||
|
-static u32 swap_buffer[PAGE_SIZE >> 2];
|
||
|
+static u32 swap_buffer[2*PAGE_SIZE >> 2];
|
||
|
static long swap_buffer_used;
|
||
|
|
||
|
/*
|
||
|
@@ -96,6 +97,7 @@
|
||
|
{
|
||
|
clear_bit(1, &swap_buffer_used);
|
||
|
}
|
||
|
+#endif
|
||
|
|
||
|
/*
|
||
|
* Disable the timer for a given RPC task. Should be called with
|
||
|
@@ -501,6 +503,7 @@
|
||
|
__rpc_execute(struct rpc_task *task)
|
||
|
{
|
||
|
int status = 0;
|
||
|
+ unsigned long alloc_flag = current->flags & PF_MEMALLOC;
|
||
|
|
||
|
dprintk("RPC: %4d rpc_execute flgs %x\n",
|
||
|
task->tk_pid, task->tk_flags);
|
||
|
@@ -510,6 +513,13 @@
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+ if (task->tk_flags & RPC_TASK_SWAPPER) {
|
||
|
+ if (!(current->flags & PF_MEMALLOC)) {
|
||
|
+ dprintk("__rpc_execute: Setting PF_MEMALLOC\n");
|
||
|
+ }
|
||
|
+ current->flags |= PF_MEMALLOC;
|
||
|
+ }
|
||
|
+
|
||
|
restarted:
|
||
|
while (1) {
|
||
|
/*
|
||
|
@@ -554,7 +564,8 @@
|
||
|
rpc_set_sleeping(task);
|
||
|
if (RPC_IS_ASYNC(task)) {
|
||
|
spin_unlock_bh(&rpc_queue_lock);
|
||
|
- return 0;
|
||
|
+ status = 0;
|
||
|
+ goto out;
|
||
|
}
|
||
|
}
|
||
|
spin_unlock_bh(&rpc_queue_lock);
|
||
|
@@ -563,7 +574,12 @@
|
||
|
/* sync task: sleep here */
|
||
|
dprintk("RPC: %4d sync task going to sleep\n",
|
||
|
task->tk_pid);
|
||
|
- if (current->pid == rpciod_pid)
|
||
|
+ /* it's ok to wait for rpciod when swapping,
|
||
|
+ * because this means it needed memory and is
|
||
|
+ * doing the swap-out itself.
|
||
|
+ */
|
||
|
+ if (current->pid == rpciod_pid &&
|
||
|
+ !(task->tk_flags & RPC_TASK_SWAPPER))
|
||
|
printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
|
||
|
|
||
|
__wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
|
||
|
@@ -608,6 +624,10 @@
|
||
|
/* Release all resources associated with the task */
|
||
|
rpc_release_task(task);
|
||
|
|
||
|
+ out:
|
||
|
+ if (!alloc_flag) {
|
||
|
+ current->flags &= ~PF_MEMALLOC;
|
||
|
+ }
|
||
|
return status;
|
||
|
}
|
||
|
|
||
|
@@ -699,10 +719,16 @@
|
||
|
{
|
||
|
u32 *buffer;
|
||
|
int gfp;
|
||
|
+ unsigned long alloc_flag = current->flags & PF_MEMALLOC;
|
||
|
+ void *ret = NULL;
|
||
|
|
||
|
- if (flags & RPC_TASK_SWAPPER)
|
||
|
+ if (flags & RPC_TASK_SWAPPER) {
|
||
|
gfp = GFP_ATOMIC;
|
||
|
- else if (flags & RPC_TASK_ASYNC)
|
||
|
+ if (!(current->flags & PF_MEMALLOC)) {
|
||
|
+ dprintk("rpc_allocate: Setting PF_MEMALLOC\n");
|
||
|
+ }
|
||
|
+ current->flags |= PF_MEMALLOC;
|
||
|
+ } else if (flags & RPC_TASK_ASYNC)
|
||
|
gfp = GFP_RPC;
|
||
|
else
|
||
|
gfp = GFP_KERNEL;
|
||
|
@@ -710,29 +736,44 @@
|
||
|
do {
|
||
|
if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) {
|
||
|
dprintk("RPC: allocated buffer %p\n", buffer);
|
||
|
- return buffer;
|
||
|
+ ret = buffer;
|
||
|
+ goto out;
|
||
|
}
|
||
|
+#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
|
||
|
if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer)
|
||
|
&& rpc_lock_swapbuf()) {
|
||
|
dprintk("RPC: used last-ditch swap buffer\n");
|
||
|
- return swap_buffer;
|
||
|
+ ret = swap_buffer;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+#endif
|
||
|
+ if (flags & RPC_TASK_ASYNC) {
|
||
|
+ ret = NULL;
|
||
|
+ goto out;
|
||
|
}
|
||
|
- if (flags & RPC_TASK_ASYNC)
|
||
|
- return NULL;
|
||
|
yield();
|
||
|
} while (!signalled());
|
||
|
|
||
|
- return NULL;
|
||
|
+ out:
|
||
|
+ if (!alloc_flag) {
|
||
|
+ current->flags &= ~PF_MEMALLOC;
|
||
|
+ }
|
||
|
+ return ret;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
rpc_free(void *buffer)
|
||
|
{
|
||
|
+#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
|
||
|
if (buffer != swap_buffer) {
|
||
|
+#endif
|
||
|
kfree(buffer);
|
||
|
return;
|
||
|
+#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
|
||
|
}
|
||
|
rpc_unlock_swapbuf();
|
||
|
+ dprintk("RPC: Released swap buffer\n");
|
||
|
+#endif
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
diff -Nurb src/linux/linux.orig/net/sunrpc/xprt.c src/linux/linux/net/sunrpc/xprt.c
|
||
|
--- src/linux/linux.orig/net/sunrpc/xprt.c 2003-07-04 04:12:33.000000000 -0400
|
||
|
+++ src/linux/linux/net/sunrpc/xprt.c 2004-05-31 02:18:03.000000000 -0400
|
||
|
@@ -139,7 +139,7 @@
|
||
|
__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
|
||
|
{
|
||
|
if (!xprt->snd_task) {
|
||
|
- if (xprt->nocong || __xprt_get_cong(xprt, task))
|
||
|
+ if (__xprt_get_cong(xprt, task))
|
||
|
xprt->snd_task = task;
|
||
|
}
|
||
|
if (xprt->snd_task != task) {
|
||
|
@@ -179,7 +179,7 @@
|
||
|
if (!task)
|
||
|
return;
|
||
|
}
|
||
|
- if (xprt->nocong || __xprt_get_cong(xprt, task))
|
||
|
+ if (__xprt_get_cong(xprt, task))
|
||
|
xprt->snd_task = task;
|
||
|
}
|
||
|
|
||
|
@@ -276,6 +276,9 @@
|
||
|
{
|
||
|
struct rpc_rqst *req = task->tk_rqstp;
|
||
|
|
||
|
+ if (xprt->nocong || RPC_IS_SWAPPER(task))
|
||
|
+ return 1;
|
||
|
+
|
||
|
if (req->rq_cong)
|
||
|
return 1;
|
||
|
dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",
|