

Since Jens changed the block layer to fail readahead if the queue has no
requests free, a few changes suggest themselves.

- It's a bit silly to go and allocate a bunch of pages, build BIOs for them
  and submit the IO, only to have it fail, forcing us to free the pages again.

  So the patch changes do_page_cache_readahead() to peek at the queue's
  read_congested state.  If the queue is read-congested we abandon the
  entire readahead up-front, without doing all that work.  The peek is a
  cheap, non-blocking bit test, sketched below.
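
  For reference, the peek amounts to testing a state bit on the
  backing_dev_info, maintained by the block layer.  Roughly (a sketch of
  the existing helper in include/linux/backing-dev.h; it is not part of
  this patch):

	static inline int bdi_read_congested(struct backing_dev_info *bdi)
	{
		/* bit is set/cleared by the block layer as the request
		 * queue crosses its congestion thresholds */
		return test_bit(BDI_read_congested, &bdi->state);
	}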

- If the queue is not read-congested, we go ahead and do the readahead,
  after having set PF_READAHEAD.

  The backing_dev_info's read-congested threshold cuts in when 7/8ths of
  the queue's requests are in flight (the arithmetic is sketched below),
  so the readahead abandonment code in __make_request should now almost
  never trigger.
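
  For illustration, the threshold arithmetic is roughly the following.
  This is a sketch only; the name is illustrative and the real helper
  lives in the block layer, not in this patch:

	/* the queue is marked read-congested once its in-flight requests
	 * reach 7/8ths of the queue depth */
	static int read_congestion_threshold(unsigned long nr_requests)
	{
		return nr_requests - nr_requests / 8;
	}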

- The above changes make do_page_cache_readahead() "unreliable", in that it
  may do nothing at all.

  However, there are some system calls:

	- fadvise(POSIX_FADV_WILLNEED)
	- madvise(MADV_WILLNEED)
	- sys_readahead()

  in which the user expects that the kernel will actually perform
  the IO.

  So the patch creates a new force_page_cache_readahead() which will
  perform the IO regardless of the queue's congestion state.  (A small
  userspace example of reaching these paths appears below.)

  Arguably, this is the wrong thing to do: even though the application
  requested readahead, it could be that the kernel _should_ abandon the
  user's request because the disk is so busy.

  I don't know.  But for now, let's keep the above syscalls' behaviour
  unchanged.  It is trivial to switch back to do_page_cache_readahead()
  later.
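
  As an aside, here is how an application reaches the forced path from
  userspace.  A minimal sketch, assuming a libc which wraps sys_fadvise64()
  as posix_fadvise(); the file descriptor and length are illustrative:

	#define _XOPEN_SOURCE 600
	#include <fcntl.h>

	/*
	 * Hint that the first megabyte of fd will be wanted soon.  With
	 * this patch the IO is started even if the queue is
	 * read-congested, because sys_fadvise64() now calls
	 * force_page_cache_readahead().
	 */
	int prefetch_head(int fd)
	{
		return posix_fadvise(fd, 0, 1024 * 1024, POSIX_FADV_WILLNEED);
	}

  madvise(MADV_WILLNEED) and sys_readahead() take the same forced path,
  for mapped files and raw file offsets respectively.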



 25-akpm/include/linux/mm.h |    2 ++
 25-akpm/mm/fadvise.c       |    2 +-
 25-akpm/mm/filemap.c       |    8 +++++---
 25-akpm/mm/madvise.c       |    2 +-
 25-akpm/mm/readahead.c     |   28 +++++++++++++++++++++++-----
 5 files changed, 32 insertions(+), 10 deletions(-)

diff -puN mm/readahead.c~force_page_cache_readahead mm/readahead.c
--- 25/mm/readahead.c~force_page_cache_readahead	Wed Jul 30 12:47:04 2003
+++ 25-akpm/mm/readahead.c	Wed Jul 30 12:53:46 2003
@@ -96,8 +96,6 @@ static int read_pages(struct address_spa
 	struct pagevec lru_pvec;
 	int ret = 0;
 
-	current->flags |= PF_READAHEAD;
-
 	if (mapping->a_ops->readpages) {
 		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
 		goto out;
@@ -118,7 +116,6 @@ static int read_pages(struct address_spa
 	}
 	pagevec_lru_add(&lru_pvec);
 out:
-	current->flags &= ~PF_READAHEAD;
 	return ret;
 }
 
@@ -263,8 +260,8 @@ out:
  * Chunk the readahead into 2 megabyte units, so that we don't pin too much
  * memory at once.
  */
-int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-			unsigned long offset, unsigned long nr_to_read)
+int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
+		unsigned long offset, unsigned long nr_to_read)
 {
 	int ret = 0;
 
@@ -290,6 +287,27 @@ int do_page_cache_readahead(struct addre
 	}
 	return ret;
 }
+
+/*
+ * This version skips the IO if the queue is read-congested, and will tell the
+ * block layer to abandon the readahead if request allocation would block.
+ *
+ * force_page_cache_readahead() will ignore queue congestion and will block on
+ * request queues.
+ */
+int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
+			unsigned long offset, unsigned long nr_to_read)
+{
+	int ret = 0;
+
+	if (!bdi_read_congested(mapping->backing_dev_info)) {
+		current->flags |= PF_READAHEAD;
+		ret = __do_page_cache_readahead(mapping, filp,
+						offset, nr_to_read);
+		current->flags &= ~PF_READAHEAD;
+	}
+	return ret;
+}
 
 /*
  * Check how effective readahead is being.  If the amount of started IO is
diff -puN mm/fadvise.c~force_page_cache_readahead mm/fadvise.c
--- 25/mm/fadvise.c~force_page_cache_readahead	Wed Jul 30 12:47:25 2003
+++ 25-akpm/mm/fadvise.c	Wed Jul 30 13:32:26 2003
@@ -56,7 +56,7 @@ long sys_fadvise64(int fd, loff_t offset
 			ret = -EINVAL;
 			break;
 		}
-		ret = do_page_cache_readahead(mapping, file,
+		ret = force_page_cache_readahead(mapping, file,
 				offset >> PAGE_CACHE_SHIFT,
 				max_sane_readahead(len >> PAGE_CACHE_SHIFT));
 		if (ret > 0)
diff -puN mm/filemap.c~force_page_cache_readahead mm/filemap.c
--- 25/mm/filemap.c~force_page_cache_readahead	Wed Jul 30 12:47:29 2003
+++ 25-akpm/mm/filemap.c	Wed Jul 30 13:33:50 2003
@@ -870,7 +870,8 @@ do_readahead(struct address_space *mappi
 	if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
 		return -EINVAL;
 
-	do_page_cache_readahead(mapping, filp, index, max_sane_readahead(nr));
+	force_page_cache_readahead(mapping, filp, index,
+					max_sane_readahead(nr));
 	return 0;
 }
 
@@ -996,7 +997,8 @@ retry_find:
 			goto no_cached_page;
 
 		did_readaround = 1;
-		do_page_cache_readahead(mapping, file, pgoff & ~(MMAP_READAROUND-1), MMAP_READAROUND);
+		do_page_cache_readahead(mapping, file,
+				pgoff & ~(MMAP_READAROUND-1), MMAP_READAROUND);
 		goto retry_find;
 	}
 
@@ -1230,7 +1232,7 @@ static int filemap_populate(struct vm_ar
 	int err;
 
 	if (!nonblock)
-		do_page_cache_readahead(mapping, vma->vm_file,
+		force_page_cache_readahead(mapping, vma->vm_file,
 					pgoff, len >> PAGE_CACHE_SHIFT);
 
 repeat:
diff -puN mm/madvise.c~force_page_cache_readahead mm/madvise.c
--- 25/mm/madvise.c~force_page_cache_readahead	Wed Jul 30 12:47:34 2003
+++ 25-akpm/mm/madvise.c	Wed Jul 30 13:33:58 2003
@@ -65,7 +65,7 @@ static long madvise_willneed(struct vm_a
 		end = vma->vm_end;
 	end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 
-	do_page_cache_readahead(file->f_dentry->d_inode->i_mapping,
+	force_page_cache_readahead(file->f_dentry->d_inode->i_mapping,
 			file, start, max_sane_readahead(end - start));
 	return 0;
 }
diff -puN include/linux/mm.h~force_page_cache_readahead include/linux/mm.h
--- 25/include/linux/mm.h~force_page_cache_readahead	Wed Jul 30 12:47:47 2003
+++ 25-akpm/include/linux/mm.h	Wed Jul 30 13:32:19 2003
@@ -570,6 +570,8 @@ int write_one_page(struct page *page, in
 
 int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			unsigned long offset, unsigned long nr_to_read);
+int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
+			unsigned long offset, unsigned long nr_to_read);
 void page_cache_readahead(struct address_space *mapping, 
 			  struct file_ra_state *ra,
 			  struct file *filp,

_
