mirror of
https://github.com/openbsd/src.git
synced 2026-06-17 23:03:29 +02:00
Remove the buffer flipper
This makes the buffer cache have only one clean cache, and to not care about where memory is allocated from. IO devices are expected to be able to handle IO from a buf at any physical address. ok kettenis@ deraadt@
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: kern_sysctl.c,v 1.493 2026/05/19 17:50:23 mvs Exp $ */
|
||||
/* $OpenBSD: kern_sysctl.c,v 1.494 2026/06/10 00:04:38 beck Exp $ */
|
||||
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
|
||||
|
||||
/*-
|
||||
@@ -783,7 +783,7 @@ kern_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
|
||||
return (0);
|
||||
}
|
||||
case KERN_CACHEPCT: {
|
||||
u_int64_t dmapages;
|
||||
u_int64_t pages;
|
||||
int opct, pgs;
|
||||
opct = bufcachepercent;
|
||||
error = sysctl_int(oldp, oldlenp, newp, newlen,
|
||||
@@ -794,9 +794,9 @@ kern_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
|
||||
bufcachepercent = opct;
|
||||
return (EINVAL);
|
||||
}
|
||||
dmapages = uvm_pagecount(&dma_constraint);
|
||||
pages = uvm_pagecount(&no_constraint);
|
||||
if (bufcachepercent != opct) {
|
||||
pgs = bufcachepercent * dmapages / 100;
|
||||
pgs = bufcachepercent * pages / 100;
|
||||
bufadjust(pgs); /* adjust bufpages */
|
||||
bufhighpages = bufpages; /* set high water mark */
|
||||
}
|
||||
|
||||
+45
-314
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: vfs_bio.c,v 1.217 2026/02/11 22:34:41 deraadt Exp $ */
|
||||
/* $OpenBSD: vfs_bio.c,v 1.218 2026/06/10 00:04:38 beck Exp $ */
|
||||
/* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */
|
||||
|
||||
/*
|
||||
@@ -58,7 +58,6 @@
|
||||
#include <uvm/uvm_extern.h>
|
||||
|
||||
struct uvm_constraint_range high_constraint;
|
||||
int fliphigh;
|
||||
|
||||
int nobuffers;
|
||||
int needbuffer;
|
||||
@@ -78,7 +77,7 @@ void buf_put(struct buf *);
|
||||
struct buf *bio_doread(struct vnode *, daddr_t, int, int);
|
||||
struct buf *buf_get(struct vnode *, daddr_t, size_t);
|
||||
void bread_cluster_callback(struct buf *);
|
||||
int64_t bufcache_recover_dmapages(int discard, int64_t howmany);
|
||||
int64_t bufcache_recover_pages(int discard, int64_t howmany);
|
||||
static struct buf *incore_locked(struct vnode *vp, daddr_t blkno);
|
||||
|
||||
struct bcachestats bcstats; /* counters */
|
||||
@@ -94,9 +93,7 @@ vsize_t bufkvm;
|
||||
struct proc *cleanerproc;
|
||||
int bd_req; /* Sleep point for cleaner daemon. */
|
||||
|
||||
#define NUM_CACHES 2
|
||||
#define DMA_CACHE 0
|
||||
struct bufcache cleancache[NUM_CACHES];
|
||||
struct bufcache cleancache;
|
||||
struct bufqueue dirtyqueue;
|
||||
|
||||
void
|
||||
@@ -131,29 +128,11 @@ buf_put(struct buf *bp)
|
||||
void
|
||||
bufinit(void)
|
||||
{
|
||||
u_int64_t dmapages;
|
||||
u_int64_t highpages;
|
||||
u_int64_t pages;
|
||||
|
||||
dmapages = uvm_pagecount(&dma_constraint);
|
||||
pages = uvm_pagecount(&no_constraint);
|
||||
/* take away a guess at how much of this the kernel will consume */
|
||||
dmapages -= (atop(physmem) - atop(atomic_load_sint(&uvmexp.free)));
|
||||
|
||||
/* See if we have memory above the dma accessible region. */
|
||||
high_constraint.ucr_low = dma_constraint.ucr_high;
|
||||
high_constraint.ucr_high = no_constraint.ucr_high;
|
||||
if (high_constraint.ucr_low != high_constraint.ucr_high)
|
||||
high_constraint.ucr_low++;
|
||||
highpages = uvm_pagecount(&high_constraint);
|
||||
|
||||
/*
|
||||
* Do we have any significant amount of high memory above
|
||||
* the DMA region? if so enable moving buffers there, if not,
|
||||
* don't bother.
|
||||
*/
|
||||
if (highpages > dmapages / 4)
|
||||
fliphigh = 1;
|
||||
else
|
||||
fliphigh = 0;
|
||||
pages -= (atop(physmem) - atop(uvmexp.free));
|
||||
|
||||
/*
|
||||
* If MD code doesn't say otherwise, use up to 10% of DMA'able
|
||||
@@ -169,10 +148,10 @@ bufinit(void)
|
||||
KASSERT(bufcachepercent <= 90);
|
||||
KASSERT(bufcachepercent >= 5);
|
||||
if (bufpages == 0)
|
||||
bufpages = dmapages * bufcachepercent / 100;
|
||||
bufpages = pages * bufcachepercent / 100;
|
||||
if (bufpages < BCACHE_MIN)
|
||||
bufpages = BCACHE_MIN;
|
||||
KASSERT(bufpages < dmapages);
|
||||
KASSERT(bufpages < pages);
|
||||
|
||||
bufhighpages = bufpages;
|
||||
|
||||
@@ -180,7 +159,7 @@ bufinit(void)
|
||||
* Set the base backoff level for the buffer cache. We will
|
||||
* not allow uvm to steal back more than this number of pages.
|
||||
*/
|
||||
buflowpages = dmapages * 5 / 100;
|
||||
buflowpages = pages * 5 / 100;
|
||||
if (buflowpages < BCACHE_MIN)
|
||||
buflowpages = BCACHE_MIN;
|
||||
|
||||
@@ -257,15 +236,15 @@ bufadjust(int newbufpages)
|
||||
*/
|
||||
targetpages = bufpages - RESERVE_PAGES;
|
||||
|
||||
npages = bcstats.dmapages - targetpages;
|
||||
npages = bcstats.numbufpages - targetpages;
|
||||
|
||||
/*
|
||||
* Shrinking the cache happens here only if someone has manually
|
||||
* adjusted bufcachepercent - or the pagedaemon has told us
|
||||
* to give back memory *now* - so we give it all back.
|
||||
*/
|
||||
if (bcstats.dmapages > targetpages)
|
||||
(void) bufcache_recover_dmapages(0, bcstats.dmapages - targetpages);
|
||||
if (bcstats.numbufpages > targetpages)
|
||||
(void) bufcache_recover_pages(0, bcstats.numbufpages - targetpages);
|
||||
bufcache_adjust();
|
||||
|
||||
/*
|
||||
@@ -289,49 +268,7 @@ unsigned long
|
||||
bufbackoff(struct uvm_constraint_range *range, long size)
|
||||
{
|
||||
long pdelta, oldbufpages;
|
||||
int64_t dmarecovered, recovered = 0;
|
||||
|
||||
/*
|
||||
* If we will accept high memory for this backoff
|
||||
* try to steal it from the high memory buffer cache.
|
||||
*/
|
||||
if (range != NULL && range->ucr_high > dma_constraint.ucr_high) {
|
||||
struct buf *bp;
|
||||
int64_t start;
|
||||
int s;
|
||||
|
||||
start = bcstats.numbufpages;
|
||||
|
||||
s = splbio();
|
||||
while (recovered < size && (bp = bufcache_gethighcleanbuf())) {
|
||||
bufcache_take(bp);
|
||||
if (bp->b_vp) {
|
||||
RBT_REMOVE(buf_rb_bufs,
|
||||
&bp->b_vp->v_bufs_tree, bp);
|
||||
brelvp(bp);
|
||||
}
|
||||
buf_put(bp);
|
||||
recovered = start - bcstats.numbufpages;
|
||||
}
|
||||
bufcache_adjust();
|
||||
splx(s);
|
||||
|
||||
/* We got enough. */
|
||||
if (recovered >= size)
|
||||
return recovered;
|
||||
|
||||
/* If we needed only memory above DMA, we're done. */
|
||||
if (range->ucr_low > dma_constraint.ucr_high)
|
||||
return recovered;
|
||||
|
||||
/* Otherwise get the rest from DMA */
|
||||
size -= recovered;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX Otherwise do the dma memory cache dance. this needs
|
||||
* refactoring later to get rid of 'bufpages'
|
||||
*/
|
||||
int64_t recovered = 0;
|
||||
|
||||
/*
|
||||
* Back off by at least bufbackpages. If the page daemon gave us
|
||||
@@ -345,68 +282,9 @@ bufbackoff(struct uvm_constraint_range *range, long size)
|
||||
pdelta = bufpages - buflowpages;
|
||||
oldbufpages = bufpages;
|
||||
bufadjust(bufpages - pdelta);
|
||||
dmarecovered = oldbufpages - bufpages;
|
||||
recovered = oldbufpages - bufpages;
|
||||
|
||||
return recovered + dmarecovered;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Opportunistically flip a buffer into high memory. Will move the buffer
|
||||
* if memory is available without sleeping, and return 0, otherwise will
|
||||
* fail and return -1 with the buffer unchanged.
|
||||
*/
|
||||
|
||||
int
|
||||
buf_flip_high(struct buf *bp)
|
||||
{
|
||||
int ret = -1;
|
||||
|
||||
KASSERT(ISSET(bp->b_flags, B_BC));
|
||||
KASSERT(ISSET(bp->b_flags, B_DMA));
|
||||
KASSERT(bp->cache == DMA_CACHE);
|
||||
KASSERT(fliphigh);
|
||||
|
||||
splassert(IPL_BIO);
|
||||
|
||||
/* Attempt to move the buffer to high memory if we can */
|
||||
if (buf_realloc_pages(bp, &high_constraint, UVM_PLA_NOWAIT) == 0) {
|
||||
KASSERT(!ISSET(bp->b_flags, B_DMA));
|
||||
bcstats.highflips++;
|
||||
ret = 0;
|
||||
} else
|
||||
bcstats.highflops++;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flip a buffer to dma reachable memory, when we need it there for
|
||||
* I/O. This can sleep since it will wait for memory allocation in the
|
||||
* DMA reachable area since we have to have the buffer there to proceed.
|
||||
*/
|
||||
void
|
||||
buf_flip_dma(struct buf *bp)
|
||||
{
|
||||
KASSERT(ISSET(bp->b_flags, B_BC));
|
||||
KASSERT(ISSET(bp->b_flags, B_BUSY));
|
||||
KASSERT(bp->cache < NUM_CACHES);
|
||||
|
||||
splassert(IPL_BIO);
|
||||
|
||||
if (!ISSET(bp->b_flags, B_DMA)) {
|
||||
|
||||
/* move buf to dma reachable memory */
|
||||
(void) buf_realloc_pages(bp, &dma_constraint, UVM_PLA_WAITOK);
|
||||
KASSERT(ISSET(bp->b_flags, B_DMA));
|
||||
bcstats.dmaflips++;
|
||||
}
|
||||
|
||||
if (bp->cache > DMA_CACHE) {
|
||||
CLR(bp->b_flags, B_COLD);
|
||||
CLR(bp->b_flags, B_WARM);
|
||||
bp->cache = DMA_CACHE;
|
||||
}
|
||||
return recovered;
|
||||
}
|
||||
|
||||
struct buf *
|
||||
@@ -633,12 +511,7 @@ bread_cluster(struct vnode *vp, daddr_t blkno, int size, struct buf **rbpp)
|
||||
for (i = 1; i < howmany; i++) {
|
||||
bcstats.pendingreads++;
|
||||
bcstats.numreads++;
|
||||
/*
|
||||
* We set B_DMA here because bp above will be B_DMA,
|
||||
* and we are playing buffer slice-n-dice games from
|
||||
* the memory allocated in bp.
|
||||
*/
|
||||
SET(xbpp[i]->b_flags, B_DMA | B_READ | B_ASYNC);
|
||||
SET(xbpp[i]->b_flags, B_READ | B_ASYNC);
|
||||
xbpp[i]->b_blkno = sblkno + (i * inc);
|
||||
xbpp[i]->b_bufsize = xbpp[i]->b_bcount = size;
|
||||
xbpp[i]->b_data = NULL;
|
||||
@@ -732,7 +605,6 @@ bwrite(struct buf *bp)
|
||||
|
||||
/* Initiate disk write. Make sure the appropriate party is charged. */
|
||||
bp->b_vp->v_numoutput++;
|
||||
buf_flip_dma(bp);
|
||||
SET(bp->b_flags, B_WRITEINPROG);
|
||||
splx(s);
|
||||
VOP_STRATEGY(bp->b_vp, bp);
|
||||
@@ -788,7 +660,6 @@ bdwrite(struct buf *bp)
|
||||
if (!ISSET(bp->b_flags, B_DELWRI)) {
|
||||
SET(bp->b_flags, B_DELWRI);
|
||||
s = splbio();
|
||||
buf_flip_dma(bp);
|
||||
reassignbuf(bp);
|
||||
splx(s);
|
||||
curproc->p_ru.ru_oublock++; /* XXX */
|
||||
@@ -827,7 +698,6 @@ buf_dirty(struct buf *bp)
|
||||
|
||||
if (ISSET(bp->b_flags, B_DELWRI) == 0) {
|
||||
SET(bp->b_flags, B_DELWRI);
|
||||
buf_flip_dma(bp);
|
||||
reassignbuf(bp);
|
||||
}
|
||||
}
|
||||
@@ -922,9 +792,9 @@ brelse(struct buf *bp)
|
||||
wakeup(bp);
|
||||
}
|
||||
|
||||
if (bcstats.dmapages > targetpages)
|
||||
(void) bufcache_recover_dmapages(0,
|
||||
bcstats.dmapages - targetpages);
|
||||
if (bcstats.numbufpages > targetpages)
|
||||
(void) bufcache_recover_pages(0,
|
||||
bcstats.numbufpages - targetpages);
|
||||
bufcache_adjust();
|
||||
}
|
||||
|
||||
@@ -935,7 +805,7 @@ brelse(struct buf *bp)
|
||||
}
|
||||
|
||||
/* Wake up any processes waiting for any buffer to become free. */
|
||||
if (needbuffer && bcstats.dmapages < targetpages &&
|
||||
if (needbuffer && bcstats.numbufpages < targetpages &&
|
||||
bcstats.kvaslots_avail > RESERVE_SLOTS) {
|
||||
needbuffer = 0;
|
||||
wakeup(&needbuffer);
|
||||
@@ -1088,8 +958,8 @@ buf_get(struct vnode *vp, daddr_t blkno, size_t size)
|
||||
* new allocation, free enough buffers first
|
||||
* to stay at the target with our new allocation.
|
||||
*/
|
||||
if (bcstats.dmapages + npages > targetpages) {
|
||||
(void) bufcache_recover_dmapages(0, npages);
|
||||
if (bcstats.numbufpages + npages > targetpages) {
|
||||
(void) bufcache_recover_pages(0, npages);
|
||||
bufcache_adjust();
|
||||
}
|
||||
|
||||
@@ -1098,7 +968,7 @@ buf_get(struct vnode *vp, daddr_t blkno, size_t size)
|
||||
* above, and couldn't get down - Wake the cleaner
|
||||
* and wait for it to push some buffers out.
|
||||
*/
|
||||
if ((bcstats.dmapages + npages > targetpages ||
|
||||
if ((bcstats.numbufpages + npages > targetpages ||
|
||||
bcstats.kvaslots_avail <= RESERVE_SLOTS) &&
|
||||
curproc != syncerproc && curproc != cleanerproc) {
|
||||
wakeup(&bd_req);
|
||||
@@ -1107,7 +977,7 @@ buf_get(struct vnode *vp, daddr_t blkno, size_t size)
|
||||
splx(s);
|
||||
return (NULL);
|
||||
}
|
||||
if (bcstats.dmapages + npages > bufpages) {
|
||||
if (bcstats.numbufpages + npages > bufpages) {
|
||||
/* cleaner or syncer */
|
||||
nobuffers = 1;
|
||||
tsleep_nsec(&nobuffers, PRIBIO, "nobuffers", INFSLP);
|
||||
@@ -1159,7 +1029,6 @@ buf_get(struct vnode *vp, daddr_t blkno, size_t size)
|
||||
|
||||
if (size) {
|
||||
buf_alloc_pages(bp, round_page(size));
|
||||
KASSERT(ISSET(bp->b_flags, B_DMA));
|
||||
buf_map(bp);
|
||||
}
|
||||
|
||||
@@ -1324,12 +1193,10 @@ bcstats_print(
|
||||
bcstats.numbufs, bcstats.busymapped, bcstats.delwribufs);
|
||||
(*pr)("kvaslots %lld avail kva slots %lld\n",
|
||||
bcstats.kvaslots, bcstats.kvaslots_avail);
|
||||
(*pr)("bufpages %lld, dmapages %lld, dirtypages %lld\n",
|
||||
bcstats.numbufpages, bcstats.dmapages, bcstats.numdirtypages);
|
||||
(*pr)("bufpages %lld, dirtypages %lld\n",
|
||||
bcstats.numbufpages, bcstats.numdirtypages);
|
||||
(*pr)("pendingreads %lld, pendingwrites %lld\n",
|
||||
bcstats.pendingreads, bcstats.pendingwrites);
|
||||
(*pr)("highflips %lld, highflops %lld, dmaflips %lld\n",
|
||||
bcstats.highflips, bcstats.highflops, bcstats.dmaflips);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1419,13 +1286,9 @@ int chillbufs(struct
|
||||
void
|
||||
bufcache_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_CACHES; i++) {
|
||||
TAILQ_INIT(&cleancache[i].hotqueue);
|
||||
TAILQ_INIT(&cleancache[i].coldqueue);
|
||||
TAILQ_INIT(&cleancache[i].warmqueue);
|
||||
}
|
||||
TAILQ_INIT(&cleancache.hotqueue);
|
||||
TAILQ_INIT(&cleancache.coldqueue);
|
||||
TAILQ_INIT(&cleancache.warmqueue);
|
||||
TAILQ_INIT(&dirtyqueue);
|
||||
}
|
||||
|
||||
@@ -1435,15 +1298,11 @@ bufcache_init(void)
|
||||
void
|
||||
bufcache_adjust(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_CACHES; i++) {
|
||||
while (chillbufs(&cleancache[i], &cleancache[i].warmqueue,
|
||||
&cleancache[i].warmbufpages) ||
|
||||
chillbufs(&cleancache[i], &cleancache[i].hotqueue,
|
||||
&cleancache[i].hotbufpages))
|
||||
continue;
|
||||
}
|
||||
while (chillbufs(&cleancache, &cleancache.warmqueue,
|
||||
&cleancache.warmbufpages) ||
|
||||
chillbufs(&cleancache, &cleancache.hotqueue,
|
||||
&cleancache.hotbufpages))
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1452,10 +1311,10 @@ bufcache_adjust(void)
|
||||
* away such as in a hibernation
|
||||
*/
|
||||
struct buf *
|
||||
bufcache_getcleanbuf(int cachenum, int discard)
|
||||
bufcache_getcleanbuf(int discard)
|
||||
{
|
||||
struct buf *bp = NULL;
|
||||
struct bufcache *cache = &cleancache[cachenum];
|
||||
struct bufcache *cache = &cleancache;
|
||||
struct bufqueue * queue;
|
||||
|
||||
splassert(IPL_BIO);
|
||||
@@ -1465,36 +1324,15 @@ bufcache_getcleanbuf(int cachenum, int discard)
|
||||
(bp = TAILQ_FIRST(&cache->warmqueue)) ||
|
||||
(bp = TAILQ_FIRST(&cache->hotqueue))) {
|
||||
int64_t pages = atop(bp->b_bufsize);
|
||||
struct bufcache *newcache;
|
||||
|
||||
if (discard || cachenum >= NUM_CACHES - 1) {
|
||||
if (discard)
|
||||
/* Victim selected, give it up */
|
||||
return bp;
|
||||
}
|
||||
KASSERT(bp->cache == cachenum);
|
||||
|
||||
/*
|
||||
* If this buffer was warm before, move it to
|
||||
* the hot queue in the next cache
|
||||
*/
|
||||
|
||||
if (fliphigh) {
|
||||
/*
|
||||
* If we are in the DMA cache, try to flip the
|
||||
* buffer up high to move it on to the other
|
||||
* caches. if we can't move the buffer to high
|
||||
* memory without sleeping, we give it up and
|
||||
* return it rather than fight for more memory
|
||||
* against non buffer cache competitors.
|
||||
*/
|
||||
SET(bp->b_flags, B_BUSY);
|
||||
if (bp->cache == 0 && buf_flip_high(bp) == -1) {
|
||||
CLR(bp->b_flags, B_BUSY);
|
||||
return bp;
|
||||
}
|
||||
CLR(bp->b_flags, B_BUSY);
|
||||
}
|
||||
|
||||
/* Move the buffer to the hot queue in the next cache */
|
||||
if (ISSET(bp->b_flags, B_COLD)) {
|
||||
queue = &cache->coldqueue;
|
||||
@@ -1509,13 +1347,6 @@ bufcache_getcleanbuf(int cachenum, int discard)
|
||||
cache->cachepages -= pages;
|
||||
CLR(bp->b_flags, B_WARM);
|
||||
CLR(bp->b_flags, B_COLD);
|
||||
bp->cache++;
|
||||
newcache= &cleancache[bp->cache];
|
||||
newcache->cachepages += pages;
|
||||
newcache->hotbufpages += pages;
|
||||
chillbufs(newcache, &newcache->hotqueue,
|
||||
&newcache->hotbufpages);
|
||||
TAILQ_INSERT_TAIL(&newcache->hotqueue, bp, b_freelist);
|
||||
}
|
||||
return bp;
|
||||
}
|
||||
@@ -1536,11 +1367,10 @@ discard_buffer(struct buf *bp)
|
||||
}
|
||||
|
||||
int64_t
|
||||
bufcache_recover_dmapages(int discard, int64_t howmany)
|
||||
bufcache_recover_pages(int discard, int64_t howmany)
|
||||
{
|
||||
struct buf *bp = NULL;
|
||||
struct bufcache *cache = &cleancache[DMA_CACHE];
|
||||
struct bufqueue * queue;
|
||||
struct bufcache *cache = &cleancache;
|
||||
int64_t recovered = 0;
|
||||
|
||||
splassert(IPL_BIO);
|
||||
@@ -1550,101 +1380,13 @@ bufcache_recover_dmapages(int discard, int64_t howmany)
|
||||
(bp = TAILQ_FIRST(&cache->warmqueue)) ||
|
||||
(bp = TAILQ_FIRST(&cache->hotqueue)))) {
|
||||
int64_t pages = atop(bp->b_bufsize);
|
||||
struct bufcache *newcache;
|
||||
|
||||
if (discard || DMA_CACHE >= NUM_CACHES - 1) {
|
||||
discard_buffer(bp);
|
||||
continue;
|
||||
}
|
||||
KASSERT(bp->cache == DMA_CACHE);
|
||||
|
||||
/*
|
||||
* If this buffer was warm before, move it to
|
||||
* the hot queue in the next cache
|
||||
*/
|
||||
|
||||
/*
|
||||
* One way or another, the pages for this
|
||||
* buffer are leaving DMA memory
|
||||
*/
|
||||
recovered += pages;
|
||||
|
||||
if (!fliphigh) {
|
||||
discard_buffer(bp);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are in the DMA cache, try to flip the
|
||||
* buffer up high to move it on to the other
|
||||
* caches. if we can't move the buffer to high
|
||||
* memory without sleeping, we give it up
|
||||
* now rather than fight for more memory
|
||||
* against non buffer cache competitors.
|
||||
*/
|
||||
SET(bp->b_flags, B_BUSY);
|
||||
if (bp->cache == 0 && buf_flip_high(bp) == -1) {
|
||||
CLR(bp->b_flags, B_BUSY);
|
||||
discard_buffer(bp);
|
||||
continue;
|
||||
}
|
||||
CLR(bp->b_flags, B_BUSY);
|
||||
|
||||
/*
|
||||
* Move the buffer to the hot queue in the next cache
|
||||
*/
|
||||
if (ISSET(bp->b_flags, B_COLD)) {
|
||||
queue = &cache->coldqueue;
|
||||
} else if (ISSET(bp->b_flags, B_WARM)) {
|
||||
queue = &cache->warmqueue;
|
||||
cache->warmbufpages -= pages;
|
||||
} else {
|
||||
queue = &cache->hotqueue;
|
||||
cache->hotbufpages -= pages;
|
||||
}
|
||||
TAILQ_REMOVE(queue, bp, b_freelist);
|
||||
cache->cachepages -= pages;
|
||||
CLR(bp->b_flags, B_WARM);
|
||||
CLR(bp->b_flags, B_COLD);
|
||||
bp->cache++;
|
||||
newcache= &cleancache[bp->cache];
|
||||
newcache->cachepages += pages;
|
||||
newcache->hotbufpages += pages;
|
||||
chillbufs(newcache, &newcache->hotqueue,
|
||||
&newcache->hotbufpages);
|
||||
TAILQ_INSERT_TAIL(&newcache->hotqueue, bp, b_freelist);
|
||||
discard_buffer(bp);
|
||||
}
|
||||
return recovered;
|
||||
}
|
||||
|
||||
struct buf *
|
||||
bufcache_getcleanbuf_range(int start, int end, int discard)
|
||||
{
|
||||
int i, j = start, q = end;
|
||||
struct buf *bp = NULL;
|
||||
|
||||
/*
|
||||
* XXX in theory we could promote warm buffers into a previous queue
|
||||
* so in the pathological case of where we go through all the caches
|
||||
* without getting a buffer we have to start at the beginning again.
|
||||
*/
|
||||
while (j <= q) {
|
||||
for (i = q; i >= j; i--)
|
||||
if ((bp = bufcache_getcleanbuf(i, discard)))
|
||||
return (bp);
|
||||
j++;
|
||||
}
|
||||
return bp;
|
||||
}
|
||||
|
||||
struct buf *
|
||||
bufcache_gethighcleanbuf(void)
|
||||
{
|
||||
if (!fliphigh)
|
||||
return NULL;
|
||||
return bufcache_getcleanbuf_range(DMA_CACHE + 1, NUM_CACHES - 1, 0);
|
||||
}
|
||||
|
||||
struct buf *
|
||||
bufcache_getdirtybuf(void)
|
||||
{
|
||||
@@ -1659,14 +1401,12 @@ bufcache_take(struct buf *bp)
|
||||
|
||||
splassert(IPL_BIO);
|
||||
KASSERT(ISSET(bp->b_flags, B_BC));
|
||||
KASSERT(bp->cache >= DMA_CACHE);
|
||||
KASSERT((bp->cache < NUM_CACHES));
|
||||
|
||||
pages = atop(bp->b_bufsize);
|
||||
|
||||
TRACEPOINT(vfs, bufcache_take, bp->b_flags, bp->cache, pages);
|
||||
TRACEPOINT(vfs, bufcache_take, bp->b_flags, pages);
|
||||
|
||||
struct bufcache *cache = &cleancache[bp->cache];
|
||||
struct bufcache *cache = &cleancache;
|
||||
if (!ISSET(bp->b_flags, B_DELWRI)) {
|
||||
if (ISSET(bp->b_flags, B_COLD)) {
|
||||
queue = &cache->coldqueue;
|
||||
@@ -1727,21 +1467,12 @@ bufcache_release(struct buf *bp)
|
||||
{
|
||||
struct bufqueue *queue;
|
||||
int64_t pages;
|
||||
struct bufcache *cache = &cleancache[bp->cache];
|
||||
struct bufcache *cache = &cleancache;
|
||||
|
||||
KASSERT(ISSET(bp->b_flags, B_BC));
|
||||
pages = atop(bp->b_bufsize);
|
||||
|
||||
TRACEPOINT(vfs, bufcache_rel, bp->b_flags, bp->cache, pages);
|
||||
|
||||
if (fliphigh) {
|
||||
if (ISSET(bp->b_flags, B_DMA) && bp->cache > 0)
|
||||
panic("B_DMA buffer release from cache %d",
|
||||
bp->cache);
|
||||
else if ((!ISSET(bp->b_flags, B_DMA)) && bp->cache == 0)
|
||||
panic("Non B_DMA buffer release from cache %d",
|
||||
bp->cache);
|
||||
}
|
||||
TRACEPOINT(vfs, bufcache_rel, bp->b_flags, pages);
|
||||
|
||||
if (!ISSET(bp->b_flags, B_DELWRI)) {
|
||||
int64_t *queuepages;
|
||||
@@ -1780,7 +1511,7 @@ hibernate_suspend_bufcache(void)
|
||||
|
||||
s = splbio();
|
||||
/* Chuck away all the cache pages.. discard bufs, do not promote */
|
||||
while ((bp = bufcache_getcleanbuf_range(DMA_CACHE, NUM_CACHES - 1, 1))) {
|
||||
while ((bp = bufcache_getcleanbuf(1))){
|
||||
bufcache_take(bp);
|
||||
if (bp->b_vp) {
|
||||
RBT_REMOVE(buf_rb_bufs, &bp->b_vp->v_bufs_tree, bp);
|
||||
|
||||
+1
-77
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: vfs_biomem.c,v 1.54 2026/04/10 02:03:40 kirill Exp $ */
|
||||
/* $OpenBSD: vfs_biomem.c,v 1.55 2026/06/10 00:04:38 beck Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
|
||||
@@ -282,8 +282,6 @@ buf_alloc_pages(struct buf *bp, vsize_t size)
|
||||
"of size %lu", size);
|
||||
|
||||
bcstats.numbufpages += atop(size);
|
||||
bcstats.dmapages += atop(size);
|
||||
SET(bp->b_flags, B_DMA);
|
||||
bp->b_pobj = &bp->b_uobj;
|
||||
bp->b_poffs = 0;
|
||||
bp->b_bufsize = size;
|
||||
@@ -310,82 +308,8 @@ buf_free_pages(struct buf *bp)
|
||||
KASSERT(pg->wire_count == 1);
|
||||
pg->wire_count = 0;
|
||||
bcstats.numbufpages--;
|
||||
if (ISSET(bp->b_flags, B_DMA))
|
||||
bcstats.dmapages--;
|
||||
}
|
||||
CLR(bp->b_flags, B_DMA);
|
||||
|
||||
/* XXX refactor to do this without splbio later */
|
||||
uvm_obj_free(uobj);
|
||||
}
|
||||
|
||||
/* Reallocate a buf into a particular pmem range specified by "where". */
|
||||
int
|
||||
buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
|
||||
int flags)
|
||||
{
|
||||
vsize_t size;
|
||||
vaddr_t va;
|
||||
int dma;
|
||||
int i, r;
|
||||
KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));
|
||||
|
||||
splassert(IPL_BIO);
|
||||
KASSERT(ISSET(bp->b_flags, B_BUSY));
|
||||
dma = ISSET(bp->b_flags, B_DMA);
|
||||
|
||||
/* if the original buf is mapped, unmap it */
|
||||
if (bp->b_data != NULL) {
|
||||
va = (vaddr_t)bp->b_data;
|
||||
pmap_kremove(va, bp->b_bufsize);
|
||||
pmap_update(pmap_kernel());
|
||||
}
|
||||
|
||||
do {
|
||||
r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
|
||||
bp->b_bufsize, UVM_PLA_NOWAIT | UVM_PLA_NOWAKE, where);
|
||||
if (r == 0)
|
||||
break;
|
||||
size = atop(bp->b_bufsize);
|
||||
} while ((bufbackoff(where, size) >= size));
|
||||
|
||||
/*
|
||||
* bufbackoff() failed, so there's no more we can do without
|
||||
* waiting. If allowed do, make that attempt.
|
||||
*/
|
||||
if (r != 0 && (flags & UVM_PLA_WAITOK))
|
||||
r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
|
||||
bp->b_bufsize, flags, where);
|
||||
|
||||
/*
|
||||
* If the allocation has succeeded, we may be somewhere different.
|
||||
* If the allocation has failed, we are in the same place.
|
||||
*
|
||||
* We still have to re-map the buffer before returning.
|
||||
*/
|
||||
|
||||
/* take it out of dma stats until we know where we are */
|
||||
if (dma)
|
||||
bcstats.dmapages -= atop(bp->b_bufsize);
|
||||
|
||||
dma = 1;
|
||||
/* if the original buf was mapped, re-map it */
|
||||
for (i = 0; i < atop(bp->b_bufsize); i++) {
|
||||
struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
|
||||
bp->b_poffs + ptoa(i));
|
||||
KASSERT(pg != NULL);
|
||||
if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
|
||||
dma = 0;
|
||||
if (bp->b_data != NULL) {
|
||||
pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
|
||||
PROT_READ|PROT_WRITE);
|
||||
pmap_update(pmap_kernel());
|
||||
}
|
||||
}
|
||||
if (dma) {
|
||||
SET(bp->b_flags, B_DMA);
|
||||
bcstats.dmapages += atop(bp->b_bufsize);
|
||||
} else
|
||||
CLR(bp->b_flags, B_DMA);
|
||||
return(r);
|
||||
}
|
||||
|
||||
+1
-4
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: vfs_vops.c,v 1.38 2025/05/02 10:44:20 claudio Exp $ */
|
||||
/* $OpenBSD: vfs_vops.c,v 1.39 2026/06/10 00:04:38 beck Exp $ */
|
||||
/*
|
||||
* Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
|
||||
*
|
||||
@@ -630,9 +630,6 @@ VOP_STRATEGY(struct vnode *vp, struct buf *bp)
|
||||
a.a_vp = vp;
|
||||
a.a_bp = bp;
|
||||
|
||||
if ((ISSET(bp->b_flags, B_BC)) && (!ISSET(bp->b_flags, B_DMA)))
|
||||
panic("Non dma reachable buffer passed to VOP_STRATEGY");
|
||||
|
||||
if (vp->v_op->vop_strategy == NULL)
|
||||
return (EOPNOTSUPP);
|
||||
|
||||
|
||||
+1
-2
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: nfs_vnops.c,v 1.213 2026/06/09 02:50:21 jsg Exp $ */
|
||||
/* $OpenBSD: nfs_vnops.c,v 1.214 2026/06/10 00:04:38 beck Exp $ */
|
||||
/* $NetBSD: nfs_vnops.c,v 1.62.4.1 1996/07/08 20:26:52 jtc Exp $ */
|
||||
|
||||
/*
|
||||
@@ -3432,7 +3432,6 @@ nfs_writebp(struct buf *bp, int force)
|
||||
}
|
||||
if (retv) {
|
||||
s = splbio();
|
||||
buf_flip_dma(bp);
|
||||
if (force)
|
||||
bp->b_flags |= B_WRITEINPROG;
|
||||
splx(s);
|
||||
|
||||
+2
-5
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: buf.h,v 1.120 2025/08/14 16:13:52 beck Exp $ */
|
||||
/* $OpenBSD: buf.h,v 1.121 2026/06/10 00:04:38 beck Exp $ */
|
||||
/* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */
|
||||
|
||||
/*
|
||||
@@ -120,7 +120,6 @@ struct buf {
|
||||
LIST_ENTRY(buf) b_list; /* All allocated buffers. */
|
||||
LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
|
||||
TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
|
||||
int cache; /* which cache are we in */
|
||||
struct proc *b_proc; /* Associated proc; NULL if kernel. */
|
||||
volatile long b_flags; /* B_* flags. */
|
||||
long b_bufsize; /* Allocated buffer size. */
|
||||
@@ -193,7 +192,6 @@ struct bufcache {
|
||||
#define B_WARM 0x00800000 /* buffer is or has been on the warm queue */
|
||||
#define B_COLD 0x01000000 /* buffer is on the cold queue */
|
||||
#define B_BC 0x02000000 /* buffer is managed by the cache */
|
||||
#define B_DMA 0x04000000 /* buffer is DMA reachable */
|
||||
|
||||
#define B_BITS "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
|
||||
"\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
|
||||
@@ -264,7 +262,7 @@ void bufcache_release(struct buf *);
|
||||
|
||||
int buf_flip_high(struct buf *);
|
||||
void buf_flip_dma(struct buf *);
|
||||
struct buf *bufcache_getcleanbuf(int, int);
|
||||
struct buf *bufcache_getcleanbuf(int);
|
||||
struct buf *bufcache_getdirtybuf(void);
|
||||
|
||||
/*
|
||||
@@ -284,7 +282,6 @@ int buf_dealloc_mem(struct buf *);
|
||||
void buf_fix_mapping(struct buf *, vsize_t);
|
||||
void buf_alloc_pages(struct buf *, vsize_t);
|
||||
void buf_free_pages(struct buf *);
|
||||
int buf_realloc_pages(struct buf *, struct uvm_constraint_range *, int);
|
||||
|
||||
void minphys(struct buf *bp);
|
||||
int physio(void (*strategy)(struct buf *), dev_t dev, int flags,
|
||||
|
||||
+1
-6
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: mount.h,v 1.153 2025/01/02 01:19:22 dlg Exp $ */
|
||||
/* $OpenBSD: mount.h,v 1.154 2026/06/10 00:04:38 beck Exp $ */
|
||||
/* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */
|
||||
|
||||
/*
|
||||
@@ -478,14 +478,9 @@ struct bcachestats {
|
||||
int64_t numreads; /* total reads started */
|
||||
int64_t cachehits; /* total reads found in cache */
|
||||
int64_t busymapped; /* number of busy and mapped buffers */
|
||||
int64_t dmapages; /* dma reachable pages in buffer cache */
|
||||
int64_t highpages; /* pages above dma region */
|
||||
int64_t delwribufs; /* delayed write buffers */
|
||||
int64_t kvaslots; /* kva slots total */
|
||||
int64_t kvaslots_avail; /* available kva slots */
|
||||
int64_t highflips; /* total flips to above DMA */
|
||||
int64_t highflops; /* total failed flips to above DMA */
|
||||
int64_t dmaflips; /* total flips from high to DMA */
|
||||
};
|
||||
#ifdef _KERNEL
|
||||
extern struct bcachestats bcstats;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: uvm_extern.h,v 1.187 2025/11/13 10:55:51 mpi Exp $ */
|
||||
/* $OpenBSD: uvm_extern.h,v 1.188 2026/06/10 00:04:38 beck Exp $ */
|
||||
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
|
||||
|
||||
/*
|
||||
@@ -398,8 +398,6 @@ int uvm_pagealloc_multi(struct uvm_object *, voff_t,
|
||||
vsize_t, int);
|
||||
void uvm_pagerealloc(struct vm_page *,
|
||||
struct uvm_object *, voff_t);
|
||||
int uvm_pagerealloc_multi(struct uvm_object *, voff_t,
|
||||
vsize_t, int, struct uvm_constraint_range *);
|
||||
/* Actually, uvm_page_physload takes PF#s which need their own type */
|
||||
void uvm_page_physload(paddr_t, paddr_t, paddr_t,
|
||||
paddr_t, int);
|
||||
|
||||
+3
-46
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: uvm_page.c,v 1.188 2026/02/11 22:34:41 deraadt Exp $ */
|
||||
/* $OpenBSD: uvm_page.c,v 1.189 2026/06/10 00:04:38 beck Exp $ */
|
||||
/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */
|
||||
|
||||
/*
|
||||
@@ -798,7 +798,6 @@ uvm_pglistfree(struct pglist *list)
|
||||
|
||||
/*
|
||||
* interface used by the buffer cache to allocate a buffer at a time.
|
||||
* The pages are allocated wired in DMA accessible memory
|
||||
*/
|
||||
int
|
||||
uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
|
||||
@@ -812,8 +811,8 @@ uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
|
||||
KERNEL_ASSERT_LOCKED();
|
||||
|
||||
TAILQ_INIT(&plist);
|
||||
r = uvm_pglistalloc(size, dma_constraint.ucr_low,
|
||||
dma_constraint.ucr_high, 0, 0, &plist, atop(round_page(size)),
|
||||
r = uvm_pglistalloc(size, no_constraint.ucr_low,
|
||||
no_constraint.ucr_high, 0, 0, &plist, atop(round_page(size)),
|
||||
flags);
|
||||
if (r == 0) {
|
||||
i = 0;
|
||||
@@ -828,48 +827,6 @@ uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* interface used by the buffer cache to reallocate a buffer at a time.
|
||||
* The pages are reallocated wired outside the DMA accessible region.
|
||||
*
|
||||
*/
|
||||
int
|
||||
uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
|
||||
int flags, struct uvm_constraint_range *where)
|
||||
{
|
||||
struct pglist plist;
|
||||
struct vm_page *pg, *tpg;
|
||||
int i, r;
|
||||
voff_t offset;
|
||||
|
||||
KASSERT(UVM_OBJ_IS_BUFCACHE(obj));
|
||||
KERNEL_ASSERT_LOCKED();
|
||||
|
||||
TAILQ_INIT(&plist);
|
||||
if (size == 0)
|
||||
panic("size 0 uvm_pagerealloc");
|
||||
r = uvm_pglistalloc(size, where->ucr_low, where->ucr_high, 0,
|
||||
0, &plist, atop(round_page(size)), flags);
|
||||
if (r == 0) {
|
||||
i = 0;
|
||||
while((pg = TAILQ_FIRST(&plist)) != NULL) {
|
||||
offset = off + ptoa(i++);
|
||||
tpg = uvm_pagelookup(obj, offset);
|
||||
KASSERT(tpg != NULL);
|
||||
pg->wire_count = 1;
|
||||
atomic_setbits_int(&pg->pg_flags, PG_CLEAN | PG_FAKE);
|
||||
KASSERT((pg->pg_flags & PG_DEV) == 0);
|
||||
TAILQ_REMOVE(&plist, pg, pageq);
|
||||
uvm_pagecopy(tpg, pg);
|
||||
KASSERT(tpg->wire_count == 1);
|
||||
tpg->wire_count = 0;
|
||||
uvm_pagefree(tpg);
|
||||
uvm_pagealloc_pg(pg, obj, offset, NULL);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* uvm_pagealloc: allocate vm_page from a particular free list.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user