From phk at varnish-cache.org Tue Feb 1 10:27:47 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 01 Feb 2011 11:27:47 +0100 Subject: [master] c157e5f Polishing the object allocation code and persistence a bit Message-ID: commit c157e5fd1cd48cf2bab58801702685f1cb1f34c0 Author: Poul-Henning Kamp Date: Tue Feb 1 10:27:17 2011 +0000 Polishing the object allocation code and persistence a bit diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c index 8e4d0c5..421f950 100644 --- a/bin/varnishd/stevedore.c +++ b/bin/varnishd/stevedore.c @@ -176,6 +176,9 @@ STV_MkObject(struct sess *sp, void *ptr, unsigned ltot, CHECK_OBJ_NOTNULL(soc, STV_OBJ_SECRETES_MAGIC); assert(PAOK(ptr)); + assert(PAOK(soc->wsl)); + assert(PAOK(soc->lhttp)); + assert(ltot >= sizeof *o + soc->lhttp + soc->wsl); o = ptr; @@ -185,12 +188,10 @@ STV_MkObject(struct sess *sp, void *ptr, unsigned ltot, l = PRNDDN(ltot - (sizeof *o + soc->lhttp)); assert(l >= soc->wsl); - assert(PAOK(soc->wsl)); - assert(PAOK(soc->lhttp)); - o->http = HTTP_create(o + 1, soc->nhttp); WS_Init(o->ws_o, "obj", (char *)(o + 1) + soc->lhttp, soc->wsl); WS_Assert(o->ws_o); + assert(o->ws_o->e <= (char*)ptr + ltot); http_Setup(o->http, o->ws_o); o->http->magic = HTTP_MAGIC; @@ -227,8 +228,12 @@ stv_default_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, CHECK_OBJ_NOTNULL(soc, STV_OBJ_SECRETES_MAGIC); st = stv->alloc(stv, ltot); - XXXAN(st); - xxxassert(st->space >= ltot); + if (st == NULL) + return (NULL); + if (st->space < ltot) { + stv->free(st); + return (NULL); + } ltot = st->len = st->space; o = STV_MkObject(sp, st->ptr, ltot, soc); CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); @@ -238,7 +243,7 @@ stv_default_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, /*------------------------------------------------------------------- * Allocate storage for an object, based on the header information. 
- * XXX: If we know (a hint of) the length, we should allocate space + * XXX: If we know (a hint of) the length, we could allocate space * XXX: for the body in the same allocation while we are at it. */ @@ -269,6 +274,8 @@ STV_NewObject(struct sess *sp, const char *hint, unsigned wsl, double ttl, stv = stv_pick_stevedore(hint); AN(stv->allocobj); o = stv->allocobj(stv, sp, ltot, &soc); + if (o == NULL) + return (NULL); CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); CHECK_OBJ_NOTNULL(o->objstore, STORAGE_MAGIC); return (o); diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index a4ba7a3..740c4d0 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -133,7 +133,7 @@ struct smp_sc { unsigned granularity; uint32_t unique; - uint8_t *ptr; + uint8_t *base; struct smp_ident *ident; @@ -210,7 +210,7 @@ smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, assert(strlen(id) < sizeof ctx->ss->ident); memset(ctx, 0, sizeof ctx); - ctx->ss = (void*)(sc->ptr + off); + ctx->ss = (void*)(sc->base + off); ctx->unique = sc->unique; ctx->id = id; } @@ -567,10 +567,10 @@ smp_init(struct stevedore *parent, int ac, char * const *av) AZ(ftruncate(sc->fd, sc->mediasize)); - sc->ptr = mmap(NULL, sc->mediasize, PROT_READ|PROT_WRITE, + sc->base = mmap(NULL, sc->mediasize, PROT_READ|PROT_WRITE, MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, 0); - if (sc->ptr == MAP_FAILED) + if (sc->base == MAP_FAILED) ARGV_ERR("(-spersistent) failed to mmap (%s)\n", strerror(errno)); @@ -914,7 +914,7 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); if (smp_chk_sign(ctx)) return; - so = (void*)(sc->ptr + sg->p.objlist); + so = (void*)(sc->base + sg->p.objlist); sg->objs = so; sg->nalloc2 = sg->p.nalloc; no = sg->p.nalloc; @@ -1144,7 +1144,7 @@ smp_new_seg(struct smp_sc *sc) sg->next_addr = sg->p.offset + sizeof (struct smp_sign) + // XXX use macro SHA256_LEN; - 
memcpy(sc->ptr + sg->next_addr, "HERE", 4); + memcpy(sc->base + sg->next_addr, "HERE", 4); sc->objreserv = 0; } @@ -1188,7 +1188,7 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) sg->p.objlist = sg->next_addr; sg->p.nalloc = sg->nalloc1; - p = (void*)(sc->ptr + sg->next_addr); + p = (void*)(sc->base + sg->next_addr); sg->next_addr += C_ALIGN(sc->objreserv); memcpy(p, sg->objs, sc->objreserv); @@ -1258,7 +1258,7 @@ smp_open(const struct stevedore *st) /* We trust the parent to give us a valid silo, for good measure: */ AZ(smp_valid_silo(sc)); - AZ(mprotect(sc->ptr, 4096, PROT_READ)); + AZ(mprotect(sc->base, 4096, PROT_READ)); sc->ident = SIGN_DATA(&sc->idn); @@ -1378,15 +1378,15 @@ smp_allocx(struct stevedore *st, size_t size, struct smp_seg **sgp) assert(needed <= smp_spaceleft(sg)); /* Grab for storage struct */ - ss = (void *)(sc->ptr + sg->next_addr); + ss = (void *)(sc->base + sg->next_addr); sg->next_addr += C_ALIGN(sizeof *ss); /* Grab for allocated space */ - allocation = sc->ptr + sg->next_addr; + allocation = sc->base + sg->next_addr; sg->next_addr += size; /* Paint our marker */ - memcpy(sc->ptr + sg->next_addr, "HERE", 4); + memcpy(sc->base + sg->next_addr, "HERE", 4); if (sgp != NULL) { /* Make reservation in the index */ @@ -1411,7 +1411,7 @@ smp_allocx(struct stevedore *st, size_t size, struct smp_seg **sgp) // XXX: wrong: ss->where = sg->next_addr + sizeof *ss; assert((uintmax_t)ss->space == (uintmax_t)size); assert((char*)ss->ptr > (char*)ss); - assert((char*)ss->ptr + ss->space <= (char*)sc->ptr + sc->mediasize); + assert((char*)ss->ptr + ss->space <= (char*)sc->base + sc->mediasize); return (ss); } @@ -1442,6 +1442,10 @@ smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, st = smp_allocx(stv, ltot, &sg); if (st == NULL) return (NULL); + if (st->space < ltot) { + // XXX: smp_free(st); + return (NULL); + } assert(st->space >= ltot); ltot = st->len = st->space; @@ -1463,7 +1467,7 @@ smp_allocobj(struct stevedore *stv, struct 
sess *sp, unsigned ltot, sg->nobj++; assert(sizeof so->hash == DIGEST_LEN); memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); - so->ttl = o->ttl; + so->ttl = o->ttl; /* XXX: grace? */ so->ptr = o; so->ban = o->ban_t; @@ -1486,40 +1490,24 @@ smp_alloc(struct stevedore *st, size_t size) return (smp_allocx(st, size, NULL)); } +/*-------------------------------------------------------------------- + * Trim a bite + * XXX: We could trim the last allocation. + */ + static void smp_trim(struct storage *ss, size_t size) { - struct smp_sc *sc; - struct smp_seg *sg; - const char z[4] = { 0, 0, 0, 0}; - - return; - - CAST_OBJ_NOTNULL(sc, ss->priv, SMP_SC_MAGIC); - - /* We want 16 bytes alignment */ - size |= 0xf; - size += 1; - sg = sc->cur_seg; - if (ss->ptr + ss->space != sg->next_addr + sc->ptr) - return; - - Lck_Lock(&sc->mtx); - sg = sc->cur_seg; - if (ss->ptr + ss->space == sg->next_addr + sc->ptr) { - memcpy(sc->ptr + sg->next_addr, z, 4); - sg->next_addr -= ss->space - size; - ss->space = size; - memcpy(sc->ptr + sg->next_addr, "HERE", 4); - } - Lck_Unlock(&sc->mtx); + (void)ss; + (void)size; } /*-------------------------------------------------------------------- - * We don't track frees of storage, we track the objects which own them - * instead, when there are no more objects in in the first segment, it - * can be reclaimed. + * We don't track frees of storage, we track the objects which own the + * storage and when there are no more objects in in the first segment, + * it can be reclaimed. + * XXX: We could free the last allocation, but does that happen ? 
*/ static void __match_proto__(storage_free_f) @@ -1540,16 +1528,13 @@ SMP_Ready(void) struct smp_sc *sc; ASSERT_CLI(); - while (1) { - VTAILQ_FOREACH(sc, &silos, list) { - if (sc->flags & SMP_F_LOADED) - continue; + do { + VTAILQ_FOREACH(sc, &silos, list) + if (!(sc->flags & SMP_F_LOADED)) + break; + if (sc != NULL) (void)sleep(1); - break; - } - if (sc == NULL) - break; - } + } while (sc != NULL); } /*--------------------------------------------------------------------*/ diff --git a/include/persistent.h b/include/persistent.h index 2a229c0..5642e3b 100644 --- a/include/persistent.h +++ b/include/persistent.h @@ -44,11 +44,11 @@ * sha256[...] checksum of same * * struct smp_sign; - * struct smp_segment_1[N]; Segment table + * struct smp_segment_1[N]; First Segment table * sha256[...] checksum of same * * struct smp_sign; - * struct smp_segment_2[N]; Segment table + * struct smp_segment_2[N]; Second Segment table * sha256[...] checksum of same * * N segments { From phk at varnish-cache.org Tue Feb 1 12:13:57 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 01 Feb 2011 13:13:57 +0100 Subject: [master] e8b3d50 Don't wait for a CLI connection if the varnishd process closed the debug pipe. Message-ID: commit e8b3d50e4878016241868a3266843de4e93cbf27 Author: Poul-Henning Kamp Date: Tue Feb 1 12:13:32 2011 +0000 Don't wait for a CLI connection if the varnishd process closed the debug pipe. 
diff --git a/bin/varnishtest/vtc_varnish.c b/bin/varnishtest/vtc_varnish.c index 56c6f4d..7e59311 100644 --- a/bin/varnishtest/vtc_varnish.c +++ b/bin/varnishtest/vtc_varnish.c @@ -237,7 +237,7 @@ varnish_launch(struct varnish *v) int i, nfd, nap; struct vss_addr **ap; char abuf[128], pbuf[128]; - struct pollfd fd; + struct pollfd fd[2]; enum cli_status_e u; char *r; @@ -301,13 +301,25 @@ varnish_launch(struct varnish *v) AZ(pthread_create(&v->tp, NULL, varnish_thread, v)); /* Wait for the varnish to call home */ - fd.fd = v->cli_fd; - fd.events = POLLIN; - i = poll(&fd, 1, 10000); - if (i != 1) { + fd[0].fd = v->cli_fd; + fd[0].events = POLLIN; + fd[1].fd = v->fds[0]; + fd[1].events = POLLOUT; + i = poll(fd, 2, 10000); + vtc_log(v->vl, 4, "CLIPOLL %d 0x%x 0x%x", + i, fd[0].revents, fd[1].revents); + if (i == 0) { vtc_log(v->vl, 0, "FAIL timeout waiting for CLI connection"); return; } + if (fd[1].revents & POLLHUP) { + vtc_log(v->vl, 0, "FAIL debug pipe closed"); + return; + } + if (!(fd[0].revents & POLLIN)) { + vtc_log(v->vl, 0, "FAIL CLI connection wait failure"); + return; + } nfd = accept(v->cli_fd, NULL, NULL); if (nfd < 0) { vtc_log(v->vl, 0, "FAIL no CLI connection accepted"); From phk at varnish-cache.org Tue Feb 1 12:48:38 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 01 Feb 2011 13:48:38 +0100 Subject: [master] 8f848fb Set the alignment of things in the persistent silo on a more systematic footing. Message-ID: commit 8f848fbab1201dc8eb90aaf8dc8c81b35aac3d28 Author: Poul-Henning Kamp Date: Tue Feb 1 12:48:08 2011 +0000 Set the alignment of things in the persistent silo on a more systematic footing. 
diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 740c4d0..2ee906e 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -66,9 +66,6 @@ SVNID("$Id$") #define ASSERT_SILO_THREAD(sc) \ do {assert(pthread_self() == (sc)->thread);} while (0) -#define RDN2(x, y) ((x)&(~((y)-1))) /* if y is powers of two */ -#define RUP2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ - #define OC_F_NEEDFIXUP OC_F_PRIV /* @@ -130,7 +127,8 @@ struct smp_sc { int fd; const char *filename; off_t mediasize; - unsigned granularity; + uint64_t align; /* 64b to avoid casts */ + uint32_t granularity; uint32_t unique; uint8_t *base; @@ -173,11 +171,23 @@ struct smp_sc { uint64_t free_reserve; }; -#define CACHE_LINE_ALIGN 16 +/*--------------------------------------------------------------------*/ + +/* Generic power-2 rounding */ +#define PWR2(x) ((((x)-1)&(x))==0) /* Is a power of two */ +#define RDN2(x, y) ((x)&(~((y)-1))) /* if y is powers of two */ +#define RUP2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ + +/* Pointer round up/down & assert */ +#define PRNDN(sc, x) ((void*)RDN2((uintptr_t)x, sc->align)) +#define PRNUP(sc, x) ((void*)RUP2((uintptr_t)x, sc->align)) +#define ASSERTALIGN(sc, x) assert(PRDN(sc, x) == x) -#define C_ALIGN(x) RUP2(x, CACHE_LINE_ALIGN) +/* Integer round up/down & assert */ +#define IRNDN(sc, x) RDN2(x, sc->align) +#define IRNUP(sc, x) RUP2(x, sc->align) -#define SEG_SPACE RUP2(SMP_SIGN_SPACE, CACHE_LINE_ALIGN) +/*--------------------------------------------------------------------*/ /* * silos is unlocked, it only changes during startup when we are @@ -356,11 +366,15 @@ smp_newsilo(struct smp_sc *sc) strcpy(si->ident, SMP_IDENT_STRING); si->byte_order = 0x12345678; si->size = sizeof *si; - si->major_version = 1; - si->minor_version = 2; + si->major_version = 2; si->unique = sc->unique; si->mediasize = sc->mediasize; si->granularity = sc->granularity; + /* + * Aim 
for cache-line-width + */ + si->align = sizeof(void*) * 2; + sc->align = si->align; si->stuff[SMP_BAN1_STUFF] = sc->granularity; si->stuff[SMP_BAN2_STUFF] = si->stuff[SMP_BAN1_STUFF] + 1024*1024; @@ -401,14 +415,17 @@ smp_valid_silo(struct smp_sc *sc) return (3); if (si->size != sizeof *si) return (4); - if (si->major_version != 1) + if (si->major_version != 2) return (5); - if (si->minor_version != 2) - return (6); if (si->mediasize != sc->mediasize) return (7); if (si->granularity != sc->granularity) return (8); + if (si->align < sizeof(void*)) + return (9); + if (!PWR2(si->align)) + return (10); + sc->align = si->align; sc->unique = si->unique; /* XXX: Sanity check stuff[6] */ @@ -544,6 +561,7 @@ smp_init(struct stevedore *parent, int ac, char * const *av) #undef SIZOF /* See comments in persistent.h */ +printf("%jd %d\n", sizeof(struct smp_ident), SMP_IDENT_SIZE); assert(sizeof(struct smp_ident) == SMP_IDENT_SIZE); /* Allocate softc */ @@ -561,6 +579,7 @@ smp_init(struct stevedore *parent, int ac, char * const *av) if (i == 2) ARGV_ERR("(-spersistent) need filename (not directory)\n"); + sc->align = sizeof(void*) * 2; sc->granularity = getpagesize(); sc->mediasize = STV_FileSize(sc->fd, av[1], &sc->granularity, "-spersistent"); @@ -1174,7 +1193,7 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) } assert(sg->nalloc1 * sizeof(struct smp_object) == sc->objreserv); - assert(C_ALIGN(sc->objreserv) + 2 * SEG_SPACE <= smp_spaceleft(sg)); + // assert(C_ALIGN(sc->objreserv) + 2 * SEG_SPACE <= smp_spaceleft(sg)); /* Write the OBJIDX */ sg->next_addr |= 7; @@ -1182,14 +1201,14 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) smp_def_sign(sc, sg->ctx, sg->next_addr, "OBJIDX"); smp_reset_sign(sg->ctx); smp_sync_sign(sg->ctx); - sg->next_addr += SEG_SPACE; + sg->next_addr += IRNUP(sc, SMP_SIGN_SPACE); /* Update the segment header */ sg->p.objlist = sg->next_addr; sg->p.nalloc = sg->nalloc1; p = (void*)(sc->base + sg->next_addr); - sg->next_addr += 
C_ALIGN(sc->objreserv); + sg->next_addr += IRNUP(sc, sc->objreserv); memcpy(p, sg->objs, sc->objreserv); sc->objbuf = sg->objs; @@ -1200,7 +1219,7 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) smp_def_sign(sc, sg->ctx, sg->next_addr, "SEGTAIL"); smp_reset_sign(sg->ctx); smp_sync_sign(sg->ctx); - sg->next_addr += SEG_SPACE; + sg->next_addr += IRNUP(sc, SMP_SIGN_SPACE); sg->p.length = sg->next_addr - sg->p.offset; @@ -1327,19 +1346,19 @@ smp_allocx(struct stevedore *st, size_t size, struct smp_seg **sgp) CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); Lck_Lock(&sc->mtx); - size = C_ALIGN(size); + size = IRNUP(sc, size); for (tries = 0; tries < 3; tries++) { sg = sc->cur_seg; CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); - overhead = C_ALIGN(sizeof *ss); - overhead += SEG_SPACE * 2; + overhead = IRNUP(sc, sizeof *ss); + overhead += 2 * IRNUP(sc, SMP_SIGN_SPACE); if (sgp == NULL) { - overhead += C_ALIGN(sc->objreserv); + overhead += IRNUP(sc, sc->objreserv); } else { - overhead += - C_ALIGN(sizeof(struct smp_object) + sc->objreserv); + overhead += IRNUP(sc, + sizeof(struct smp_object) + sc->objreserv); } needed = overhead + size; left = smp_spaceleft(sg); @@ -1368,7 +1387,7 @@ smp_allocx(struct stevedore *st, size_t size, struct smp_seg **sgp) smp_new_seg(sc); } - assert(size == C_ALIGN(size)); + assert(size == IRNUP(sc, size)); if (needed > smp_spaceleft(sg)) { Lck_Unlock(&sc->mtx); @@ -1379,12 +1398,14 @@ smp_allocx(struct stevedore *st, size_t size, struct smp_seg **sgp) /* Grab for storage struct */ ss = (void *)(sc->base + sg->next_addr); - sg->next_addr += C_ALIGN(sizeof *ss); + sg->next_addr += IRNUP(sc, sizeof *ss); /* Grab for allocated space */ allocation = sc->base + sg->next_addr; sg->next_addr += size; + assert((char*)allocation > (char*)ss); + /* Paint our marker */ memcpy(sc->base + sg->next_addr, "HERE", 4); diff --git a/include/persistent.h b/include/persistent.h index 5642e3b..596a7ed 100644 --- a/include/persistent.h +++ b/include/persistent.h @@ 
-79,10 +79,10 @@ struct smp_ident { uint32_t major_version; - uint32_t minor_version; - uint32_t unique; + uint32_t align; /* alignment in silo */ + uint32_t granularity; /* smallest ... in bytes */ uint64_t mediasize; /* ... in bytes */ From phk at varnish-cache.org Tue Feb 1 16:55:05 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 01 Feb 2011 17:55:05 +0100 Subject: [master] 6af4255 Overhaul the allocation & open-segment house-keeping code Message-ID: commit 6af4255312dc5fa13fa62cdaef27c67af78e2b4c Author: Poul-Henning Kamp Date: Tue Feb 1 16:54:42 2011 +0000 Overhaul the allocation & open-segment house-keeping code diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 2ee906e..4fc062f 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -104,12 +104,12 @@ struct smp_seg { uint32_t nobj; /* Number of objects */ uint32_t nalloc; /* Allocations */ uint32_t nalloc1; /* Allocated objects */ - uint32_t nalloc2; /* Registered objects */ uint32_t nfixed; /* How many fixed objects */ /* Only for open segment */ struct smp_object *objs; /* objdesc array */ - uint64_t next_addr; /* next write address */ + uint64_t next_bot; /* next alloc address bottom */ + uint64_t next_top; /* next alloc address top */ struct smp_signctx ctx[1]; }; @@ -139,7 +139,6 @@ struct smp_sc { struct smp_seg *cur_seg; uint64_t free_offset; - uint64_t objreserv; pthread_t thread; VTAILQ_ENTRY(smp_sc) list; @@ -154,8 +153,6 @@ struct smp_sc { struct lock mtx; - struct smp_object *objbuf; - /* Cleaner metrics */ unsigned min_nseg; @@ -179,13 +176,20 @@ struct smp_sc { #define RUP2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ /* Pointer round up/down & assert */ -#define PRNDN(sc, x) ((void*)RDN2((uintptr_t)x, sc->align)) -#define PRNUP(sc, x) ((void*)RUP2((uintptr_t)x, sc->align)) -#define ASSERTALIGN(sc, x) assert(PRDN(sc, x) == x) +#define PRNDN(sc, x) ((void*)RDN2((uintptr_t)(x), sc->align)) 
+#define PRNUP(sc, x) ((void*)RUP2((uintptr_t)(x), sc->align)) +#define PASSERTALIGN(sc, x) assert(PRNDN(sc, x) == (x)) /* Integer round up/down & assert */ #define IRNDN(sc, x) RDN2(x, sc->align) #define IRNUP(sc, x) RUP2(x, sc->align) +#define IASSERTALIGN(sc, x) assert(IRNDN(sc, x) == (x)) + +/*--------------------------------------------------------------------*/ + +#define ASSERT_PTR_IN_SILO(sc, ptr) \ + assert((const void*)(ptr) >= (const void*)((sc)->base) && \ + (const void*)(ptr) < (const void *)((sc)->base + (sc)->mediasize)) /*--------------------------------------------------------------------*/ @@ -561,7 +565,6 @@ smp_init(struct stevedore *parent, int ac, char * const *av) #undef SIZOF /* See comments in persistent.h */ -printf("%jd %d\n", sizeof(struct smp_ident), SMP_IDENT_SIZE); assert(sizeof(struct smp_ident) == SMP_IDENT_SIZE); /* Allocate softc */ @@ -665,6 +668,23 @@ smp_save_segs(struct smp_sc *sc) smp_save_seg(sc, &sc->seg2); } + +/*--------------------------------------------------------------------- + */ + +static struct smp_object * +smp_find_so(const struct smp_seg *sg, const struct objcore *oc) +{ + struct smp_object *so; + unsigned smp_idx; + + smp_idx = oc->priv2; + assert(smp_idx > 0); + assert(smp_idx <= sg->nalloc1); + so = &sg->objs[sg->nalloc1 - smp_idx]; + return (so); +} + /*--------------------------------------------------------------------- * objcore methods for persistent objects */ @@ -674,7 +694,7 @@ smp_oc_getobj(struct worker *wrk, struct objcore *oc) { struct object *o; struct smp_seg *sg; - unsigned smp_index; + struct smp_object *so; /* Some calls are direct, but they should match anyway */ assert(oc->methods->getobj == smp_oc_getobj); @@ -684,10 +704,16 @@ smp_oc_getobj(struct worker *wrk, struct objcore *oc) AZ(oc->flags & OC_F_NEEDFIXUP); CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - smp_index = oc->priv2; - assert(smp_index < sg->nalloc2); + so = smp_find_so(sg, oc); - o = sg->objs[smp_index].ptr; + o = 
so->ptr; + /* + * The object may not be in this segment since we allocate it + * In a separate operation than the smp_object. We could check + * that it is in a later segment, but that would be complicated. + * XXX: For now, be happy if it is inside th silo + */ + ASSERT_PTR_IN_SILO(sg->sc, o); CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); /* @@ -718,7 +744,7 @@ smp_oc_updatemeta(struct objcore *oc) { struct object *o; struct smp_seg *sg; - unsigned smp_index; + struct smp_object *so; double mttl; CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); @@ -727,8 +753,7 @@ smp_oc_updatemeta(struct objcore *oc) CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); - smp_index = oc->priv2; - assert(smp_index < sg->nalloc2); + so = smp_find_so(sg, oc); if (isnan(o->grace)) mttl = o->ttl; @@ -738,12 +763,12 @@ smp_oc_updatemeta(struct objcore *oc) if (sg == sg->sc->cur_seg) { /* Lock necessary, we might race close_seg */ Lck_Lock(&sg->sc->mtx); - sg->objs[smp_index].ban = o->ban_t; - sg->objs[smp_index].ttl = mttl; + so->ban = o->ban_t; + so->ttl = mttl; Lck_Unlock(&sg->sc->mtx); } else { - sg->objs[smp_index].ban = o->ban_t; - sg->objs[smp_index].ttl = mttl; + so->ban = o->ban_t; + so->ttl = mttl; } } @@ -752,19 +777,18 @@ smp_oc_freeobj(struct objcore *oc) { struct smp_seg *sg; struct object *o; - unsigned smp_index; + struct smp_object *so; CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); o = smp_oc_getobj(NULL, oc); AN(o); CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - smp_index = oc->priv2; - assert(smp_index < sg->nalloc2); + so = smp_find_so(sg, oc); Lck_Lock(&sg->sc->mtx); - sg->objs[smp_index].ttl = 0; - sg->objs[smp_index].ptr = 0; + so->ttl = 0; + so->ptr = 0; assert(sg->nobj > 0); assert(sg->nfixed > 0); @@ -891,11 +915,12 @@ smp_segend(const struct smp_seg *sg) } static uint64_t -smp_spaceleft(const struct smp_seg *sg) +smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) { - assert(sg->next_addr <= smp_segend(sg)); - return (smp_segend(sg) - 
sg->next_addr); + IASSERTALIGN(sc, sg->next_bot); + assert(sg->next_bot <= sg->next_top - IRNUP(sc, SMP_SIGN_SPACE)); + return ((sg->next_top - sg->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); } /*-------------------------------------------------------------------- @@ -917,7 +942,7 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) { struct smp_object *so; struct objcore *oc; - uint32_t no, n; + uint32_t no; double t_now = TIM_real(); struct smp_signctx ctx[1]; @@ -933,14 +958,16 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); if (smp_chk_sign(ctx)) return; + + /* test SEGTAIL */ + /* test OBJIDX */ so = (void*)(sc->base + sg->p.objlist); sg->objs = so; - sg->nalloc2 = sg->p.nalloc; + sg->nalloc1 = sg->p.nalloc; no = sg->p.nalloc; /* Clear the bogus "hold" count */ sg->nobj = 0; - n = 0; - for (;no > 0; so++,no--,n++) { + for (;no > 0; so++,no--) { if (so->ttl > 0 && so->ttl < t_now) continue; if (so->ttl < 0 && -so->ttl < t_now) @@ -950,7 +977,7 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; oc->flags &= ~OC_F_BUSY; oc->priv = sg; - oc->priv2 = n; + oc->priv2 = no; oc->methods = &smp_oc_methods; oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); @@ -1003,7 +1030,6 @@ smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) * [__xxxxyyyyzzzz___] * Plenty of space at tail, do nothing. */ -//printf("TRS: %jx @ %jx\n", l, sc->free_offset); } else if (ss->offset > se->offset) { /* * [zzzz____xxxxyyyy_] @@ -1013,11 +1039,9 @@ smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) * last check. 
*/ while (ss < se && ss->offset > se->offset) { -//printf("TEST_SEG1 %jx...%jx\n", ss->offset, ss->offset + ss->length); l = ss->offset - (se->offset + se->length); if (l > sc->free_reserve) break; -//printf("DROP_SEG1 %jx...%jx\n", ss->offset, ss->offset + ss->length); ss++; n++; } @@ -1030,11 +1054,9 @@ smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) */ sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; while (ss < se) { -//printf("TEST_SEG2 %jx...%jx\n", ss->p.offset, ss->p.offset + ss->length); l = ss->offset - sc->free_offset; if (l > sc->free_reserve) break; -//printf("DROP_SEG2 %jx...%jx\n", ss->p.offset, ss->p.offset + ss->length); ss++; n++; } @@ -1042,12 +1064,10 @@ smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) assert (l >= sc->free_reserve); -//printf("FRS: %jx @ %jx\n", l, sc->free_offset); sg1 = NULL; sg2 = NULL; for(; ss <= se; ss++) { -// printf("LOAD_SEG %jx...%jx\n", ss->p.offset, ss->p.offset + ss->length); ALLOC_OBJ(sg, SMP_SEG_MAGIC); AN(sg); sg->lru = LRU_Alloc(); @@ -1105,14 +1125,6 @@ smp_new_seg(struct smp_sc *sc) sg->lru = LRU_Alloc(); CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - AN(sc->objbuf); - sg->objs = sc->objbuf; - sc->objbuf = NULL; - AN(sg->objs); - - /* XXX: debugging */ - memset(sg->objs, 0x11, sizeof *sg->objs * sc->aim_nobj); - /* XXX: find where it goes in silo */ sg->p.offset = sc->free_offset; @@ -1147,8 +1159,9 @@ smp_new_seg(struct smp_sc *sc) assert(smp_segend(sg) <= sg2->p.offset); } - sc->free_offset += sg->p.length; - + sg->p.offset = IRNUP(sc, sg->p.offset); + sg->p.length = IRNDN(sc, sg->p.length); + sc->free_offset = sg->p.offset + sg->p.length; VTAILQ_INSERT_TAIL(&sc->segments, sg, list); @@ -1158,13 +1171,15 @@ smp_new_seg(struct smp_sc *sc) smp_reset_sign(sg->ctx); smp_sync_sign(sg->ctx); - /* Set up our allocation point */ + /* Set up our allocation points */ sc->cur_seg = sg; - sg->next_addr = sg->p.offset + - sizeof (struct smp_sign) + // XXX use macro - SHA256_LEN; - memcpy(sc->base + 
sg->next_addr, "HERE", 4); - sc->objreserv = 0; + sg->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); + sg->next_top = smp_segend(sg); + sg->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + IASSERTALIGN(sc, sg->next_bot); + IASSERTALIGN(sc, sg->next_top); + sg->objs = (void*)(sc->base + sg->next_top); + sg->nalloc1 = 0; } /*-------------------------------------------------------------------- @@ -1174,54 +1189,60 @@ smp_new_seg(struct smp_sc *sc) static void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) { - void *p; + uint64_t left, dst, len; + void *dp; Lck_AssertHeld(&sc->mtx); - /* XXX: if segment is empty, delete instead */ assert(sg == sc->cur_seg); AN(sg->p.offset); - sc->cur_seg = NULL; if (sg->nalloc == 0) { - sc->objbuf = sg->objs; - AN(sc->objbuf); + /* XXX: if segment is empty, delete instead */ VTAILQ_REMOVE(&sc->segments, sg, list); free(sg); return; } - assert(sg->nalloc1 * sizeof(struct smp_object) == sc->objreserv); - // assert(C_ALIGN(sc->objreserv) + 2 * SEG_SPACE <= smp_spaceleft(sg)); + assert(sg->next_bot <= sg->next_top - IRNUP(sc, SMP_SIGN_SPACE)); + IASSERTALIGN(sc, sg->next_bot); - /* Write the OBJIDX */ - sg->next_addr |= 7; - sg->next_addr++; - smp_def_sign(sc, sg->ctx, sg->next_addr, "OBJIDX"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - sg->next_addr += IRNUP(sc, SMP_SIGN_SPACE); + /* + * If there is enough space left, that we can move the smp_objects + * down without overwriting the present copy, we will do so to + * compact the segment. 
+ */ + left = smp_spaceleft(sc, sg); + len = sizeof(struct smp_object) * sg->nalloc1; + if (len < left) { + dst = sg->next_bot + IRNUP(sc, SMP_SIGN_SPACE); + dp = sc->base + dst; + assert((uintptr_t)dp + len < (uintptr_t)sg->objs); + memcpy(dp, sg->objs, len); + sg->next_top = dst; + sg->objs = dp; + sg->p.length = sg->next_top + len + IRNUP(sc, SMP_SIGN_SPACE); + (void)smp_spaceleft(sc, sg); /* for asserts */ + + } /* Update the segment header */ - sg->p.objlist = sg->next_addr; + sg->p.objlist = sg->next_top; sg->p.nalloc = sg->nalloc1; - p = (void*)(sc->base + sg->next_addr); - sg->next_addr += IRNUP(sc, sc->objreserv); - - memcpy(p, sg->objs, sc->objreserv); - sc->objbuf = sg->objs; - /* XXX: membarrier */ - sg->objs = p; - - /* Write the SEGTAIL */ - smp_def_sign(sc, sg->ctx, sg->next_addr, "SEGTAIL"); + /* Write the (empty) OBJIDX signature */ + sg->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + assert(sg->next_top >= sg->next_bot); + smp_def_sign(sc, sg->ctx, sg->next_top, "OBJIDX"); smp_reset_sign(sg->ctx); smp_sync_sign(sg->ctx); - sg->next_addr += IRNUP(sc, SMP_SIGN_SPACE); - sg->p.length = sg->next_addr - sg->p.offset; + /* Write the (empty) SEGTAIL signature */ + smp_def_sign(sc, sg->ctx, + sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); /* Save segment list */ smp_save_segs(sc); @@ -1271,9 +1292,6 @@ smp_open(const struct stevedore *st) Lck_New(&sc->mtx, lck_smp); Lck_Lock(&sc->mtx); - sc->objbuf = malloc(sizeof *sc->objbuf * sc->aim_nobj); - AN(sc->objbuf); - /* We trust the parent to give us a valid silo, for good measure: */ AZ(smp_valid_silo(sc)); @@ -1326,113 +1344,86 @@ smp_close(const struct stevedore *st) } /*-------------------------------------------------------------------- - * Allocate a bite, possibly for an object. + * Allocate a bite. 
* - * if the segment pointer is provided, we are allocating for an object - * structure, and should reserve space for the smp_object structure in - * the index. This complicates things somewhat. + * Allocate [min_size...max_size] space from the bottom of the segment, + * as is convenient. + * + * If 'so' + 'idx' is given, also allocate a smp_object from the top + * of the segment. + * + * Return the segment in 'ssg' if given. */ static struct storage * -smp_allocx(struct stevedore *st, size_t size, struct smp_seg **sgp) +smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, + struct smp_object **so, unsigned *idx, struct smp_seg **ssg) { struct smp_sc *sc; struct storage *ss; struct smp_seg *sg; - uint64_t needed, left, overhead; - void *allocation; unsigned tries; + uint64_t left, extra; CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - Lck_Lock(&sc->mtx); + assert(min_size <= max_size); - size = IRNUP(sc, size); + max_size = IRNUP(sc, max_size); + min_size = IRNUP(sc, min_size); - for (tries = 0; tries < 3; tries++) { - sg = sc->cur_seg; - CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); - - overhead = IRNUP(sc, sizeof *ss); - overhead += 2 * IRNUP(sc, SMP_SIGN_SPACE); - if (sgp == NULL) { - overhead += IRNUP(sc, sc->objreserv); - } else { - overhead += IRNUP(sc, - sizeof(struct smp_object) + sc->objreserv); - } - needed = overhead + size; - left = smp_spaceleft(sg); - - if (sgp == NULL && needed > left && (overhead + 4096) < left) { - /* XXX: Also check the bit we cut off isn't silly - * short - */ - /* - * Non-objects can be trimmed down to fit what we - * have to offer (think: DVD image), but we do not - * want to trim down to trivial sizes. 
- */ - size = left - overhead; - needed = overhead + size; - assert(needed <= left); - size &= ~15; /* XXX */ - } + extra = IRNUP(sc, sizeof(*ss)); + if (so != NULL) { + extra += sizeof(**so); + AN(idx); + } - /* If there is space, fine */ - if (needed <= left && - (sgp == NULL || sg->nalloc1 < sc->aim_nobj)) + Lck_Lock(&sc->mtx); + sg = NULL; + ss = NULL; + for (tries = 0; tries < 3; tries++) { + left = smp_spaceleft(sc, sc->cur_seg); + if (left >= extra + min_size) break; - smp_close_seg(sc, sc->cur_seg); smp_new_seg(sc); } + if (left >= extra + min_size) { + if (left < extra + max_size) + max_size = IRNDN(sc, left - extra); - assert(size == IRNUP(sc, size)); - - if (needed > smp_spaceleft(sg)) { - Lck_Unlock(&sc->mtx); - return (NULL); - } - - assert(needed <= smp_spaceleft(sg)); - - /* Grab for storage struct */ - ss = (void *)(sc->base + sg->next_addr); - sg->next_addr += IRNUP(sc, sizeof *ss); - - /* Grab for allocated space */ - allocation = sc->base + sg->next_addr; - sg->next_addr += size; - - assert((char*)allocation > (char*)ss); - - /* Paint our marker */ - memcpy(sc->base + sg->next_addr, "HERE", 4); - - if (sgp != NULL) { - /* Make reservation in the index */ - assert(sg->nalloc1 < sc->aim_nobj); - sg->nalloc1++; - sc->objreserv += sizeof (struct smp_object); - assert(sc->objreserv <= smp_spaceleft(sg)); - *sgp = sg; + sg = sc->cur_seg; + ss = (void*)(sc->base + sg->next_bot); + sg->next_bot += max_size + IRNUP(sc, sizeof(*ss)); + sg->nalloc++; + if (so != NULL) { + sg->next_top -= sizeof(**so); + *so = (void*)(sc->base + sg->next_top); + /* Render this smp_object mostly harmless */ + (*so)->ttl = 0.; + (*so)->ban = 0.; + (*so)->ptr = NULL; + sg->objs = *so; + *idx = ++sg->nalloc1; + } + (void)smp_spaceleft(sc, sg); /* for the assert */ } - - sg->nalloc++; Lck_Unlock(&sc->mtx); + if (ss == NULL) + return (ss); + AN(sg); + assert(max_size >= min_size); + /* Fill the storage structure */ memset(ss, 0, sizeof *ss); ss->magic = STORAGE_MAGIC; - ss->ptr = 
allocation; - ss->space = size; + ss->ptr = PRNUP(sc, ss + 1); + ss->space = max_size; ss->priv = sc; ss->stevedore = st; ss->fd = sc->fd; - // XXX: wrong: ss->where = sg->next_addr + sizeof *ss; - assert((uintmax_t)ss->space == (uintmax_t)size); - assert((char*)ss->ptr > (char*)ss); - assert((char*)ss->ptr + ss->space <= (char*)sc->base + sc->mediasize); + if (ssg != NULL) + *ssg = sg; return (ss); } @@ -1451,22 +1442,17 @@ smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, struct smp_seg *sg; struct smp_object *so; struct objcore *oc; - unsigned smp_index; + unsigned objidx; CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); - - /* XXX: temporary sanity */ AN(sp->objcore); AN(sp->wrk->ttl >= 0); - sg = NULL; - st = smp_allocx(stv, ltot, &sg); + ltot = IRNUP(sc, ltot); + + st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); if (st == NULL) return (NULL); - if (st->space < ltot) { - // XXX: smp_free(st); - return (NULL); - } assert(st->space >= ltot); ltot = st->len = st->space; @@ -1480,12 +1466,10 @@ smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, oc->flags |= OC_F_LRUDONTMOVE; Lck_Lock(&sc->mtx); - assert(sg->nalloc2 < sg->nalloc1); - - smp_index = sg->nalloc2++; - so = &sg->objs[smp_index]; sg->nfixed++; sg->nobj++; + + /* We have to do this somewhere, might as well be here... */ assert(sizeof so->hash == DIGEST_LEN); memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); so->ttl = o->ttl; /* XXX: grace? 
*/ @@ -1493,7 +1477,7 @@ smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, so->ban = o->ban_t; oc->priv = sg; - oc->priv2 = smp_index; + oc->priv2 = objidx; oc->methods = &smp_oc_methods; Lck_Unlock(&sc->mtx); @@ -1508,7 +1492,7 @@ static struct storage * smp_alloc(struct stevedore *st, size_t size) { - return (smp_allocx(st, size, NULL)); + return (smp_allocx(st, 4096, size, NULL, NULL, NULL)); } /*-------------------------------------------------------------------- From tfheen at varnish-cache.org Thu Feb 3 08:27:31 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 09:27:31 +0100 Subject: [master] 921a351 need more room for temporary gzip space on stack Message-ID: commit 921a35183de4d295823d82c8c5ee2800b446d089 Author: Nils Goroll Date: Wed Feb 2 17:15:54 2011 +0100 need more room for temporary gzip space on stack diff --git a/bin/varnishtest/tests/e00022.vtc b/bin/varnishtest/tests/e00022.vtc index 871b2c0..fb11da3 100644 --- a/bin/varnishtest/tests/e00022.vtc +++ b/bin/varnishtest/tests/e00022.vtc @@ -16,7 +16,7 @@ server s1 { } } -start -varnish v1 -vcl+backend { +varnish v1 -arg "-p sess_workspace=131072 -p thread_pool_stack=262144" -vcl+backend { sub vcl_recv { set req.esi = true; } From tfheen at varnish-cache.org Thu Feb 3 12:15:32 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 13:15:32 +0100 Subject: [master] 85e6bd2 Change output format of parameter dump to rst Message-ID: commit 85e6bd286516c7f066f20823c36cc46851a2325d Author: Tollef Fog Heen Date: Thu Feb 3 10:41:56 2011 +0100 Change output format of parameter dump to rst If varnishd is compiled with -DDIAGNOSTIC and run with -x dumprst (previously -x dumpmdoc) it will dump the parameter definitions in rst (previously mdoc) format, suitable for including into the varnishd reference documentation. 
diff --git a/bin/varnishd/mgt.h b/bin/varnishd/mgt.h index 2dd07bd..122d384 100644 --- a/bin/varnishd/mgt.h +++ b/bin/varnishd/mgt.h @@ -68,7 +68,7 @@ void MCF_ParamSync(void); void MCF_ParamInit(struct cli *); void MCF_ParamSet(struct cli *, const char *param, const char *val); #ifdef DIAGNOSTICS -void MCF_DumpMdoc(void); +void MCF_DumpRst(void); #endif /* mgt_shmem.c */ diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index 4ba6554..616b627 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -1083,26 +1083,25 @@ MCF_ParamInit(struct cli *cli) #ifdef DIAGNOSTICS void -MCF_DumpMdoc(void) +MCF_DumpRst(void) { const struct parspec *pp; const char *p, *q; int i; - printf(".Bl -tag -width 4n\n"); for (i = 0; i < nparspec; i++) { pp = parspec[i]; - printf(".It Va %s\n", pp->name); + printf("%s\n", pp->name); if (pp->units != NULL && *pp->units != '\0') - printf("Units:\n.Dv %s\n.br\n", pp->units); - printf("Default:\n.Dv %s\n.br\n", pp->def); + printf("\t- Units: %s\n", pp->units); + printf("\t- Default: %s\n", pp->def == MAGIC_INIT_STRING ? "magic" : pp->def); /* * XXX: we should mark the params with one/two flags * XXX: that say if ->min/->max are valid, so we * XXX: can emit those also in help texts. 
*/ if (pp->flags) { - printf("Flags:\n.Dv \""); + printf("\t- Flags: "); q = ""; if (pp->flags & DELAYED_EFFECT) { printf("%sdelayed", q); @@ -1120,23 +1119,26 @@ MCF_DumpMdoc(void) printf("%sexperimental", q); q = ", "; } - printf("\"\n.br\n"); + printf("\n"); } - printf(".Pp\n"); + printf("\n\t"); for (p = pp->descr; *p; p++) { if (*p == '\n' && p[1] =='\0') break; if (*p == '\n' && p[1] =='\n') { - printf("\n.Pp\n"); + printf("\n\n\t"); p++; } else if (*p == '\n') { - printf("\n.br\n"); + printf("\n\t"); + } else if (*p == ':' && p[1] == '\n') { + /* Start of definition list, use RSTs code mode for this */ + printf("::\n"); } else { printf("%c", *p); } } - printf("\n.Pp\n"); + printf("\n\n"); } - printf(".El\n"); + printf("\n"); } #endif /* DIAGNOSTICS */ diff --git a/bin/varnishd/varnishd.c b/bin/varnishd/varnishd.c index a6f1769..78765d2 100644 --- a/bin/varnishd/varnishd.c +++ b/bin/varnishd/varnishd.c @@ -501,8 +501,8 @@ main(int argc, char * const *argv) exit(0); case 'x': #ifdef DIAGNOSTICS - if (!strcmp(optarg, "dumpmdoc")) { - MCF_DumpMdoc(); + if (!strcmp(optarg, "dumprst")) { + MCF_DumpRst(); exit (0); } #endif /* DIAGNOSTICS */ From tfheen at varnish-cache.org Thu Feb 3 12:15:34 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 13:15:34 +0100 Subject: [master] 60a22cb Fix typo in parameter description Message-ID: commit 60a22cb1a6d58adf5e1f198a8bd0638b773ff72f Author: Tollef Fog Heen Date: Thu Feb 3 10:43:49 2011 +0100 Fix typo in parameter description diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index 616b627..a2aa9b8 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -797,7 +797,7 @@ static const struct parspec input_parspec[] = { { "ban_lurker_sleep", tweak_timeout_double, &master.ban_lurker_sleep, 0, UINT_MAX, "How long time does the ban lurker thread sleeps between " - "successfull attempts to push the last item up the ban " + "successful attempts to push the last item up the 
ban " " list. It always sleeps a second when nothing can be done.\n" "A value of zero disables the ban lurker.", 0, From tfheen at varnish-cache.org Thu Feb 3 12:15:36 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 13:15:36 +0100 Subject: [master] 9df11bb Update parameter descriptions from rst Message-ID: commit 9df11bb55820812a587746ebbde9f8fb7d6e73a9 Author: Tollef Fog Heen Date: Thu Feb 3 10:58:59 2011 +0100 Update parameter descriptions from rst diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index a2aa9b8..d615dd9 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -814,11 +814,21 @@ static const struct parspec input_parspec[] = { EXPERIMENTAL, "off", "bool" }, { "http_gzip_support", tweak_bool, &master.http_gzip_support, 0, 0, - "Enable support for HTTP GZIP compression.\n", + "Enable gzip support. When enabled Varnish will compress " + "uncompressed objects before they are stored in the cache. " + "If a client does not support gzip encoding Varnish will " + "uncompress compressed objects on demand. Varnish will also " + "rewrite the Accept-Encoding header of clients indicating " + "support for gzip to:\n" + "Accept-Encoding: gzip" + "Clients that do not support gzip will have their " + "Accept-Encoding header removed. 
For more information no how " + "gzip is implemted please see the chapter on gzip in the " + "Varnish reference.", EXPERIMENTAL, "on", "bool" }, { "gzip_tmp_space", tweak_uint, &master.gzip_tmp_space, 0, 2, - "Where temporary space for gzip/gunzip is allocated.\n" + "Where temporary space for gzip/gunzip is allocated:\n" " 0 - malloc\n" " 1 - session workspace\n" " 2 - thread workspace\n" From tfheen at varnish-cache.org Thu Feb 3 12:15:38 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 13:15:38 +0100 Subject: [master] 96895eb Get rid of extra whitespace in param description Message-ID: commit 96895eb9ce1295b895c08be05022f11aadacb590 Author: Tollef Fog Heen Date: Thu Feb 3 10:59:42 2011 +0100 Get rid of extra whitespace in param description diff --git a/bin/varnishd/mgt_pool.c b/bin/varnishd/mgt_pool.c index 0640251..029b702 100644 --- a/bin/varnishd/mgt_pool.c +++ b/bin/varnishd/mgt_pool.c @@ -217,7 +217,7 @@ const struct parspec WRK_parspec[] = { "request on the object.\n" "NB: Even with the implict delay of delivery, " "this parameter controls an exponential increase in " - "number of worker threads. ", + "number of worker threads.", EXPERIMENTAL, "3", "requests per request" }, { "thread_pool_stack", From tfheen at varnish-cache.org Thu Feb 3 12:15:40 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 13:15:40 +0100 Subject: [master] 6ea1b58 Fix typo in param description Message-ID: commit 6ea1b58aec73f9c117bcc90651f13f31e2760349 Author: Tollef Fog Heen Date: Thu Feb 3 11:02:21 2011 +0100 Fix typo in param description diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index d615dd9..13e7b4b 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -820,7 +820,7 @@ static const struct parspec input_parspec[] = { "uncompress compressed objects on demand. 
Varnish will also " "rewrite the Accept-Encoding header of clients indicating " "support for gzip to:\n" - "Accept-Encoding: gzip" + "Accept-Encoding: gzip\n\n" "Clients that do not support gzip will have their " "Accept-Encoding header removed. For more information no how " "gzip is implemted please see the chapter on gzip in the " From tfheen at varnish-cache.org Thu Feb 3 12:15:42 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 13:15:42 +0100 Subject: [master] a6be2c1 Update parameter docs from varnishd dump Message-ID: commit a6be2c19800062ede140a891fb915c64272a0e6d Author: Tollef Fog Heen Date: Thu Feb 3 13:15:13 2011 +0100 Update parameter docs from varnishd dump diff --git a/doc/sphinx/reference/varnishd.rst b/doc/sphinx/reference/varnishd.rst index f2e0ef4..89eba40 100644 --- a/doc/sphinx/reference/varnishd.rst +++ b/doc/sphinx/reference/varnishd.rst @@ -311,533 +311,529 @@ Be aware that on 32 bit systems, certain default values, such as sess_workspace (=64k) are reduced relative to the values listed here, in order to conserve VM space. acceptor_sleep_decay - Default: 0.900 - Flags: experimental + - Default: 0.900 + - Flags: experimental - If we run out of resources, such as file descriptors or worker threads, the acceptor will sleep between - accepts. - This parameter (multiplicatively) reduce the sleep duration for each succesfull accept. (ie: 0.9 = reduce - by 10%) + If we run out of resources, such as file descriptors or worker threads, the acceptor will sleep between accepts. + This parameter (multiplicatively) reduce the sleep duration for each succesfull accept. (ie: 0.9 = reduce by 10%) acceptor_sleep_incr - Units: s - Default: 0.001 - Flags: experimental + - Units: s + - Default: 0.001 + - Flags: experimental - If we run out of resources, such as file descriptors or worker threads, the acceptor will sleep between - accepts. 
- This parameter control how much longer we sleep, each time we fail to accept a new connection. + If we run out of resources, such as file descriptors or worker threads, the acceptor will sleep between accepts. + This parameter control how much longer we sleep, each time we fail to accept a new connection. acceptor_sleep_max - Units: s - Default: 0.050 - Flags: experimental + - Units: s + - Default: 0.050 + - Flags: experimental - If we run out of resources, such as file descriptors or worker threads, the acceptor will sleep between - accepts. - This parameter limits how long it can sleep between attempts to accept new connections. + If we run out of resources, such as file descriptors or worker threads, the acceptor will sleep between accepts. + This parameter limits how long it can sleep between attempts to accept new connections. auto_restart - Units: bool - Default: on + - Units: bool + - Default: on - Restart child process automatically if it dies. + Restart child process automatically if it dies. + +ban_dups + - Units: bool + - Default: on + + Detect and eliminate duplicate bans. ban_lurker_sleep - Units: s - Default: 0.0 + - Units: s + - Default: 0.1 - How long time does the ban lurker thread sleeps between successfull attempts to push the last item up the - purge list. It always sleeps a second when nothing can be done. - A value of zero disables the ban lurker. + How long time does the ban lurker thread sleeps between successful attempts to push the last item up the ban list. It always sleeps a second when nothing can be done. + A value of zero disables the ban lurker. between_bytes_timeout - Units: s - Default: 60 + - Units: s + - Default: 60 - Default timeout between bytes when receiving data from backend. We only wait for this many seconds between - bytes before giving up. A value of 0 means it will never time out. VCL can override this default value for - each backend request and backend request. This parameter does not apply to pipe. 
+ Default timeout between bytes when receiving data from backend. We only wait for this many seconds between bytes before giving up. A value of 0 means it will never time out. VCL can override this default value for each backend request and backend request. This parameter does not apply to pipe. -cache_vbe_conns - Units: bool - Default: off - Flags: experimental +cache_vbcs + - Units: bool + - Default: off + - Flags: experimental - Cache vbe_conn's or rely on malloc, that's the question. + Cache vbc's or rely on malloc, that's the question. cc_command - Default: exec cc -fpic -shared -Wl,-x -o %o %s - Flags: must_reload + - Default: exec gcc -std=gnu99 -DDIAGNOSTICS -pthread -fpic -shared -Wl,-x -o %o %s + - Flags: must_reload - Command used for compiling the C source code to a dlopen(3) loadable object. Any occurrence of %s in the - string will be replaced with the source file name, and %o will be replaced with the output file name. + Command used for compiling the C source code to a dlopen(3) loadable object. Any occurrence of %s in the string will be replaced with the source file name, and %o will be replaced with the output file name. cli_buffer - Units: bytes - Default: 8192 + - Units: bytes + - Default: 8192 - Size of buffer for CLI input. - You may need to increase this if you have big VCL files and use the vcl.inline CLI command. - NB: Must be specified with -p to have effect. + Size of buffer for CLI input. + You may need to increase this if you have big VCL files and use the vcl.inline CLI command. + NB: Must be specified with -p to have effect. cli_timeout - Units: seconds - Default: 10 + - Units: seconds + - Default: 10 - Timeout for the childs replies to CLI requests from the master. + Timeout for the childs replies to CLI requests from the master. clock_skew - Units: s - Default: 10 + - Units: s + - Default: 10 - How much clockskew we are willing to accept between the backend and our own clock. 
+ How much clockskew we are willing to accept between the backend and our own clock. connect_timeout - Units: s - Default: 0.4 + - Units: s + - Default: 0.4 + + Default connection timeout for backend connections. We only try to connect to the backend for this many seconds before giving up. VCL can override this default value for each backend and backend request. + +critbit_cooloff + - Units: s + - Default: 180.0 + - Flags: experimental - Default connection timeout for backend connections. We only try to connect to the backend for this many - seconds before giving up. VCL can override this default value for each backend and backend request. + How long time the critbit hasher keeps deleted objheads on the cooloff list. default_grace - Default: 10seconds - Flags: delayed + - Units: seconds + - Default: 10 + - Flags: delayed - Default grace period. We will deliver an object this long after it has expired, provided another thread is - attempting to get a new copy. + Default grace period. We will deliver an object this long after it has expired, provided another thread is attempting to get a new copy. + Objects already cached will not be affected by changes made until they are fetched from the backend again. default_ttl - Units: seconds - Default: 120 + - Units: seconds + - Default: 120 - The TTL assigned to objects if neither the backend nor the VCL code assigns one. - Objects already cached will not be affected by changes made until they are fetched from the backend again. - To force an immediate effect at the expense of a total flush of the cache use "purge.url ." + The TTL assigned to objects if neither the backend nor the VCL code assigns one. + Objects already cached will not be affected by changes made until they are fetched from the backend again. + To force an immediate effect at the expense of a total flush of the cache use "ban.url ." diag_bitmap - Units: bitmap - Default: 0 - Bitmap controlling diagnostics code:: - - 0x00000001 - CNT_Session states. 
- 0x00000002 - workspace debugging. - 0x00000004 - kqueue debugging. - 0x00000008 - mutex logging. - 0x00000010 - mutex contests. - 0x00000020 - waiting list. - 0x00000040 - object workspace. - 0x00001000 - do not core-dump child process. - 0x00002000 - only short panic message. - 0x00004000 - panic to stderr. - 0x00008000 - panic to abort2(). - 0x00010000 - synchronize shmlog. - 0x00020000 - synchronous start of persistence. - 0x80000000 - do edge-detection on digest. - - Use 0x notation and do the bitor in your head :-) + - Units: bitmap + - Default: 0 + + Bitmap controlling diagnostics code:: + + 0x00000001 - CNT_Session states. + 0x00000002 - workspace debugging. + 0x00000004 - kqueue debugging. + 0x00000008 - mutex logging. + 0x00000010 - mutex contests. + 0x00000020 - waiting list. + 0x00000040 - object workspace. + 0x00001000 - do not core-dump child process. + 0x00002000 - only short panic message. + 0x00004000 - panic to stderr. + 0x00010000 - synchronize shmlog. + 0x00020000 - synchronous start of persistence. + 0x00040000 - release VCL early. + 0x80000000 - do edge-detection on digest. 
+ Use 0x notation and do the bitor in your head :-) err_ttl - Units: seconds - Default: 0 + - Units: seconds + - Default: 0 - The TTL assigned to the synthesized error pages + The TTL assigned to the synthesized error pages esi_syntax - Units: bitmap - Default: 0 - Bitmap controlling ESI parsing code:: + - Units: bitmap + - Default: 0 - 0x00000001 - Don't check if it looks like XML - 0x00000002 - Ignore non-esi elements - 0x00000004 - Emit parsing debug records + Bitmap controlling ESI parsing code:: - Use 0x notation and do the bitor in your head :-) + 0x00000001 - Don't check if it looks like XML + 0x00000002 - Ignore non-esi elements + 0x00000004 - Emit parsing debug records + 0x00000008 - Force-split parser input (debugging) + Use 0x notation and do the bitor in your head :-) + +expiry_sleep + - Units: seconds + - Default: 1 + + How long the expiry thread sleeps when there is nothing for it to do. Reduce if your expiry thread gets behind. fetch_chunksize - Units: kilobytes - Default: 128 - Flags: experimental + - Units: kilobytes + - Default: 128 + - Flags: experimental - The default chunksize used by fetcher. This should be bigger than the majority of objects with short TTLs. - Internal limits in the storage_file module makes increases above 128kb a dubious idea. + The default chunksize used by fetcher. This should be bigger than the majority of objects with short TTLs. + Internal limits in the storage_file module makes increases above 128kb a dubious idea. first_byte_timeout - Units: s - Default: 60 + - Units: s + - Default: 60 - Default timeout for receiving first byte from backend. We only wait for this many seconds for the first - byte before giving up. A value of 0 means it will never time out. VCL can override this default value for - each backend and backend request. This parameter does not apply to pipe. + Default timeout for receiving first byte from backend. We only wait for this many seconds for the first byte before giving up. 
A value of 0 means it will never time out. VCL can override this default value for each backend and backend request. This parameter does not apply to pipe. group - Default: ..... - Flags: must_restart + - Default: magic + - Flags: must_restart - The unprivileged group to run as. + The unprivileged group to run as. gzip_level - Default: 6 + - Default: 6 - Gzip compression level ranging from 1 (the fastest) to 9 (the - smallest possible output). 0 indicates "debug mode". + Gzip compression level: 0=debug, 1=fast, 9=best gzip_stack_buffer - Unit: kilobytes - Default: 2048 + - Units: Bytes + - Default: 32768 + - Flags: experimental - Size of stack buffer used for in transit gzip processing, like - on the fly decompression. + Size of stack buffer used for gzip processing. + The stack buffers are used for in-transit data, for instance gunzip'ed data being sent to a client.Making this space to small results in more overhead, writes to sockets etc, making it too big is probably just a waste of memory. gzip_tmp_space - Default: 0 - - Where temporary space for gzip/gunzip is allocated. - 0 - malloc - 1 - session workspace - 2 - thread workspace - - If you have much gzip/gunzip activity, it may be an advantage to - use workspace for these allocations to reduce malloc activity. - Be aware that gzip needs 256+KB and gunzip" needs 32+KB of - workspace (64+KB if ESI processing). + - Default: 0 + - Flags: experimental -http_headers - Units: header lines - Default: 64 + Where temporary space for gzip/gunzip is allocated:: - Maximum number of HTTP headers we will deal with. - This space is preallocated in sessions and workthreads only objects allocate only space for the headers - they store. + 0 - malloc + 1 - session workspace + 2 - thread workspace + If you have much gzip/gunzip activity, it may be an advantage to use workspace for these allocations to reduce malloc activity. Be aware that gzip needs 256+KB and gunzip needs 32+KB of workspace (64+KB if ESI processing). 
http_gzip_support - Default: on + - Units: bool + - Default: on + - Flags: experimental - Enable gzip support. When enabled Varnish will compress - uncompressed objects before they are stored in the cache. If a - client does not support gzip encoding Varnish will uncompress - compressed objects on demand. - Varnish will also rewrite the Accept-Encoding header of clients indicating support for gzip to: + Enable gzip support. When enabled Varnish will compress uncompressed objects before they are stored in the cache. If a client does not support gzip encoding Varnish will uncompress compressed objects on demand. Varnish will also rewrite the Accept-Encoding header of clients indicating support for gzip to:: - Accept-Encoding: gzip + Accept-Encoding: gzip - Clients that do not support gzip will have their Accept-Encoding - header removed. For more information no how gzip is implemted - please see the chapter on gzip in the Varnish reference. + Clients that do not support gzip will have their Accept-Encoding header removed. For more information no how gzip is implemted please see the chapter on gzip in the Varnish reference. -http_range - Default: off - - Enables experimental support for the HTTP range header, enabling Varnish to serve parts of - an object to a client. However, Varnish will request the whole object from the backend server. +http_headers + - Units: header lines + - Default: 64 + + Maximum number of HTTP headers we will deal with. + This space is preallocated in sessions and workthreads only objects allocate only space for the headers they store. + +http_range_support + - Units: bool + - Default: off + - Flags: experimental + + Enable support for HTTP Range headers. listen_address - Default: :80 - Flags: must_restart + - Default: :80 + - Flags: must_restart - Whitespace separated list of network endpoints where Varnish will accept requests. 
- Possible formats: host, host:port, :port + Whitespace separated list of network endpoints where Varnish will accept requests. + Possible formats: host, host:port, :port listen_depth - Units: connections - Default: 1024 - Flags: must_restart + - Units: connections + - Default: 1024 + - Flags: must_restart - Listen queue depth. + Listen queue depth. log_hashstring - Units: bool - Default: off + - Units: bool + - Default: off - Log the hash string to shared memory log. + Log the hash string to shared memory log. log_local_address - Units: bool - Default: off + - Units: bool + - Default: off - Log the local address on the TCP connection in the SessionOpen shared memory record. + Log the local address on the TCP connection in the SessionOpen shared memory record. lru_interval - Units: seconds - Default: 2 - Flags: experimental + - Units: seconds + - Default: 2 + - Flags: experimental - Grace period before object moves on LRU list. - Objects are only moved to the front of the LRU list if they have not been moved there already inside this - timeout period. This reduces the amount of lock operations necessary for LRU list access. + Grace period before object moves on LRU list. + Objects are only moved to the front of the LRU list if they have not been moved there already inside this timeout period. This reduces the amount of lock operations necessary for LRU list access. max_esi_includes - Units: includes - Default: 5 + - Units: includes + - Default: 5 - Maximum depth of esi:include processing. + Maximum depth of esi:include processing. max_restarts - Units: restarts - Default: 4 - - Upper limit on how many times a request can restart. - Be aware that restarts are likely to cause a hit against the backend, so don't increase thoughtlessly. - -overflow_max - Units: % - Default: 100 - Flags: experimental + - Units: restarts + - Default: 4 - Percentage permitted overflow queue length. 
- - This sets the ratio of queued requests to worker threads, above which sessions will be dropped instead of - queued. + Upper limit on how many times a request can restart. + Be aware that restarts are likely to cause a hit against the backend, so don't increase thoughtlessly. ping_interval - Units: seconds - Default: 3 - Flags: must_restart + - Units: seconds + - Default: 3 + - Flags: must_restart - Interval between pings from parent to child. - Zero will disable pinging entirely, which makes it possible to attach a debugger to the child. + Interval between pings from parent to child. + Zero will disable pinging entirely, which makes it possible to attach a debugger to the child. pipe_timeout - Units: seconds - Default: 60 + - Units: seconds + - Default: 60 - Idle timeout for PIPE sessions. If nothing have been received in either direction for this many seconds, - the session is closed. + Idle timeout for PIPE sessions. If nothing have been received in either direction for this many seconds, the session is closed. prefer_ipv6 - Units: bool - Default: off + - Units: bool + - Default: off + + Prefer IPv6 address when connecting to backends which have both IPv4 and IPv6 addresses. - Prefer IPv6 address when connecting to backends which have both IPv4 and IPv6 addresses. +queue_max + - Units: % + - Default: 100 + - Flags: experimental -purge_dups - Units: bool - Default: on + Percentage permitted queue length. - Detect and eliminate duplicate purges. + This sets the ratio of queued requests to worker threads, above which sessions will be dropped instead of queued. rush_exponent - Units: requests per request - Default: 3 - Flags: experimental + - Units: requests per request + - Default: 3 + - Flags: experimental - How many parked request we start for each completed request on the object. - NB: Even with the implict delay of delivery, this parameter controls an exponential increase in number of - worker threads. 
+ How many parked request we start for each completed request on the object. + NB: Even with the implict delay of delivery, this parameter controls an exponential increase in number of worker threads. saintmode_threshold - Units: objects - Default: 10 - Flags: experimental - - The maximum number of objects held off by saint mode before no further will be made to the backend until - one times out. A value of 0 disables saintmode. -send_timeout - Units: seconds - Default: 600 - Flags: delayed + - Units: objects + - Default: 10 + - Flags: experimental - Send timeout for client connections. If no data has been sent to the client in this many seconds, the ses? - sion is closed. - See setsockopt(2) under SO_SNDTIMEO for more information. + The maximum number of objects held off by saint mode before no further will be made to the backend until one times out. A value of 0 disables saintmode. -sendfile_threshold - Units: bytes - Default: -1 - Flags: experimental +send_timeout + - Units: seconds + - Default: 600 + - Flags: delayed - The minimum size of objects transmitted with sendfile. + Send timeout for client connections. If no data has been sent to the client in this many seconds, the session is closed. + See setsockopt(2) under SO_SNDTIMEO for more information. sess_timeout - Units: seconds - Default: 5 + - Units: seconds + - Default: 5 - Idle timeout for persistent sessions. If a HTTP request has not been received in this many seconds, the - session is closed. + Idle timeout for persistent sessions. If a HTTP request has not been received in this many seconds, the session is closed. sess_workspace - Units: bytes - Default: 65536 - Flags: delayed + - Units: bytes + - Default: 65536 + - Flags: delayed - Bytes of HTTP protocol workspace allocated for sessions. This space must be big enough for the entire HTTP - protocol header and any edits done to it in the VCL code. - Minimum is 1024 bytes. + Bytes of HTTP protocol workspace allocated for sessions. 
This space must be big enough for the entire HTTP protocol header and any edits done to it in the VCL code. + Minimum is 1024 bytes. session_linger - Units: ms - Default: 50 - Flags: experimental + - Units: ms + - Default: 50 + - Flags: experimental - How long time the workerthread lingers on the session to see if a new request appears right away. - If sessions are reused, as much as half of all reuses happen within the first 100 msec of the previous - request completing. - Setting this too high results in worker threads not doing anything for their keep, setting it too low just - means that more sessions take a detour around the waiter. + How long time the workerthread lingers on the session to see if a new request appears right away. + If sessions are reused, as much as half of all reuses happen within the first 100 msec of the previous request completing. + Setting this too high results in worker threads not doing anything for their keep, setting it too low just means that more sessions take a detour around the waiter. session_max - Units: sessions - Default: 100000 + - Units: sessions + - Default: 100000 - Maximum number of sessions we will allocate before just dropping connections. - This is mostly an anti-DoS measure, and setting it plenty high should not hurt, as long as you have the - memory for it. + Maximum number of sessions we will allocate before just dropping connections. + This is mostly an anti-DoS measure, and setting it plenty high should not hurt, as long as you have the memory for it. shm_reclen - Units: bytes - Default: 255 + - Units: bytes + - Default: 255 - Maximum number of bytes in SHM log record. - Maximum is 65535 bytes. + Maximum number of bytes in SHM log record. + Maximum is 65535 bytes. shm_workspace - Units: bytes - Default: 8192 - Flags: delayed + - Units: bytes + - Default: 8192 + - Flags: delayed + + Bytes of shmlog workspace allocated for worker threads. 
If too big, it wastes some ram, if too small it causes needless flushes of the SHM workspace. + These flushes show up in stats as "SHM flushes due to overflow". + Minimum is 4096 bytes. + +shortlived + - Units: s + - Default: 10.0 - Bytes of shmlog workspace allocated for worker threads. If too big, it wastes some ram, if too small it - causes needless flushes of the SHM workspace. - These flushes show up in stats as "SHM flushes due to overflow". - Minimum is 4096 bytes. + Objects created with TTL shorter than this are always put in transient storage. syslog_cli_traffic - Units: bool - Default: on + - Units: bool + - Default: on - Log all CLI traffic to syslog(LOG_INFO). + Log all CLI traffic to syslog(LOG_INFO). thread_pool_add_delay - Units: milliseconds - Default: 20 - Flags: experimental + - Units: milliseconds + - Default: 20 + - Flags: experimental - Wait at least this long between creating threads. + Wait at least this long between creating threads. - Setting this too long results in insuffient worker threads. + Setting this too long results in insuffient worker threads. - Setting this too short increases the risk of worker thread pile-up. + Setting this too short increases the risk of worker thread pile-up. thread_pool_add_threshold - Units: requests - Default: 2 - Flags: experimental + - Units: requests + - Default: 2 + - Flags: experimental - Overflow threshold for worker thread creation. + Overflow threshold for worker thread creation. - Setting this too low, will result in excess worker threads, which is generally a bad idea. + Setting this too low, will result in excess worker threads, which is generally a bad idea. - Setting it too high results in insuffient worker threads. + Setting it too high results in insuffient worker threads. 
thread_pool_fail_delay - Units: milliseconds - Default: 200 - Flags: experimental + - Units: milliseconds + - Default: 200 + - Flags: experimental - Wait at least this long after a failed thread creation before trying to create another thread. + Wait at least this long after a failed thread creation before trying to create another thread. - Failure to create a worker thread is often a sign that the end is near, because the process is running out - of RAM resources for thread stacks. - This delay tries to not rush it on needlessly. + Failure to create a worker thread is often a sign that the end is near, because the process is running out of RAM resources for thread stacks. + This delay tries to not rush it on needlessly. - If thread creation failures are a problem, check that thread_pool_max is not too high. + If thread creation failures are a problem, check that thread_pool_max is not too high. - It may also help to increase thread_pool_timeout and thread_pool_min, to reduce the rate at which treads - are destroyed and later recreated. + It may also help to increase thread_pool_timeout and thread_pool_min, to reduce the rate at which treads are destroyed and later recreated. thread_pool_max - Units: threads - Default: 500 - Flags: delayed, experimental + - Units: threads + - Default: 500 + - Flags: delayed, experimental - The maximum number of worker threads in all pools combined. + The maximum number of worker threads in all pools combined. - Do not set this higher than you have to, since excess worker threads soak up RAM and CPU and generally just - get in the way of getting work done. + Do not set this higher than you have to, since excess worker threads soak up RAM and CPU and generally just get in the way of getting work done. thread_pool_min - Units: threads - Default: 5 - Flags: delayed, experimental + - Units: threads + - Default: 5 + - Flags: delayed, experimental - The minimum number of threads in each worker pool. 
+ The minimum number of threads in each worker pool. - Increasing this may help ramp up faster from low load situations where threads have expired. + Increasing this may help ramp up faster from low load situations where threads have expired. - Minimum is 2 threads. + Minimum is 2 threads. thread_pool_purge_delay - Units: milliseconds - Default: 1000 - Flags: delayed, experimental + - Units: milliseconds + - Default: 1000 + - Flags: delayed, experimental - Wait this long between purging threads. + Wait this long between purging threads. - This controls the decay of thread pools when idle(-ish). + This controls the decay of thread pools when idle(-ish). - Minimum is 100 milliseconds. + Minimum is 100 milliseconds. thread_pool_stack - Units: bytes - Default: -1 - Flags: experimental + - Units: bytes + - Default: -1 + - Flags: experimental - Worker thread stack size. In particular on 32bit systems you may need to tweak this down to fit many - threads into the limited address space. + Worker thread stack size. + On 32bit systems you may need to tweak this down to fit many threads into the limited address space. thread_pool_timeout - Units: seconds - Default: 300 - Flags: delayed, experimental + - Units: seconds + - Default: 300 + - Flags: delayed, experimental - Thread idle threshold. + Thread idle threshold. - Threads in excess of thread_pool_min, which have been idle for at least this long are candidates for purg? - ing. + Threads in excess of thread_pool_min, which have been idle for at least this long are candidates for purging. - Minimum is 1 second. + Minimum is 1 second. thread_pools - Units: pools - Default: 2 - Flags: delayed, experimental + - Units: pools + - Default: 2 + - Flags: delayed, experimental - Number of worker thread pools. + Number of worker thread pools. - Increasing number of worker pools decreases lock contention. + Increasing number of worker pools decreases lock contention. 
- Too many pools waste CPU and RAM resources, and more than one pool for each CPU is probably detrimal to - performance. + Too many pools waste CPU and RAM resources, and more than one pool for each CPU is probably detrimal to performance. - Can be increased on the fly, but decreases require a restart to take effect. + Can be increased on the fly, but decreases require a restart to take effect. thread_stats_rate - Units: requests - Default: 10 - Flags: experimental + - Units: requests + - Default: 10 + - Flags: experimental - Worker threads accumulate statistics, and dump these into the global stats counters if the lock is free - when they finish a request. - This parameters defines the maximum number of requests a worker thread may handle, before it is forced to - dump its accumulated stats into the global counters. + Worker threads accumulate statistics, and dump these into the global stats counters if the lock is free when they finish a request. + This parameters defines the maximum number of requests a worker thread may handle, before it is forced to dump its accumulated stats into the global counters. -user Default: ..... - Flags: must_restart +user + - Default: magic + - Flags: must_restart - The unprivileged user to run as. Setting this will also set "group" to the specified user's primary group. + The unprivileged user to run as. Setting this will also set "group" to the specified user's primary group. + +vcc_err_unref + - Units: bool + - Default: on + + Unreferenced VCL objects result in error. + +vcl_dir + - Default: /usr/local/etc/varnish + + Directory from which relative VCL filenames (vcl.load and include) are opened. vcl_trace - Units: bool - Default: off + - Units: bool + - Default: off + + Trace VCL execution in the shmlog. + Enabling this will allow you to see the path each request has taken through the VCL program. + This generates a lot of logrecords so it is off by default. 
+ +vmod_dir + - Default: /usr/local/lib/varnish/vmods - Trace VCL execution in the shmlog. - Enabling this will allow you to see the path each request has taken through the VCL program. - This generates a lot of logrecords so it is off by default. + Directory where VCL modules are to be found. waiter - Default: default - Flags: must_restart, experimental + - Default: default + - Flags: must_restart, experimental - Select the waiter kernel interface. + Select the waiter kernel interface. Purge expressions From tfheen at varnish-cache.org Thu Feb 3 13:35:45 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 14:35:45 +0100 Subject: [master] 06399ae Fix typos Message-ID: commit 06399aeb9b9e3ea07b2bc82d9e400e0f6cb69cda Author: Tollef Fog Heen Date: Thu Feb 3 14:35:19 2011 +0100 Fix typos Typos in documentation, thanks to Camiel Dobbelaar for spotting these. diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index 13e7b4b..70f1b3c 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -822,8 +822,8 @@ static const struct parspec input_parspec[] = { "support for gzip to:\n" "Accept-Encoding: gzip\n\n" "Clients that do not support gzip will have their " - "Accept-Encoding header removed. For more information no how " - "gzip is implemted please see the chapter on gzip in the " + "Accept-Encoding header removed. For more information on how " + "gzip is implemented please see the chapter on gzip in the " "Varnish reference.", EXPERIMENTAL, "on", "bool" }, diff --git a/doc/sphinx/reference/varnishd.rst b/doc/sphinx/reference/varnishd.rst index 89eba40..aee66cb 100644 --- a/doc/sphinx/reference/varnishd.rst +++ b/doc/sphinx/reference/varnishd.rst @@ -519,7 +519,7 @@ http_gzip_support Accept-Encoding: gzip - Clients that do not support gzip will have their Accept-Encoding header removed. For more information no how gzip is implemted please see the chapter on gzip in the Varnish reference. 
+ Clients that do not support gzip will have their Accept-Encoding header removed. For more information on how gzip is implemented please see the chapter on gzip in the Varnish reference. http_headers - Units: header lines From tfheen at varnish-cache.org Thu Feb 3 13:45:45 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Thu, 03 Feb 2011 14:45:45 +0100 Subject: [master] afb0a4e Fix compilation error Message-ID: commit afb0a4e2dde8f9eeea52a29b26a616eda8a60fe4 Author: Tollef Fog Heen Date: Thu Feb 3 14:38:33 2011 +0100 Fix compilation error Use strcmp and not == for comparing strings diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index 70f1b3c..c8ad67c 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -1104,7 +1104,7 @@ MCF_DumpRst(void) printf("%s\n", pp->name); if (pp->units != NULL && *pp->units != '\0') printf("\t- Units: %s\n", pp->units); - printf("\t- Default: %s\n", pp->def == MAGIC_INIT_STRING ? "magic" : pp->def); + printf("\t- Default: %s\n", strcmp(pp->def,MAGIC_INIT_STRING) == 0 ? 
"magic" : pp->def); /* * XXX: we should mark the params with one/two flags * XXX: that say if ->min/->max are valid, so we From ingvar at varnish-cache.org Thu Feb 3 15:11:01 2011 From: ingvar at varnish-cache.org (Ingvar) Date: Thu, 03 Feb 2011 16:11:01 +0100 Subject: [master] d9e97e8 Updated specfile, so that is builds trunk again Message-ID: commit d9e97e8c61c3b69be1c46c1d0866a51c6a165bc5 Author: Ingvar Hagelund Date: Thu Feb 3 15:50:33 2011 +0100 Updated specfile, so that is builds trunk again diff --git a/redhat/varnish.spec b/redhat/varnish.spec index 39748e0..07a3926 100644 --- a/redhat/varnish.spec +++ b/redhat/varnish.spec @@ -1,11 +1,12 @@ Summary: High-performance HTTP accelerator Name: varnish Version: 3.0.0 -Release: 0.svn20101115r5543%{?dist} +Release: 0.git20110203%{?dist} License: BSD Group: System Environment/Daemons URL: http://www.varnish-cache.org/ -Source0: http://repo.varnish-cache.org/source/%{name}-%{version}.tar.gz +#Source0: http://repo.varnish-cache.org/source/%{name}-%{version}.tar.gz +Source0: %{name}-trunk.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) # The svn sources needs autoconf, automake and libtool to generate a suitable # configure script. Release tarballs would not need this @@ -71,7 +72,8 @@ Documentation files for %name #Varnish Cache is a high-performance HTTP accelerator %prep -%setup -q +#%setup -q +%setup -q -n varnish-trunk # The svn sources needs to generate a suitable configure script # Release tarballs would not need this From ingvar at varnish-cache.org Thu Feb 3 15:11:04 2011 From: ingvar at varnish-cache.org (Ingvar) Date: Thu, 03 Feb 2011 16:11:04 +0100 Subject: [master] 6ed9fc8 Updated with correct instructions on how to build a rpm package from git. Message-ID: commit 6ed9fc8c96616596684bb3a512bce678a99d7b25 Author: Ingvar Hagelund Date: Thu Feb 3 16:06:35 2011 +0100 Updated with correct instructions on how to build a rpm package from git. 
diff --git a/redhat/README.redhat b/redhat/README.redhat index 4b20962..bb020cf 100644 --- a/redhat/README.redhat +++ b/redhat/README.redhat @@ -5,6 +5,25 @@ build varnish with. This means GCC 3.4.6 on a standard RHEL4 system. Varnish should work fine with GCC 3.3 and above. +Building a RPM package from a git checkout +========================================== + +You may build an rpm package direct from a git checkout. Here is an +example on how you may do this: + +git clone git://git.varnish-cache.org/varnish-cache +cd varnish-cache +sed -i "s/^Release: .*/Release: 0.git$(date +%Y%m%d)%{?dist}/" \ + redhat/varnish.spec +./autogen.sh && ./configure +make dist && rpmbuild -ts varnish-trunk.tar.gz + +This builds a source rpm. Then you can, for example on a RHEL5 system, +do something like this: + +rpmbuild --define "dist .el5" --rebuild /path/to/varnish-3.0-0.git20110203.src.rpm + + Upgrading from 1.x to 2.0 ========================= There are a few changes in the vcl language from varnish-1.x to 2.0. @@ -56,28 +75,4 @@ that suits jemalloc better, you might want to change the specfile and recompile. We would very much like feedback from anyone running varnish on Fedora's own ppc64 kernel. -Building a RPM package from SVN -=============================== -You may build the package from a svn checkout. Follow the instructions -at http://varnish.projects.linpro.no/wiki/Repository to get the -source. Then enter the trunk directory and edit -varnish-cache/redhat/varnish.spec. At least bump the version and/or -release numbers, and uncomment the autogen call. You need to install -the autoconf and automake packages. 
Then use something like this to -build the rpm package: - -#!/bin/bash -VERSION=$( awk ' /^Version:/ { print $2 } ' varnish-cache/redhat/varnish.spec ) -tar $( find varnish-cache -type d -name .svn | \ - while read i; do - echo -n "--exclude $i " - done -) -cvzf varnish-$VERSION.tar.gz varnish-cache/* -rpmbuild -ts varnish-$VERSION.tar.gz -# End of script - -This builds a source rpm. Then you can, for example on a RHEL4 system, -do something like this: - -rpmbuild --define "dist .el4" --rebuild /path/to/varnish-2.1.4-0.svn20100826r5134.src.rpm From phk at varnish-cache.org Sun Feb 6 20:56:10 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Sun, 06 Feb 2011 21:56:10 +0100 Subject: [master] bf96632 Ooops, this is embarrasing: Move the star so the example actually makes sense... Message-ID: commit bf9663263de4337ab2d48786fc03b39379ca91d5 Author: Poul-Henning Kamp Date: Sun Feb 6 20:55:42 2011 +0000 Ooops, this is embarrasing: Move the star so the example actually makes sense... Spotted by: Ralph Corderoy diff --git a/doc/sphinx/phk/platforms.rst b/doc/sphinx/phk/platforms.rst index 130826b..4e2d617 100644 --- a/doc/sphinx/phk/platforms.rst +++ b/doc/sphinx/phk/platforms.rst @@ -14,7 +14,7 @@ But making your program run on everything is hard work very hard work. For instance, did you know that: - sizeof(void*) != sizeof(const void*) + sizeof(void*) != sizeof(const * void) is legal in a ISO-C compliant environment ? 
From phk at varnish-cache.org Mon Feb 7 09:48:27 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 10:48:27 +0100 Subject: [master] e0d21c9 Try to pay attention to actual C-syntax Message-ID: commit e0d21c963bcabf84bf753248a483e78e632318ae Author: Poul-Henning Kamp Date: Mon Feb 7 09:48:00 2011 +0000 Try to pay attention to actual C-syntax Reminded about again by: Ralph Corderoy diff --git a/doc/sphinx/phk/platforms.rst b/doc/sphinx/phk/platforms.rst index 4e2d617..2b60443 100644 --- a/doc/sphinx/phk/platforms.rst +++ b/doc/sphinx/phk/platforms.rst @@ -14,7 +14,7 @@ But making your program run on everything is hard work very hard work. For instance, did you know that: - sizeof(void*) != sizeof(const * void) + sizeof(void*) != sizeof(void * const) is legal in a ISO-C compliant environment ? From phk at varnish-cache.org Mon Feb 7 11:30:43 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 12:30:43 +0100 Subject: [master] 856edab Give the stevedore the chance to tell which LRU list a given object should be on. Message-ID: commit 856edab5225ce343069445f3f773b01ec83cd258 Author: Poul-Henning Kamp Date: Mon Feb 7 11:30:13 2011 +0000 Give the stevedore the chance to tell which LRU list a given object should be on. 
diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c index 3afadea..f2f2ea4 100644 --- a/bin/varnishd/cache_expire.c +++ b/bin/varnishd/cache_expire.c @@ -143,7 +143,7 @@ EXP_Insert(struct object *o) assert(o->entered != 0 && !isnan(o->entered)); o->last_lru = o->entered; - lru = STV_lru(o->objstore); + lru = STV_lru(o); CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); Lck_Lock(&exp_mtx); (void)update_object_when(o); @@ -180,7 +180,7 @@ EXP_Touch(struct object *o, double tnow) if (oc->flags & OC_F_LRUDONTMOVE) return; - lru = STV_lru(o->objstore); + lru = STV_lru(o); CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); if (Lck_Trylock(&exp_mtx)) diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c index 421f950..ef61bfd 100644 --- a/bin/varnishd/stevedore.c +++ b/bin/varnishd/stevedore.c @@ -283,6 +283,19 @@ STV_NewObject(struct sess *sp, const char *hint, unsigned wsl, double ttl, /*-------------------------------------------------------------------*/ +static struct lru * +stv_default_getlru(const struct object *o) +{ + + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + CHECK_OBJ_NOTNULL(o->objstore, STORAGE_MAGIC); + CHECK_OBJ_NOTNULL(o->objstore->stevedore, STEVEDORE_MAGIC); + CHECK_OBJ_NOTNULL(o->objstore->stevedore->lru, LRU_MAGIC); + return (o->objstore->stevedore->lru); +} + +/*-------------------------------------------------------------------*/ + void STV_Freestore(struct object *o) { @@ -386,11 +399,14 @@ STV_close(void) } struct lru * -STV_lru(const struct storage *st) +STV_lru(const struct object *o) { - CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + CHECK_OBJ_NOTNULL(o->objstore, STORAGE_MAGIC); + CHECK_OBJ_NOTNULL(o->objstore->stevedore, STEVEDORE_MAGIC); + AN(o->objstore->stevedore->getlru); - return (st->stevedore->lru); + return (o->objstore->stevedore->getlru(o)); } /*-------------------------------------------------------------------- @@ -455,6 +471,8 @@ STV_Config(const char *spec) AN(stv->alloc); if (stv->allocobj == NULL) 
stv->allocobj = stv_default_allocobj; + if (stv->getlru == NULL) + stv->getlru = stv_default_getlru; if (p == NULL) bprintf(stv->ident, "s%u", seq++); diff --git a/bin/varnishd/stevedore.h b/bin/varnishd/stevedore.h index e1c1f0d..3951050 100644 --- a/bin/varnishd/stevedore.h +++ b/bin/varnishd/stevedore.h @@ -43,6 +43,7 @@ typedef void storage_trim_f(struct storage *, size_t size); typedef void storage_free_f(struct storage *); typedef struct object *storage_allocobj_f(struct stevedore *, struct sess *sp, unsigned ltot, const struct stv_objsecrets *); +typedef struct lru *storage_getlru_f(const struct object *); typedef void storage_close_f(const struct stevedore *); /* Prototypes for VCL variable responders */ @@ -58,6 +59,7 @@ struct stevedore { storage_init_f *init; /* called by mgt process */ storage_open_f *open; /* called by cache process */ storage_alloc_f *alloc; /* --//-- */ + storage_getlru_f *getlru; /* --//-- */ storage_trim_f *trim; /* --//-- */ storage_free_f *free; /* --//-- */ storage_close_f *close; /* --//-- */ @@ -86,7 +88,7 @@ void STV_trim(struct storage *st, size_t size); void STV_free(struct storage *st); void STV_open(void); void STV_close(void); -struct lru *STV_lru(const struct storage *st); +struct lru *STV_lru(const struct object *o); void STV_Config(const char *spec); void STV_Config_Transient(void); void STV_Freestore(struct object *o); diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 4fc062f..8b1df91 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -1427,6 +1427,19 @@ smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, return (ss); } +/*-------------------------------------------------------------------- + * Find the per-segment lru list for this object + */ + +static struct lru * +smp_getlru(const struct object *o) +{ + struct smp_seg *sg; + + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); + 
return (sg->lru); +} /*-------------------------------------------------------------------- * Allocate an object @@ -1552,6 +1565,7 @@ const struct stevedore smp_stevedore = { .close = smp_close, .alloc = smp_alloc, .allocobj = smp_allocobj, + .getlru = smp_getlru, .free = smp_free, .trim = smp_trim, }; From phk at varnish-cache.org Mon Feb 7 11:51:16 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 12:51:16 +0100 Subject: [master] 1d95c79 Add a debug.persistent command so we can fondle persistent storage for testing and debugging. Message-ID: commit 1d95c792bee73dead4e5a11cd85b5b4c8f0e6737 Author: Poul-Henning Kamp Date: Mon Feb 7 11:50:56 2011 +0000 Add a debug.persistent command so we can fondle persistent storage for testing and debugging. diff --git a/bin/varnishd/cache.h b/bin/varnishd/cache.h index fdd0308..32c6e45 100644 --- a/bin/varnishd/cache.h +++ b/bin/varnishd/cache.h @@ -849,6 +849,7 @@ struct vsb *SMS_Makesynth(struct object *obj); void SMS_Finish(struct object *obj); /* storage_persistent.c */ +void SMP_Init(void); void SMP_Ready(void); void SMP_NewBan(double t0, const char *ban); diff --git a/bin/varnishd/cache_main.c b/bin/varnishd/cache_main.c index 6a2fc41..d874a98 100644 --- a/bin/varnishd/cache_main.c +++ b/bin/varnishd/cache_main.c @@ -127,6 +127,7 @@ child_main(void) VCA_Init(); SMS_Init(); + SMP_Init(); STV_open(); VMOD_Init(); diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 8b1df91..94db92b 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -52,6 +52,8 @@ SVNID("$Id$") #include "stevedore.h" #include "hash_slinger.h" #include "vsha256.h" +#include "cli.h" +#include "cli_priv.h" #include "persistent.h" @@ -124,6 +126,7 @@ struct smp_sc { unsigned flags; #define SMP_F_LOADED (1 << 0) + const struct stevedore *stevedore; int fd; const char *filename; off_t mediasize; @@ -1292,6 +1295,8 @@ smp_open(const struct stevedore *st) 
Lck_New(&sc->mtx, lck_smp); Lck_Lock(&sc->mtx); + sc->stevedore = st; + /* We trust the parent to give us a valid silo, for good measure: */ AZ(smp_valid_silo(sc)); @@ -1569,3 +1574,40 @@ const struct stevedore smp_stevedore = { .free = smp_free, .trim = smp_trim, }; + +/*--------------------------------------------------------------------*/ + +static void +debug_persistent(struct cli *cli, const char * const * av, void *priv) +{ + struct smp_sc *sc; + struct smp_seg *sg; + + (void)priv; + + if (av[2] == NULL || av[3] == NULL) { + VTAILQ_FOREACH(sc, &silos, list) { + if (av[2] != NULL && + strcmp(av[2], sc->stevedore->ident)) + continue; + cli_out(cli, "Silo: %s (%s)\n", + sc->stevedore->ident, sc->filename); + VTAILQ_FOREACH(sg, &sc->segments, list) { + cli_out(cli, " Seg: %p\n", sg); + } + } + return; + } +} + +static struct cli_proto debug_cmds[] = { + { "debug.persistent", "debug.persistent", + "Persistent debugging magic\n", 0, 2, "d", debug_persistent }, + { NULL } +}; + +void +SMP_Init(void) +{ + CLI_AddFuncs(debug_cmds); +} From phk at varnish-cache.org Mon Feb 7 13:00:30 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 14:00:30 +0100 Subject: [master] b70e631 Move the next_{bot, top} to the smp_sc struct, we can only have one segment open at a time anyway. Message-ID: commit b70e63120fcf21051e02aa9a1e07c6920dacd39a Author: Poul-Henning Kamp Date: Mon Feb 7 12:59:51 2011 +0000 Move the next_{bot,top} to the smp_sc struct, we can only have one segment open at a time anyway. 
Add a "sync" debug command diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 94db92b..32c41cb 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -110,9 +110,6 @@ struct smp_seg { /* Only for open segment */ struct smp_object *objs; /* objdesc array */ - uint64_t next_bot; /* next alloc address bottom */ - uint64_t next_top; /* next alloc address top */ - struct smp_signctx ctx[1]; }; @@ -140,6 +137,9 @@ struct smp_sc { struct smp_seghead segments; struct smp_seg *cur_seg; + uint64_t next_bot; /* next alloc address bottom */ + uint64_t next_top; /* next alloc address top */ + uint64_t free_offset; pthread_t thread; @@ -921,9 +921,11 @@ static uint64_t smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) { - IASSERTALIGN(sc, sg->next_bot); - assert(sg->next_bot <= sg->next_top - IRNUP(sc, SMP_SIGN_SPACE)); - return ((sg->next_top - sg->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); + IASSERTALIGN(sc, sc->next_bot); + assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); + assert(sc->next_bot >= sg->p.offset); + assert(sc->next_top < sg->p.offset + sg->p.length); + return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); } /*-------------------------------------------------------------------- @@ -1176,12 +1178,12 @@ smp_new_seg(struct smp_sc *sc) /* Set up our allocation points */ sc->cur_seg = sg; - sg->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); - sg->next_top = smp_segend(sg); - sg->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - IASSERTALIGN(sc, sg->next_bot); - IASSERTALIGN(sc, sg->next_top); - sg->objs = (void*)(sc->base + sg->next_top); + sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); + sc->next_top = smp_segend(sg); + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + IASSERTALIGN(sc, sc->next_bot); + IASSERTALIGN(sc, sc->next_top); + sg->objs = (void*)(sc->base + sc->next_top); sg->nalloc1 = 0; } @@ -1208,9 +1210,6 @@ smp_close_seg(struct smp_sc 
*sc, struct smp_seg *sg) return; } - assert(sg->next_bot <= sg->next_top - IRNUP(sc, SMP_SIGN_SPACE)); - IASSERTALIGN(sc, sg->next_bot); - /* * If there is enough space left, that we can move the smp_objects * down without overwriting the present copy, we will do so to @@ -1219,25 +1218,26 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) left = smp_spaceleft(sc, sg); len = sizeof(struct smp_object) * sg->nalloc1; if (len < left) { - dst = sg->next_bot + IRNUP(sc, SMP_SIGN_SPACE); + dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); dp = sc->base + dst; assert((uintptr_t)dp + len < (uintptr_t)sg->objs); memcpy(dp, sg->objs, len); - sg->next_top = dst; + sc->next_top = dst; sg->objs = dp; - sg->p.length = sg->next_top + len + IRNUP(sc, SMP_SIGN_SPACE); + sg->p.length = (sc->next_top - sg->p.offset) + + len + IRNUP(sc, SMP_SIGN_SPACE); (void)smp_spaceleft(sc, sg); /* for asserts */ } /* Update the segment header */ - sg->p.objlist = sg->next_top; + sg->p.objlist = sc->next_top; sg->p.nalloc = sg->nalloc1; /* Write the (empty) OBJIDX signature */ - sg->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - assert(sg->next_top >= sg->next_bot); - smp_def_sign(sc, sg->ctx, sg->next_top, "OBJIDX"); + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + assert(sc->next_top >= sc->next_bot); + smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); smp_reset_sign(sg->ctx); smp_sync_sign(sg->ctx); @@ -1397,12 +1397,12 @@ smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, max_size = IRNDN(sc, left - extra); sg = sc->cur_seg; - ss = (void*)(sc->base + sg->next_bot); - sg->next_bot += max_size + IRNUP(sc, sizeof(*ss)); + ss = (void*)(sc->base + sc->next_bot); + sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); sg->nalloc++; if (so != NULL) { - sg->next_top -= sizeof(**so); - *so = (void*)(sc->base + sg->next_top); + sc->next_top -= sizeof(**so); + *so = (void*)(sc->base + sc->next_top); /* Render this smp_object mostly harmless */ (*so)->ttl = 0.; (*so)->ban = 0.; @@ -1510,7 +1510,8 
@@ static struct storage * smp_alloc(struct stevedore *st, size_t size) { - return (smp_allocx(st, 4096, size, NULL, NULL, NULL)); + return (smp_allocx(st, + size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); } /*-------------------------------------------------------------------- @@ -1578,26 +1579,65 @@ const struct stevedore smp_stevedore = { /*--------------------------------------------------------------------*/ static void +debug_report_silo(struct cli *cli, const struct smp_sc *sc) +{ + struct smp_seg *sg; + struct objcore *oc; + + cli_out(cli, "Silo: %s (%s)\n", + sc->stevedore->ident, sc->filename); + VTAILQ_FOREACH(sg, &sc->segments, list) { + cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", + (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); + if (sg == sc->cur_seg) + cli_out(cli, "\t[0x%jx ... 0x%jx] = 0x%jx free\n", + (uintmax_t)(sc->next_bot), + (uintmax_t)(sc->next_top), + (uintmax_t)(sc->next_top - sc->next_bot)); + cli_out(cli, "\t%u nobj, %u alloc, %u alloc1, %u fixed\n", + sg->nobj, sg->nalloc, sg->nalloc1, sg->nfixed); + VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) { + if (oc == &sg->lru->senteniel) + cli_out(cli, "\t\t(senteniel) %p\n", oc); + else + cli_out(cli, "\t\tOC: %p\n", oc); + } + } +} + +static void debug_persistent(struct cli *cli, const char * const * av, void *priv) { struct smp_sc *sc; - struct smp_seg *sg; (void)priv; - if (av[2] == NULL || av[3] == NULL) { - VTAILQ_FOREACH(sc, &silos, list) { - if (av[2] != NULL && - strcmp(av[2], sc->stevedore->ident)) - continue; - cli_out(cli, "Silo: %s (%s)\n", - sc->stevedore->ident, sc->filename); - VTAILQ_FOREACH(sg, &sc->segments, list) { - cli_out(cli, " Seg: %p\n", sg); - } - } + if (av[2] == NULL) { + VTAILQ_FOREACH(sc, &silos, list) + debug_report_silo(cli, sc); return; } + VTAILQ_FOREACH(sc, &silos, list) + if (!strcmp(av[2], sc->stevedore->ident)) + break; + if (sc == NULL) { + cli_out(cli, "Silo <%s> not found\n", av[2]); + cli_result(cli, CLIS_PARAM); + return; + } + if (av[3] == 
NULL) { + debug_report_silo(cli, sc); + return; + } + Lck_Lock(&sc->mtx); + if (!strcmp(av[3], "sync")) { + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } else { + cli_out(cli, "Unknown operation\n"); + cli_result(cli, CLIS_PARAM); + } + Lck_Unlock(&sc->mtx); } static struct cli_proto debug_cmds[] = { From phk at varnish-cache.org Mon Feb 7 14:12:02 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 15:12:02 +0100 Subject: [master] b4916ea Change a fieldname that keeps confusing me. Message-ID: commit b4916ea1bbed5cdd190c41152e241cef849b99c3 Author: Poul-Henning Kamp Date: Mon Feb 7 14:11:48 2011 +0000 Change a fieldname that keeps confusing me. diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 32c41cb..ceae27d 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -105,7 +105,6 @@ struct smp_seg { uint32_t nobj; /* Number of objects */ uint32_t nalloc; /* Allocations */ - uint32_t nalloc1; /* Allocated objects */ uint32_t nfixed; /* How many fixed objects */ /* Only for open segment */ @@ -683,8 +682,8 @@ smp_find_so(const struct smp_seg *sg, const struct objcore *oc) smp_idx = oc->priv2; assert(smp_idx > 0); - assert(smp_idx <= sg->nalloc1); - so = &sg->objs[sg->nalloc1 - smp_idx]; + assert(smp_idx <= sg->p.lobjlist); + so = &sg->objs[sg->p.lobjlist - smp_idx]; return (so); } @@ -968,8 +967,7 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) /* test OBJIDX */ so = (void*)(sc->base + sg->p.objlist); sg->objs = so; - sg->nalloc1 = sg->p.nalloc; - no = sg->p.nalloc; + no = sg->p.lobjlist; /* Clear the bogus "hold" count */ sg->nobj = 0; for (;no > 0; so++,no--) { @@ -1184,7 +1182,6 @@ smp_new_seg(struct smp_sc *sc) IASSERTALIGN(sc, sc->next_bot); IASSERTALIGN(sc, sc->next_top); sg->objs = (void*)(sc->base + sc->next_top); - sg->nalloc1 = 0; } /*-------------------------------------------------------------------- @@ -1216,7 
+1213,7 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) * compact the segment. */ left = smp_spaceleft(sc, sg); - len = sizeof(struct smp_object) * sg->nalloc1; + len = sizeof(struct smp_object) * sg->p.lobjlist; if (len < left) { dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); dp = sc->base + dst; @@ -1232,7 +1229,6 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) /* Update the segment header */ sg->p.objlist = sc->next_top; - sg->p.nalloc = sg->nalloc1; /* Write the (empty) OBJIDX signature */ sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); @@ -1408,7 +1404,7 @@ smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, (*so)->ban = 0.; (*so)->ptr = NULL; sg->objs = *so; - *idx = ++sg->nalloc1; + *idx = ++sg->p.lobjlist; } (void)smp_spaceleft(sc, sg); /* for the assert */ } @@ -1594,8 +1590,8 @@ debug_report_silo(struct cli *cli, const struct smp_sc *sc) (uintmax_t)(sc->next_bot), (uintmax_t)(sc->next_top), (uintmax_t)(sc->next_top - sc->next_bot)); - cli_out(cli, "\t%u nobj, %u alloc, %u alloc1, %u fixed\n", - sg->nobj, sg->nalloc, sg->nalloc1, sg->nfixed); + cli_out(cli, "\t%u nobj, %u alloc, %u lobjlist, %u fixed\n", + sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) { if (oc == &sg->lru->senteniel) cli_out(cli, "\t\t(senteniel) %p\n", oc); diff --git a/include/persistent.h b/include/persistent.h index 596a7ed..6aebd40 100644 --- a/include/persistent.h +++ b/include/persistent.h @@ -122,10 +122,10 @@ struct smp_sign { */ struct smp_segptr { - uint64_t offset; - uint64_t length; - uint64_t objlist; - uint32_t nalloc; + uint64_t offset; /* rel to silo */ + uint64_t length; /* rel to offset */ + uint64_t objlist; /* rel to silo */ + uint32_t lobjlist; /* len of objlist */ }; /* From phk at varnish-cache.org Mon Feb 7 16:49:20 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 17:49:20 +0100 Subject: [master] 959a7b3 Release the esi_data storage when we release the 
object storage it refers to. Message-ID: commit 959a7b3e02c78a03da7706eeac27088245fd887e Author: Poul-Henning Kamp Date: Mon Feb 7 16:48:53 2011 +0000 Release the esi_data storage when we release the object storage it refers to. diff --git a/bin/varnishd/cache_hash.c b/bin/varnishd/cache_hash.c index 3694a75..88a809c 100644 --- a/bin/varnishd/cache_hash.c +++ b/bin/varnishd/cache_hash.c @@ -691,10 +691,6 @@ HSH_Deref(struct worker *w, struct objcore *oc, struct object **oo) DSL(0x40, SLT_Debug, 0, "Object %u workspace min free %u", o->xid, WS_Free(o->ws_o)); - if (o->esidata != NULL) { - STV_free(o->esidata); - o->esidata = NULL; - } if (oc != NULL) oc_freeobj(oc); w->stats.n_object--; diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c index ef61bfd..8e0ee36 100644 --- a/bin/varnishd/stevedore.c +++ b/bin/varnishd/stevedore.c @@ -301,6 +301,10 @@ STV_Freestore(struct object *o) { struct storage *st, *stn; + if (o->esidata != NULL) { + STV_free(o->esidata); + o->esidata = NULL; + } VTAILQ_FOREACH_SAFE(st, &o->store, list, stn) { CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC); VTAILQ_REMOVE(&o->store, st, list); From phk at varnish-cache.org Mon Feb 7 16:59:12 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 17:59:12 +0100 Subject: [master] cf64e7d Duh! Don't leak all the transient storage we use for pass requests. Message-ID: commit cf64e7dec3c3624ccf593496f6c858f741d10ec6 Author: Poul-Henning Kamp Date: Mon Feb 7 16:58:43 2011 +0000 Duh! Don't leak all the transient storage we use for pass requests. 
diff --git a/bin/varnishd/cache_hash.c b/bin/varnishd/cache_hash.c index 88a809c..02f9016 100644 --- a/bin/varnishd/cache_hash.c +++ b/bin/varnishd/cache_hash.c @@ -683,16 +683,22 @@ HSH_Deref(struct worker *w, struct objcore *oc, struct object **oo) return (r); } + if (oc != NULL) { + BAN_DestroyObj(oc); + AZ(oc->ban); + } + if (o != NULL) { - if (oc != NULL) { - BAN_DestroyObj(oc); - AZ(oc->ban); - } DSL(0x40, SLT_Debug, 0, "Object %u workspace min free %u", o->xid, WS_Free(o->ws_o)); if (oc != NULL) oc_freeobj(oc); + else { + STV_Freestore(o); + STV_free(o->objstore); + o = NULL; + } w->stats.n_object--; } diff --git a/bin/varnishtest/tests/b00002.vtc b/bin/varnishtest/tests/b00002.vtc index aaa8c5d..f1b5f48 100644 --- a/bin/varnishtest/tests/b00002.vtc +++ b/bin/varnishtest/tests/b00002.vtc @@ -23,6 +23,7 @@ client c1 { delay .1 varnish v1 -expect n_object == 0 +varnish v1 -expect SMA.Transient.nobj == 0 varnish v1 -expect client_conn == 1 varnish v1 -expect client_req == 1 varnish v1 -expect s_sess == 1 From phk at varnish-cache.org Mon Feb 7 18:03:45 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 19:03:45 +0100 Subject: [master] bc2f3f0 Get object/objcore unref events sent through to the stevedore (via objcore method) and refer resultant empty persistant segments. Message-ID: commit bc2f3f06764bec1aca49a0d21929838646bbcb3a Author: Poul-Henning Kamp Date: Mon Feb 7 18:02:36 2011 +0000 Get object/objcore unref events sent through to the stevedore (via objcore method) and refer resultant empty persistant segments. 
diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c index f2f2ea4..bc791f1 100644 --- a/bin/varnishd/cache_expire.c +++ b/bin/varnishd/cache_expire.c @@ -239,7 +239,6 @@ static void * __match_proto__(void *start_routine(void *)) exp_timer(struct sess *sp, void *priv) { struct objcore *oc; - struct object *o; double t; (void)priv; @@ -282,20 +281,7 @@ exp_timer(struct sess *sp, void *priv) VSC_main->n_expired++; CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC); - if (oc->methods == &default_oc_methods) { - o = oc_getobj(sp->wrk, oc); - AN(o); - WSL(sp->wrk, SLT_ExpKill, 0, "%u %d", - o->xid, (int)(o->ttl - t)); - (void)HSH_Deref(sp->wrk, NULL, &o); - } else { - WSL(sp->wrk, SLT_ExpKill, 1, "-1 %d", - (int)(oc->timer_when - t)); - - oc->priv = NULL; - AZ(HSH_Deref(sp->wrk, oc, NULL)); - sp->wrk->stats.n_vampireobject--; - } + (void)HSH_Deref(sp->wrk, oc, NULL); } NEEDLESS_RETURN(NULL); } diff --git a/bin/varnishd/cache_hash.c b/bin/varnishd/cache_hash.c index 02f9016..a33bf5e 100644 --- a/bin/varnishd/cache_hash.c +++ b/bin/varnishd/cache_hash.c @@ -648,65 +648,61 @@ HSH_Ref(struct objcore *oc) int HSH_Deref(struct worker *w, struct objcore *oc, struct object **oo) { - struct object *o; - struct objhead *oh = NULL; + struct object *o = NULL; + struct objhead *oh; unsigned r; - if (oc != NULL) { - AZ(oo); - o = NULL; - AZ(oc->priv); // XXX: for now - } else { - AZ(oc); - AN(oo); + /* Only one arg at a time */ + assert(oc == NULL || oo == NULL); + + if (oo != NULL) { o = *oo; *oo = NULL; CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); oc = o->objcore; } - if (oc != NULL) { - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - oh = oc->objhead; - CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC); - - Lck_Lock(&oh->mtx); - assert(oh->refcnt > 0); - assert(oc->refcnt > 0); - r = --oc->refcnt; - if (!r) - VTAILQ_REMOVE(&oh->objcs, oc, list); - if (oc->flags & OC_F_BUSY) - hsh_rush(oh); - Lck_Unlock(&oh->mtx); - if (r != 0) - return (r); + if (o != NULL && oc == NULL) { + /* + * A pass object 
with neither objcore nor objhdr reference. + * -> simply free the (Transient) storage + */ + STV_Freestore(o); + STV_free(o->objstore); + w->stats.n_object--; + return (0); } - if (oc != NULL) { - BAN_DestroyObj(oc); - AZ(oc->ban); - } + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - if (o != NULL) { - DSL(0x40, SLT_Debug, 0, "Object %u workspace min free %u", - o->xid, WS_Free(o->ws_o)); + oh = oc->objhead; + CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC); - if (oc != NULL) - oc_freeobj(oc); - else { - STV_Freestore(o); - STV_free(o->objstore); - o = NULL; - } - w->stats.n_object--; + Lck_Lock(&oh->mtx); + assert(oh->refcnt > 0); + assert(oc->refcnt > 0); + r = --oc->refcnt; + if (!r) + VTAILQ_REMOVE(&oh->objcs, oc, list); + else { + /* Must have an object */ + AN(oc->methods); } + if (oc->flags & OC_F_BUSY) + hsh_rush(oh); + Lck_Unlock(&oh->mtx); + if (r != 0) + return (r); - if (oc == NULL) - return (0); + BAN_DestroyObj(oc); + AZ(oc->ban); - AN(oh); + if (oc->methods != NULL) { + oc_freeobj(oc); + w->stats.n_object--; + } FREE_OBJ(oc); + w->stats.n_objectcore--; /* Drop our ref on the objhead */ assert(oh->refcnt > 0); diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index ceae27d..9a45cf3 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -774,16 +774,13 @@ smp_oc_updatemeta(struct objcore *oc) } } -static void +static void __match_proto__() smp_oc_freeobj(struct objcore *oc) { struct smp_seg *sg; - struct object *o; struct smp_object *so; CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - o = smp_oc_getobj(NULL, oc); - AN(o); CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); so = smp_find_so(sg, oc); From phk at varnish-cache.org Mon Feb 7 20:04:19 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 21:04:19 +0100 Subject: [master] 8edfa76 Various polishing while I wait for the long test run to finish: Message-ID: commit 8edfa76ca60a77a399a49ab80137436a914046b0 Author: Poul-Henning Kamp 
Date: Mon Feb 7 20:02:48 2011 +0000 Various polishing while I wait for the long test run to finish: Remove the aim_nobj, it has been found not useful. Remove pointer from persistent.h, it shouldn't contain any. Document debug.persistent, it will be useful for debugging. Exercise it in testcase. diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 9a45cf3..9868d8d 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -161,8 +161,6 @@ struct smp_sc { unsigned aim_nseg; unsigned max_nseg; - unsigned aim_nobj; - uint64_t min_segl; uint64_t aim_segl; uint64_t max_segl; @@ -486,7 +484,7 @@ smp_metrics(struct smp_sc *sc) * * XXX: This should possibly depend on the size of the silo so * XXX: trivially small silos do not run into trouble along - * XXX: the lines of "one object per silo". + * XXX: the lines of "one object per segment". */ sc->min_nseg = 10; @@ -528,16 +526,6 @@ smp_metrics(struct smp_sc *sc) sc->aim_nseg, (uintmax_t)sc->aim_segl); /* - * Objects per segment - * - * XXX: calculate size of minimum object (workspace, http etc) - */ - - sc->aim_nobj = sc->max_segl / 4000; - - fprintf(stderr, "aim_nobj = %u\n", sc->aim_nobj); - - /* * How much space in the free reserve pool ? */ sc->free_reserve = sc->aim_segl * 10; @@ -708,7 +696,7 @@ smp_oc_getobj(struct worker *wrk, struct objcore *oc) CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); so = smp_find_so(sg, oc); - o = so->ptr; + o = (void*)(sg->sc->base + so->ptr); /* * The object may not be in this segment since we allocate it * In a separate operation than the smp_object. 
We could check @@ -1220,8 +1208,8 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) sg->objs = dp; sg->p.length = (sc->next_top - sg->p.offset) + len + IRNUP(sc, SMP_SIGN_SPACE); - (void)smp_spaceleft(sc, sg); /* for asserts */ - + (void)smp_spaceleft(sc, sg); /* for the asserts */ + } /* Update the segment header */ @@ -1347,7 +1335,7 @@ smp_close(const struct stevedore *st) * Allocate [min_size...max_size] space from the bottom of the segment, * as is convenient. * - * If 'so' + 'idx' is given, also allocate a smp_object from the top + * If 'so' + 'idx' is given, also allocate a smp_object from the top * of the segment. * * Return the segment in 'ssg' if given. @@ -1380,7 +1368,7 @@ smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, ss = NULL; for (tries = 0; tries < 3; tries++) { left = smp_spaceleft(sc, sc->cur_seg); - if (left >= extra + min_size) + if (left >= extra + min_size) break; smp_close_seg(sc, sc->cur_seg); smp_new_seg(sc); @@ -1399,7 +1387,7 @@ smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, /* Render this smp_object mostly harmless */ (*so)->ttl = 0.; (*so)->ban = 0.; - (*so)->ptr = NULL; + (*so)->ptr = 0;; sg->objs = *so; *idx = ++sg->p.lobjlist; } @@ -1484,7 +1472,7 @@ smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, assert(sizeof so->hash == DIGEST_LEN); memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); so->ttl = o->ttl; /* XXX: grace? */ - so->ptr = o; + so->ptr = (uint8_t*)o - sc->base; so->ban = o->ban_t; oc->priv = sg; @@ -1503,7 +1491,7 @@ static struct storage * smp_alloc(struct stevedore *st, size_t size) { - return (smp_allocx(st, + return (smp_allocx(st, size > 4096 ? 
4096 : size, size, NULL, NULL, NULL)); } @@ -1546,7 +1534,7 @@ SMP_Ready(void) ASSERT_CLI(); do { - VTAILQ_FOREACH(sc, &silos, list) + VTAILQ_FOREACH(sc, &silos, list) if (!(sc->flags & SMP_F_LOADED)) break; if (sc != NULL) @@ -1569,10 +1557,13 @@ const struct stevedore smp_stevedore = { .trim = smp_trim, }; -/*--------------------------------------------------------------------*/ +/*-------------------------------------------------------------------- + * Persistence is a bear to test unadultered, so we cheat by adding + * a cli command we can use to make it do tricks for us. + */ static void -debug_report_silo(struct cli *cli, const struct smp_sc *sc) +debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) { struct smp_seg *sg; struct objcore *oc; @@ -1582,18 +1573,19 @@ debug_report_silo(struct cli *cli, const struct smp_sc *sc) VTAILQ_FOREACH(sg, &sc->segments, list) { cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); - if (sg == sc->cur_seg) - cli_out(cli, "\t[0x%jx ... 0x%jx] = 0x%jx free\n", + if (sg == sc->cur_seg) + cli_out(cli, + " Alloc: [0x%jx ... 0x%jx] = 0x%jx free\n", (uintmax_t)(sc->next_bot), (uintmax_t)(sc->next_top), (uintmax_t)(sc->next_top - sc->next_bot)); - cli_out(cli, "\t%u nobj, %u alloc, %u lobjlist, %u fixed\n", + cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); - VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) { - if (oc == &sg->lru->senteniel) - cli_out(cli, "\t\t(senteniel) %p\n", oc); - else - cli_out(cli, "\t\tOC: %p\n", oc); + if (objs) { + VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) + cli_out(cli, " %s %p\n", + oc == &sg->lru->senteniel ? 
+ "senteniel" : "OC: ", oc); } } } @@ -1607,7 +1599,7 @@ debug_persistent(struct cli *cli, const char * const * av, void *priv) if (av[2] == NULL) { VTAILQ_FOREACH(sc, &silos, list) - debug_report_silo(cli, sc); + debug_report_silo(cli, sc, 0); return; } VTAILQ_FOREACH(sc, &silos, list) @@ -1619,13 +1611,15 @@ debug_persistent(struct cli *cli, const char * const * av, void *priv) return; } if (av[3] == NULL) { - debug_report_silo(cli, sc); + debug_report_silo(cli, sc, 0); return; } Lck_Lock(&sc->mtx); if (!strcmp(av[3], "sync")) { smp_close_seg(sc, sc->cur_seg); smp_new_seg(sc); + } else if (!strcmp(av[3], "dump")) { + debug_report_silo(cli, sc, 1); } else { cli_out(cli, "Unknown operation\n"); cli_result(cli, CLIS_PARAM); @@ -1635,7 +1629,14 @@ debug_persistent(struct cli *cli, const char * const * av, void *priv) static struct cli_proto debug_cmds[] = { { "debug.persistent", "debug.persistent", - "Persistent debugging magic\n", 0, 2, "d", debug_persistent }, + "Persistent debugging magic:\n" + "\tdebug.persistent [stevedore [cmd]]\n" + "With no cmd arg, a summary of the silo is returned.\n" + "Possible commands:\n" + "\tsync\tClose current segment, open a new one\n" + "\tdump\tinclude objcores in silo summary\n" + "", + 0, 2, "d", debug_persistent }, { NULL } }; diff --git a/bin/varnishtest/tests/p00000.vtc b/bin/varnishtest/tests/p00000.vtc index 1e4f42c..6cea757 100644 --- a/bin/varnishtest/tests/p00000.vtc +++ b/bin/varnishtest/tests/p00000.vtc @@ -25,6 +25,9 @@ client c1 { expect resp.http.X-Varnish == "1001" } -run +varnish v1 -cliok "storage.list" +varnish v1 -cliok "debug.persistent s0 dump" +varnish v1 -cliok "debug.persistent s0 sync" varnish v1 -stop varnish v1 -start diff --git a/include/persistent.h b/include/persistent.h index 6aebd40..8a355a7 100644 --- a/include/persistent.h +++ b/include/persistent.h @@ -136,8 +136,8 @@ struct smp_segptr { */ struct smp_object { - unsigned char hash[32]; + uint8_t hash[32]; /* really: DIGEST_LEN */ double ttl; 
double ban; - struct object *ptr; + uint64_t ptr; /* rel to silo */ }; From phk at varnish-cache.org Mon Feb 7 21:12:57 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 22:12:57 +0100 Subject: [master] f9a8797 Ohh, man... Message-ID: commit f9a8797dedf642768a54d47248881ed2284ed590 Author: Poul-Henning Kamp Date: Mon Feb 7 21:08:09 2011 +0000 Ohh, man... So imagine an object during fetch, where we have allocated the storage for the object structure, the persistent silo gets synced, so the data ends up in the next segment, and then we crash before that segment gets synched to silo. On restart the object looks good, until we try to access its storage... *bewm* This is a stopgap, that catches such objects and neuters them, using a set of paranoid sanity checks we should employ in any case. There still is a relevant hole: As above, but after the restart we manage to write a new segment before the initial object is accessed, and it happens to have a storage structure just the same place (not unlikely at the beginning) We do not crash in this case, but deliver wrong content. Did I ever mention that -spersistent for all practical purposes is a filesystem ?
diff --git a/bin/varnishtest/tests/p00007.vtc b/bin/varnishtest/tests/p00007.vtc new file mode 100644 index 0000000..8d0a18c --- /dev/null +++ b/bin/varnishtest/tests/p00007.vtc @@ -0,0 +1,72 @@ +# $Id$ + +test "test reload of object spanning incomplete segment" + +server s1 { + rxreq + expect req.url == "/1" + send "HTTP/1.1 200 Ok\n" + send "Transfer-encoding: chunked\n" + send "\n" + chunkedlen 32 + # Tell top-level that it can sync the stevedore + sema r1 sync 2 + # Top-level tells us it has synched the stevedore + sema r1 sync 2 + chunkedlen 32 + chunkedlen 0 + accept + + rxreq + expect req.url == "/2" + txresp -bodylen 100 + + rxreq + expect req.url == "/1" + txresp -bodylen 48 +} -start + +varnish v1 -storage "-spersistent,${tmpdir}/_.per,10m" \ + -vcl+backend {} -start + +varnish v1 -cliok "debug.fragfetch 32" + +client c1 { + txreq -url "/1" + rxresp + expect resp.bodylen == 64 +} -start + +# Wait for first chunk to have been sent +sema r1 sync 2 +delay .2 + +# Sync the stevedore, so the next chunk ends up i segment 2 +varnish v1 -cliok "debug.persistent s0 sync" + +# Tell server to continue +sema r1 sync 2 + +# Get the result +client c1 -wait + +varnish v1 -cliok "debug.persistent s0 dump" + +# Panic worker so second segment does not get closed +varnish v1 -clierr 400 "debug.panic.worker" + +# start again +varnish v1 -start + +client c1 { + # Make sure there is not a valid "struct storage" in second seg. + txreq -url "/2" + rxresp + expect resp.bodylen == 100 + + # Fetch the vampire object and see how that goes... + txreq -url "/1" + rxresp + expect resp.bodylen == 48 +} -run + From phk at varnish-cache.org Mon Feb 7 21:39:28 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 07 Feb 2011 22:39:28 +0100 Subject: [master] 99c0a17 Hmm, I still don't understand the args to git commit it seems... 
Message-ID: commit 99c0a172f83ca4141a8ca9ee8bd095ba6ee36e64 Author: Poul-Henning Kamp Date: Mon Feb 7 21:38:22 2011 +0000 Hmm, I still don't understand the args to git commit it seems... Commit the code that prevents all the evils mentionend in the previous commit message (see tests/p00007.vtc) diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 9868d8d..aab14bb 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -101,7 +101,9 @@ struct smp_seg { struct smp_segptr p; - unsigned must_load; + unsigned flags; +#define SMP_SEG_MUSTLOAD (1 << 0) +#define SMP_SEG_LOADED (1 << 1) uint32_t nobj; /* Number of objects */ uint32_t nalloc; /* Allocations */ @@ -120,7 +122,7 @@ struct smp_sc { struct stevedore *parent; unsigned flags; -#define SMP_F_LOADED (1 << 0) +#define SMP_SC_LOADED (1 << 0) const struct stevedore *stevedore; int fd; @@ -676,6 +678,54 @@ smp_find_so(const struct smp_seg *sg, const struct objcore *oc) } /*--------------------------------------------------------------------- + * Check if a given storage structure is valid to use + */ + +static int +smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, + const struct storage *st) +{ + struct smp_seg *sg2; + const uint8_t *pst; + uint64_t o; + + (void)sg; /* XXX: faster: Start search from here */ + pst = (const void *)st; + + if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF])) + return (0x01); /* Before silo payload start */ + if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) + return (0x02); /* After silo end */ + + o = pst - sc->base; + + /* Find which segment contains the storage structure */ + VTAILQ_FOREACH(sg2, &sc->segments, list) + if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) + break; + if (sg2 == NULL) + return (0x04); /* No claiming segment */ + if (!(sg2->flags & SMP_SEG_LOADED)) + return (0x08); /* Claiming segment not loaded */ + + /* It is now safe to access the storage structure */ + 
if (st->magic != STORAGE_MAGIC) + return (0x10); /* Not enough magic */ + + if (o + st->space >= sg2->p.objlist) + return (0x20); /* Allocation not inside segment */ + + if (st->len > st->space) + return (0x40); /* Plain bad... */ + + /* + * XXX: We could patch up st->stevedore and st->priv here + * XXX: but if things go right, we will never need them. + */ + return (0); +} + +/*--------------------------------------------------------------------- * objcore methods for persistent objects */ @@ -685,6 +735,9 @@ smp_oc_getobj(struct worker *wrk, struct objcore *oc) struct object *o; struct smp_seg *sg; struct smp_object *so; + struct storage *st; + uint64_t l; + int bad; /* Some calls are direct, but they should match anyway */ assert(oc->methods->getobj == smp_oc_getobj); @@ -717,9 +770,26 @@ smp_oc_getobj(struct worker *wrk, struct objcore *oc) Lck_Lock(&sg->sc->mtx); /* Check again, we might have raced. */ if (oc->flags & OC_F_NEEDFIXUP) { - /* refcnt is >=1 because the object is in the hash */ + /* We trust caller to have a refcnt for us */ o->objcore = oc; + bad = 0; + l = 0; + VTAILQ_FOREACH(st, &o->store, list) { + bad |= smp_loaded_st(sg->sc, sg, st); + if (bad) + break; + l += st->len; + } + if (l != o->len) + bad |= 0x100; + + if(bad) { + o->ttl = 0; + o->grace = 0; + so->ttl = 0; + } + sg->nfixed++; wrk->stats.n_object++; wrk->stats.n_vampireobject--; @@ -939,8 +1009,8 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - assert(sg->must_load == 1); - sg->must_load = 0; + assert(sg->flags & SMP_SEG_MUSTLOAD); + sg->flags &= ~SMP_SEG_MUSTLOAD; AN(sg->p.offset); if (sg->p.objlist == 0) return; @@ -975,6 +1045,7 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) sg->nobj++; } WRK_SumStat(sp->wrk); + sg->flags |= SMP_SEG_LOADED; } 
/*-------------------------------------------------------------------- @@ -1062,7 +1133,7 @@ smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); sg->p = *ss; - sg->must_load = 1; + sg->flags |= SMP_SEG_MUSTLOAD; /* * HACK: prevent save_segs from nuking segment until we have @@ -1248,10 +1319,10 @@ smp_thread(struct sess *sp, void *priv) /* First, load all the objects from all segments */ VTAILQ_FOREACH(sg, &sc->segments, list) - if (sg->must_load) + if (sg->flags & SMP_SEG_MUSTLOAD) smp_load_seg(sp, sc, sg); - sc->flags |= SMP_F_LOADED; + sc->flags |= SMP_SC_LOADED; BAN_Deref(&sc->tailban); sc->tailban = NULL; printf("Silo completely loaded\n"); @@ -1535,7 +1606,7 @@ SMP_Ready(void) ASSERT_CLI(); do { VTAILQ_FOREACH(sc, &silos, list) - if (!(sc->flags & SMP_F_LOADED)) + if (!(sc->flags & SMP_SC_LOADED)) break; if (sc != NULL) (void)sleep(1); From tfheen at varnish-cache.org Tue Feb 8 06:41:58 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Tue, 08 Feb 2011 07:41:58 +0100 Subject: [master] 13add5f Reduce the default thread_add_delay to 2ms Message-ID: commit 13add5fd5ec66bc96e8af7b793fc7e92243749de Author: Tollef Fog Heen Date: Mon Feb 7 16:45:07 2011 +0100 Reduce the default thread_add_delay to 2ms Experience shows that 2ms is a better value for thread_add_delay. This is high enough to prevent thread pileups and low enough that we do not run into as too few thread problems at startup. 
diff --git a/bin/varnishd/mgt_pool.c b/bin/varnishd/mgt_pool.c index 029b702..86468ca 100644 --- a/bin/varnishd/mgt_pool.c +++ b/bin/varnishd/mgt_pool.c @@ -175,8 +175,8 @@ const struct parspec WRK_parspec[] = { "\n" "Setting this too short increases the risk of worker " "thread pile-up.\n", - EXPERIMENTAL, - "20", "milliseconds" }, + 0, + "2", "milliseconds" }, { "thread_pool_fail_delay", tweak_timeout, &master.wthread_fail_delay, 100, UINT_MAX, "Wait at least this long after a failed thread creation " From phk at varnish-cache.org Tue Feb 8 10:50:29 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 08 Feb 2011 11:50:29 +0100 Subject: [master] 7a081be Clone this from storage_persistent.c Message-ID: commit 7a081be962244c7de73441bfebb28dd4dd95804d Author: Poul-Henning Kamp Date: Tue Feb 8 09:40:38 2011 +0000 Clone this from storage_persistent.c diff --git a/bin/varnishd/storage_persistent.h b/bin/varnishd/storage_persistent.h new file mode 100644 index 0000000..aab14bb --- /dev/null +++ b/bin/varnishd/storage_persistent.h @@ -0,0 +1,1718 @@ +/*- + * Copyright (c) 2008-2010 Linpro AS + * All rights reserved. + * + * Author: Poul-Henning Kamp + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Persistent storage method + * + * XXX: Before we start the client or maybe after it stops, we should give the + * XXX: stevedores a chance to examine their storage for consistency. + * + * XXX: Do we ever free the LRU-lists ? + */ + +#include "config.h" + +#include "svnid.h" +SVNID("$Id$") + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cache.h" +#include "stevedore.h" +#include "hash_slinger.h" +#include "vsha256.h" +#include "cli.h" +#include "cli_priv.h" + +#include "persistent.h" + +#ifndef MAP_NOCORE +#define MAP_NOCORE 0 /* XXX Linux */ +#endif + +#ifndef MAP_NOSYNC +#define MAP_NOSYNC 0 /* XXX Linux */ +#endif + +#define ASSERT_SILO_THREAD(sc) \ + do {assert(pthread_self() == (sc)->thread);} while (0) + +#define OC_F_NEEDFIXUP OC_F_PRIV + +/* + * Context for a signature. + * + * A signature is a sequence of bytes in the silo, signed by a SHA256 hash + * which follows the bytes. + * + * The context structure allows us to append to a signature without + * recalculating the entire SHA256 hash. + */ + +struct smp_signctx { + struct smp_sign *ss; + struct SHA256Context ctx; + uint32_t unique; + const char *id; +}; + +struct smp_sc; + +/* XXX: name confusion with on-media version ? 
*/ +struct smp_seg { + unsigned magic; +#define SMP_SEG_MAGIC 0x45c61895 + + struct smp_sc *sc; + struct lru *lru; + + VTAILQ_ENTRY(smp_seg) list; /* on smp_sc.smp_segments */ + + struct smp_segptr p; + + unsigned flags; +#define SMP_SEG_MUSTLOAD (1 << 0) +#define SMP_SEG_LOADED (1 << 1) + + uint32_t nobj; /* Number of objects */ + uint32_t nalloc; /* Allocations */ + uint32_t nfixed; /* How many fixed objects */ + + /* Only for open segment */ + struct smp_object *objs; /* objdesc array */ + struct smp_signctx ctx[1]; +}; + +VTAILQ_HEAD(smp_seghead, smp_seg); + +struct smp_sc { + unsigned magic; +#define SMP_SC_MAGIC 0x7b73af0a + struct stevedore *parent; + + unsigned flags; +#define SMP_SC_LOADED (1 << 0) + + const struct stevedore *stevedore; + int fd; + const char *filename; + off_t mediasize; + uint64_t align; /* 64b to avoid casts */ + uint32_t granularity; + uint32_t unique; + + uint8_t *base; + + struct smp_ident *ident; + + struct smp_seghead segments; + struct smp_seg *cur_seg; + uint64_t next_bot; /* next alloc address bottom */ + uint64_t next_top; /* next alloc address top */ + + uint64_t free_offset; + + pthread_t thread; + + VTAILQ_ENTRY(smp_sc) list; + + struct smp_signctx idn; + struct smp_signctx ban1; + struct smp_signctx ban2; + struct smp_signctx seg1; + struct smp_signctx seg2; + + struct ban *tailban; + + struct lock mtx; + + /* Cleaner metrics */ + + unsigned min_nseg; + unsigned aim_nseg; + unsigned max_nseg; + + uint64_t min_segl; + uint64_t aim_segl; + uint64_t max_segl; + + uint64_t free_reserve; +}; + +/*--------------------------------------------------------------------*/ + +/* Generic power-2 rounding */ +#define PWR2(x) ((((x)-1)&(x))==0) /* Is a power of two */ +#define RDN2(x, y) ((x)&(~((y)-1))) /* if y is powers of two */ +#define RUP2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ + +/* Pointer round up/down & assert */ +#define PRNDN(sc, x) ((void*)RDN2((uintptr_t)(x), sc->align)) +#define PRNUP(sc, x) 
((void*)RUP2((uintptr_t)(x), sc->align)) +#define PASSERTALIGN(sc, x) assert(PRNDN(sc, x) == (x)) + +/* Integer round up/down & assert */ +#define IRNDN(sc, x) RDN2(x, sc->align) +#define IRNUP(sc, x) RUP2(x, sc->align) +#define IASSERTALIGN(sc, x) assert(IRNDN(sc, x) == (x)) + +/*--------------------------------------------------------------------*/ + +#define ASSERT_PTR_IN_SILO(sc, ptr) \ + assert((const void*)(ptr) >= (const void*)((sc)->base) && \ + (const void*)(ptr) < (const void *)((sc)->base + (sc)->mediasize)) + +/*--------------------------------------------------------------------*/ + +/* + * silos is unlocked, it only changes during startup when we are + * single-threaded + */ +static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); + +/*-------------------------------------------------------------------- + * SIGNATURE functions + * The signature is SHA256 over: + * 1. The smp_sign struct up to but not including the length field. + * 2. smp_sign->length bytes, starting after the smp_sign structure + * 3. The smp-sign->length field. + * The signature is stored after the byte-range from step 2. + */ + +#define SIGN_DATA(ctx) ((void *)((ctx)->ss + 1)) +#define SIGN_END(ctx) ((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length)) + +/*-------------------------------------------------------------------- + * Define a signature by location and identifier. 
+ */ + +static void +smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id) +{ + + AZ(off & 7); /* Alignment */ + assert(strlen(id) < sizeof ctx->ss->ident); + + memset(ctx, 0, sizeof ctx); + ctx->ss = (void*)(sc->base + off); + ctx->unique = sc->unique; + ctx->id = id; +} + +/*-------------------------------------------------------------------- + * Check that a signature is good, leave state ready for append + */ +static int +smp_chk_sign(struct smp_signctx *ctx) +{ + struct SHA256Context cx; + unsigned char sign[SHA256_LEN]; + int r = 0; + + if (strncmp(ctx->id, ctx->ss->ident, sizeof ctx->ss->ident)) + r = 1; + else if (ctx->unique != ctx->ss->unique) + r = 2; + else if ((uintptr_t)ctx->ss != ctx->ss->mapped) + r = 3; + else { + SHA256_Init(&ctx->ctx); + SHA256_Update(&ctx->ctx, ctx->ss, + offsetof(struct smp_sign, length)); + SHA256_Update(&ctx->ctx, SIGN_DATA(ctx), ctx->ss->length); + cx = ctx->ctx; + SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); + SHA256_Final(sign, &cx); + if (memcmp(sign, SIGN_END(ctx), sizeof sign)) + r = 4; + } + if (r) { + fprintf(stderr, "CHK(%p %s %p %s) = %d\n", + ctx, ctx->id, ctx->ss, + r > 1 ? ctx->ss->ident : "", r); + } + return (r); +} + +/*-------------------------------------------------------------------- + * Append data to a signature + */ +static void +smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len) +{ + struct SHA256Context cx; + unsigned char sign[SHA256_LEN]; + + if (len != 0) { + SHA256_Update(&ctx->ctx, ptr, len); + ctx->ss->length += len; + } + cx = ctx->ctx; + SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); + SHA256_Final(sign, &cx); + memcpy(SIGN_END(ctx), sign, sizeof sign); +XXXAZ(smp_chk_sign(ctx)); +} + +/*-------------------------------------------------------------------- + * Reset a signature to empty, prepare for appending. 
+ */ + +static void +smp_reset_sign(struct smp_signctx *ctx) +{ + + memset(ctx->ss, 0, sizeof *ctx->ss); + strcpy(ctx->ss->ident, ctx->id); + ctx->ss->unique = ctx->unique; + ctx->ss->mapped = (uintptr_t)ctx->ss; + SHA256_Init(&ctx->ctx); + SHA256_Update(&ctx->ctx, ctx->ss, + offsetof(struct smp_sign, length)); + smp_append_sign(ctx, NULL, 0); +} + +/*-------------------------------------------------------------------- + * Force a write of a signature block to the backing store. + */ + +static void +smp_sync_sign(const struct smp_signctx *ctx) +{ + int i; + + /* XXX: round to pages */ + i = msync((void*)ctx->ss, ctx->ss->length + SHA256_LEN, MS_SYNC); + if (i && 0) + fprintf(stderr, "SyncSign(%p %s) = %d %s\n", + ctx->ss, ctx->id, i, strerror(errno)); +} + +/*-------------------------------------------------------------------- + * Create and force a new signature to backing store + */ + +static void +smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id) +{ + smp_def_sign(sc, ctx, off, id); + smp_reset_sign(ctx); + smp_sync_sign(ctx); +} + +/*-------------------------------------------------------------------- + * Calculate payload of some stuff + */ + +static uint64_t +smp_stuff_len(const struct smp_sc *sc, unsigned stuff) +{ + uint64_t l; + + assert(stuff < SMP_END_STUFF); + l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff]; + l -= SMP_SIGN_SPACE; + return (l); +} + +/*-------------------------------------------------------------------- + * Initialize a Silo with a valid but empty structure. + * + * XXX: more intelligent sizing of things. 
+ */ + +static void +smp_newsilo(struct smp_sc *sc) +{ + struct smp_ident *si; + + ASSERT_MGT(); + assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); + + /* Choose a new random number */ + sc->unique = random(); + + smp_reset_sign(&sc->idn); + si = sc->ident; + + memset(si, 0, sizeof *si); + strcpy(si->ident, SMP_IDENT_STRING); + si->byte_order = 0x12345678; + si->size = sizeof *si; + si->major_version = 2; + si->unique = sc->unique; + si->mediasize = sc->mediasize; + si->granularity = sc->granularity; + /* + * Aim for cache-line-width + */ + si->align = sizeof(void*) * 2; + sc->align = si->align; + + si->stuff[SMP_BAN1_STUFF] = sc->granularity; + si->stuff[SMP_BAN2_STUFF] = si->stuff[SMP_BAN1_STUFF] + 1024*1024; + si->stuff[SMP_SEG1_STUFF] = si->stuff[SMP_BAN2_STUFF] + 1024*1024; + si->stuff[SMP_SEG2_STUFF] = si->stuff[SMP_SEG1_STUFF] + 1024*1024; + si->stuff[SMP_SPC_STUFF] = si->stuff[SMP_SEG2_STUFF] + 1024*1024; + si->stuff[SMP_END_STUFF] = si->mediasize; + assert(si->stuff[SMP_SPC_STUFF] < si->stuff[SMP_END_STUFF]); + + smp_new_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); + smp_new_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); + smp_new_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); + smp_new_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); + + smp_append_sign(&sc->idn, si, sizeof *si); + smp_sync_sign(&sc->idn); +} + +/*-------------------------------------------------------------------- + * Check if a silo is valid. 
+ */ + +static int +smp_valid_silo(struct smp_sc *sc) +{ + struct smp_ident *si; + int i, j; + + assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); + + if (smp_chk_sign(&sc->idn)) + return (1); + + si = sc->ident; + if (strcmp(si->ident, SMP_IDENT_STRING)) + return (2); + if (si->byte_order != 0x12345678) + return (3); + if (si->size != sizeof *si) + return (4); + if (si->major_version != 2) + return (5); + if (si->mediasize != sc->mediasize) + return (7); + if (si->granularity != sc->granularity) + return (8); + if (si->align < sizeof(void*)) + return (9); + if (!PWR2(si->align)) + return (10); + sc->align = si->align; + sc->unique = si->unique; + + /* XXX: Sanity check stuff[6] */ + + assert(si->stuff[SMP_BAN1_STUFF] > sizeof *si + SHA256_LEN); + assert(si->stuff[SMP_BAN2_STUFF] > si->stuff[SMP_BAN1_STUFF]); + assert(si->stuff[SMP_SEG1_STUFF] > si->stuff[SMP_BAN2_STUFF]); + assert(si->stuff[SMP_SEG2_STUFF] > si->stuff[SMP_SEG1_STUFF]); + assert(si->stuff[SMP_SPC_STUFF] > si->stuff[SMP_SEG2_STUFF]); + assert(si->stuff[SMP_END_STUFF] == sc->mediasize); + + assert(smp_stuff_len(sc, SMP_SEG1_STUFF) > 65536); + assert(smp_stuff_len(sc, SMP_SEG1_STUFF) == + smp_stuff_len(sc, SMP_SEG2_STUFF)); + + assert(smp_stuff_len(sc, SMP_BAN1_STUFF) > 65536); + assert(smp_stuff_len(sc, SMP_BAN1_STUFF) == + smp_stuff_len(sc, SMP_BAN2_STUFF)); + + smp_def_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); + smp_def_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); + smp_def_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); + smp_def_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); + + /* We must have one valid BAN table */ + i = smp_chk_sign(&sc->ban1); + j = smp_chk_sign(&sc->ban2); + if (i && j) + return (100 + i * 10 + j); + + /* We must have one valid SEG table */ + i = smp_chk_sign(&sc->seg1); + j = smp_chk_sign(&sc->seg2); + if (i && j) + return (200 + i * 10 + j); + return (0); +} + 
+/*-------------------------------------------------------------------- + * Calculate cleaner metrics from silo dimensions + */ + +static void +smp_metrics(struct smp_sc *sc) +{ + + /* + * We do not want to lose too big chunks of the silos + * content when we are forced to clean a segment. + * + * For now insist that a segment covers no more than 1% of the silo. + * + * XXX: This should possibly depend on the size of the silo so + * XXX: trivially small silos do not run into trouble along + * XXX: the lines of "one object per segment". + */ + + sc->min_nseg = 10; + sc->max_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->min_nseg; + + fprintf(stderr, "min_nseg = %u, max_segl = %ju\n", + sc->min_nseg, (uintmax_t)sc->max_segl); + + /* + * The number of segments are limited by the size of the segment + * table(s) and from that follows the minimum size of a segment. + */ + + sc->max_nseg = smp_stuff_len(sc, SMP_SEG1_STUFF) / sc->min_nseg; + sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg; + + while (sc->min_segl < sizeof(struct object)) { + sc->max_nseg /= 2; + sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg; + } + + fprintf(stderr, "max_nseg = %u, min_segl = %ju\n", + sc->max_nseg, (uintmax_t)sc->min_segl); + + /* + * Set our initial aim point at the exponential average of the + * two extremes. + * + * XXX: This is a pretty arbitrary choice, but having no idea + * XXX: object count, size distribution or ttl pattern at this + * XXX: point, we have to do something. + */ + + sc->aim_nseg = + (unsigned) exp((log(sc->min_nseg) + log(sc->max_nseg))*.5); + sc->aim_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->aim_nseg; + + fprintf(stderr, "aim_nseg = %u, aim_segl = %ju\n", + sc->aim_nseg, (uintmax_t)sc->aim_segl); + + /* + * How much space in the free reserve pool ? 
+ */ + sc->free_reserve = sc->aim_segl * 10; + + fprintf(stderr, "free_reserve = %ju\n", sc->free_reserve); +} + +/*-------------------------------------------------------------------- + * Set up persistent storage silo in the master process. + */ + +static void +smp_init(struct stevedore *parent, int ac, char * const *av) +{ + struct smp_sc *sc; + int i; + + ASSERT_MGT(); + + AZ(av[ac]); +#define SIZOF(foo) fprintf(stderr, \ + "sizeof(%s) = %zu = 0x%zx\n", #foo, sizeof(foo), sizeof(foo)); + SIZOF(struct smp_ident); + SIZOF(struct smp_sign); + SIZOF(struct smp_segptr); + SIZOF(struct smp_object); +#undef SIZOF + + /* See comments in persistent.h */ + assert(sizeof(struct smp_ident) == SMP_IDENT_SIZE); + + /* Allocate softc */ + ALLOC_OBJ(sc, SMP_SC_MAGIC); + XXXAN(sc); + sc->parent = parent; + sc->fd = -1; + VTAILQ_INIT(&sc->segments); + + /* Argument processing */ + if (ac != 2) + ARGV_ERR("(-spersistent) wrong number of arguments\n"); + + i = STV_GetFile(av[0], &sc->fd, &sc->filename, "-spersistent"); + if (i == 2) + ARGV_ERR("(-spersistent) need filename (not directory)\n"); + + sc->align = sizeof(void*) * 2; + sc->granularity = getpagesize(); + sc->mediasize = STV_FileSize(sc->fd, av[1], &sc->granularity, + "-spersistent"); + + AZ(ftruncate(sc->fd, sc->mediasize)); + + sc->base = mmap(NULL, sc->mediasize, PROT_READ|PROT_WRITE, + MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, 0); + + if (sc->base == MAP_FAILED) + ARGV_ERR("(-spersistent) failed to mmap (%s)\n", + strerror(errno)); + + smp_def_sign(sc, &sc->idn, 0, "SILO"); + sc->ident = SIGN_DATA(&sc->idn); + + i = smp_valid_silo(sc); + if (i) + smp_newsilo(sc); + AZ(smp_valid_silo(sc)); + + smp_metrics(sc); + + parent->priv = sc; + + /* XXX: only for sendfile I guess... */ + mgt_child_inherit(sc->fd, "storage_persistent"); +} + + +/*-------------------------------------------------------------------- + * Write the segmentlist back to the silo. 
+ * + * We write the first copy, sync it synchronously, then write the + * second copy and sync it synchronously. + * + * Provided the kernel doesn't lie, that means we will always have + * at least one valid copy on in the silo. + */ + +static void +smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx) +{ + struct smp_segptr *ss; + struct smp_seg *sg; + uint64_t length; + + Lck_AssertHeld(&sc->mtx); + smp_reset_sign(ctx); + ss = SIGN_DATA(ctx); + length = 0; + VTAILQ_FOREACH(sg, &sc->segments, list) { + assert(sg->p.offset < sc->mediasize); + assert(sg->p.offset + sg->p.length <= sc->mediasize); + *ss = sg->p; + ss++; + length += sizeof *ss; + } + smp_append_sign(ctx, SIGN_DATA(ctx), length); + smp_sync_sign(ctx); +} + +static void +smp_save_segs(struct smp_sc *sc) +{ + struct smp_seg *sg, *sg2; + + Lck_AssertHeld(&sc->mtx); + + /* + * Remove empty segments from the front of the list + * before we write the segments to disk. + */ + VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) { + if (sg->nobj > 0) + break; + if (sg == sc->cur_seg) + continue; + VTAILQ_REMOVE(&sc->segments, sg, list); + free(sg); + } + smp_save_seg(sc, &sc->seg1); + smp_save_seg(sc, &sc->seg2); +} + + +/*--------------------------------------------------------------------- + */ + +static struct smp_object * +smp_find_so(const struct smp_seg *sg, const struct objcore *oc) +{ + struct smp_object *so; + unsigned smp_idx; + + smp_idx = oc->priv2; + assert(smp_idx > 0); + assert(smp_idx <= sg->p.lobjlist); + so = &sg->objs[sg->p.lobjlist - smp_idx]; + return (so); +} + +/*--------------------------------------------------------------------- + * Check if a given storage structure is valid to use + */ + +static int +smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, + const struct storage *st) +{ + struct smp_seg *sg2; + const uint8_t *pst; + uint64_t o; + + (void)sg; /* XXX: faster: Start search from here */ + pst = (const void *)st; + + if (pst < (sc->base + 
sc->ident->stuff[SMP_SPC_STUFF])) + return (0x01); /* Before silo payload start */ + if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) + return (0x02); /* After silo end */ + + o = pst - sc->base; + + /* Find which segment contains the storage structure */ + VTAILQ_FOREACH(sg2, &sc->segments, list) + if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) + break; + if (sg2 == NULL) + return (0x04); /* No claiming segment */ + if (!(sg2->flags & SMP_SEG_LOADED)) + return (0x08); /* Claiming segment not loaded */ + + /* It is now safe to access the storage structure */ + if (st->magic != STORAGE_MAGIC) + return (0x10); /* Not enough magic */ + + if (o + st->space >= sg2->p.objlist) + return (0x20); /* Allocation not inside segment */ + + if (st->len > st->space) + return (0x40); /* Plain bad... */ + + /* + * XXX: We could patch up st->stevedore and st->priv here + * XXX: but if things go right, we will never need them. + */ + return (0); +} + +/*--------------------------------------------------------------------- + * objcore methods for persistent objects + */ + +static struct object * +smp_oc_getobj(struct worker *wrk, struct objcore *oc) +{ + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + struct storage *st; + uint64_t l; + int bad; + + /* Some calls are direct, but they should match anyway */ + assert(oc->methods->getobj == smp_oc_getobj); + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + if (wrk == NULL) + AZ(oc->flags & OC_F_NEEDFIXUP); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); + + o = (void*)(sg->sc->base + so->ptr); + /* + * The object may not be in this segment since we allocate it + * In a separate operation than the smp_object. We could check + * that it is in a later segment, but that would be complicated. 
+ * XXX: For now, be happy if it is inside th silo + */ + ASSERT_PTR_IN_SILO(sg->sc, o); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + + /* + * If this flag is not set, it will not be, and the lock is not + * needed to test it. + */ + if (!(oc->flags & OC_F_NEEDFIXUP)) + return (o); + + AN(wrk); + Lck_Lock(&sg->sc->mtx); + /* Check again, we might have raced. */ + if (oc->flags & OC_F_NEEDFIXUP) { + /* We trust caller to have a refcnt for us */ + o->objcore = oc; + + bad = 0; + l = 0; + VTAILQ_FOREACH(st, &o->store, list) { + bad |= smp_loaded_st(sg->sc, sg, st); + if (bad) + break; + l += st->len; + } + if (l != o->len) + bad |= 0x100; + + if(bad) { + o->ttl = 0; + o->grace = 0; + so->ttl = 0; + } + + sg->nfixed++; + wrk->stats.n_object++; + wrk->stats.n_vampireobject--; + oc->flags &= ~OC_F_NEEDFIXUP; + } + Lck_Unlock(&sg->sc->mtx); + return (o); +} + +static void +smp_oc_updatemeta(struct objcore *oc) +{ + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + double mttl; + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + o = smp_oc_getobj(NULL, oc); + AN(o); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); + so = smp_find_so(sg, oc); + + if (isnan(o->grace)) + mttl = o->ttl; + else + mttl = - (o->ttl + o->grace); + + if (sg == sg->sc->cur_seg) { + /* Lock necessary, we might race close_seg */ + Lck_Lock(&sg->sc->mtx); + so->ban = o->ban_t; + so->ttl = mttl; + Lck_Unlock(&sg->sc->mtx); + } else { + so->ban = o->ban_t; + so->ttl = mttl; + } +} + +static void __match_proto__() +smp_oc_freeobj(struct objcore *oc) +{ + struct smp_seg *sg; + struct smp_object *so; + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); + + Lck_Lock(&sg->sc->mtx); + so->ttl = 0; + so->ptr = 0; + + assert(sg->nobj > 0); + assert(sg->nfixed > 0); + sg->nobj--; + sg->nfixed--; + + Lck_Unlock(&sg->sc->mtx); +} + +static struct objcore_methods smp_oc_methods = { + .getobj = 
smp_oc_getobj, + .updatemeta = smp_oc_updatemeta, + .freeobj = smp_oc_freeobj, +}; + +/*-------------------------------------------------------------------- + * Add a new ban to all silos + */ + +static void +smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, + uint32_t flags, uint32_t len, const char *ban) +{ + uint8_t *ptr, *ptr2; + + (void)sc; + ptr = ptr2 = SIGN_END(ctx); + + memcpy(ptr, "BAN", 4); + ptr += 4; + + memcpy(ptr, &t0, sizeof t0); + ptr += sizeof t0; + + memcpy(ptr, &flags, sizeof flags); + ptr += sizeof flags; + + memcpy(ptr, &len, sizeof len); + ptr += sizeof len; + + memcpy(ptr, ban, len); + ptr += len; + + smp_append_sign(ctx, ptr2, ptr - ptr2); +} + +void +SMP_NewBan(double t0, const char *ban) +{ + struct smp_sc *sc; + uint32_t l = strlen(ban) + 1; + + VTAILQ_FOREACH(sc, &silos, list) { + smp_appendban(sc, &sc->ban1, t0, 0, l, ban); + smp_appendban(sc, &sc->ban2, t0, 0, l, ban); + } +} + +/*-------------------------------------------------------------------- + * Attempt to open and read in a ban list + */ + +static int +smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) +{ + uint8_t *ptr, *pe; + double t0; + uint32_t flags, length; + int i, retval = 0; + + ASSERT_CLI(); + (void)sc; + i = smp_chk_sign(ctx); + if (i) + return (i); + ptr = SIGN_DATA(ctx); + pe = ptr + ctx->ss->length; + + while (ptr < pe) { + if (memcmp(ptr, "BAN", 4)) { + retval = 1001; + break; + } + ptr += 4; + + memcpy(&t0, ptr, sizeof t0); + ptr += sizeof t0; + + memcpy(&flags, ptr, sizeof flags); + ptr += sizeof flags; + if (flags != 0) { + retval = 1002; + break; + } + + memcpy(&length, ptr, sizeof length); + ptr += sizeof length; + if (ptr + length > pe) { + retval = 1003; + break; + } + + if (ptr[length - 1] != '\0') { + retval = 1004; + break; + } + + BAN_Reload(t0, flags, (const char *)ptr); + + ptr += length; + } + assert(ptr <= pe); + return (retval); +} + + +/*--------------------------------------------------------------------*/ + +static 
uint64_t +smp_segend(const struct smp_seg *sg) +{ + + return (sg->p.offset + sg->p.length); +} + +static uint64_t +smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) +{ + + IASSERTALIGN(sc, sc->next_bot); + assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); + assert(sc->next_bot >= sg->p.offset); + assert(sc->next_top < sg->p.offset + sg->p.length); + return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); +} + +/*-------------------------------------------------------------------- + * Load segments + * + * The overall objective is to register the existence of an object, based + * only on the minimally sized struct smp_object, without causing the + * main object to be faulted in. + * + * XXX: We can test this by mprotecting the main body of the segment + * XXX: until the first fixup happens, or even just over this loop, + * XXX: However: the requires that the smp_objects starter further + * XXX: into the segment than a page so that they do not get hit + * XXX: by the protection. 
+ */ + +static void +smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) +{ + struct smp_object *so; + struct objcore *oc; + uint32_t no; + double t_now = TIM_real(); + struct smp_signctx ctx[1]; + + ASSERT_SILO_THREAD(sc); + CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); + CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + assert(sg->flags & SMP_SEG_MUSTLOAD); + sg->flags &= ~SMP_SEG_MUSTLOAD; + AN(sg->p.offset); + if (sg->p.objlist == 0) + return; + smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); + if (smp_chk_sign(ctx)) + return; + + /* test SEGTAIL */ + /* test OBJIDX */ + so = (void*)(sc->base + sg->p.objlist); + sg->objs = so; + no = sg->p.lobjlist; + /* Clear the bogus "hold" count */ + sg->nobj = 0; + for (;no > 0; so++,no--) { + if (so->ttl > 0 && so->ttl < t_now) + continue; + if (so->ttl < 0 && -so->ttl < t_now) + continue; + HSH_Prealloc(sp); + oc = sp->wrk->nobjcore; + oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; + oc->flags &= ~OC_F_BUSY; + oc->priv = sg; + oc->priv2 = no; + oc->methods = &smp_oc_methods; + oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); + memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); + (void)HSH_Insert(sp); + AZ(sp->wrk->nobjcore); + EXP_Inject(oc, sg->lru, fabs(so->ttl)); + sg->nobj++; + } + WRK_SumStat(sp->wrk); + sg->flags |= SMP_SEG_LOADED; +} + +/*-------------------------------------------------------------------- + * Attempt to open and read in a segment list + */ + +static int +smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) +{ + uint64_t length, l; + struct smp_segptr *ss, *se; + struct smp_seg *sg, *sg1, *sg2; + int i, n = 0; + + ASSERT_CLI(); + i = smp_chk_sign(ctx); + if (i) + return (i); + + ss = SIGN_DATA(ctx); + length = ctx->ss->length; + + if (length == 0) { + /* No segments */ + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + return (0); + } + se = ss + length / sizeof *ss; + se--; + assert(ss <= se); + + /* + * Locate the free reserve, 
there are only two basic cases, + * but once we start dropping segments, things gets more complicated. + */ + + sc->free_offset = se->offset + se->length; + l = sc->mediasize - sc->free_offset; + if (se->offset > ss->offset && l >= sc->free_reserve) { + /* + * [__xxxxyyyyzzzz___] + * Plenty of space at tail, do nothing. + */ + } else if (ss->offset > se->offset) { + /* + * [zzzz____xxxxyyyy_] + * (make) space between ends + * We might nuke the entire tail end without getting + * enough space, in which case we fall through to the + * last check. + */ + while (ss < se && ss->offset > se->offset) { + l = ss->offset - (se->offset + se->length); + if (l > sc->free_reserve) + break; + ss++; + n++; + } + } + + if (l < sc->free_reserve) { + /* + * [__xxxxyyyyzzzz___] + * (make) space at front + */ + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + while (ss < se) { + l = ss->offset - sc->free_offset; + if (l > sc->free_reserve) + break; + ss++; + n++; + } + } + + assert (l >= sc->free_reserve); + + + sg1 = NULL; + sg2 = NULL; + for(; ss <= se; ss++) { + ALLOC_OBJ(sg, SMP_SEG_MAGIC); + AN(sg); + sg->lru = LRU_Alloc(); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + sg->p = *ss; + + sg->flags |= SMP_SEG_MUSTLOAD; + + /* + * HACK: prevent save_segs from nuking segment until we have + * HACK: loaded it. 
+ */ + sg->nobj = 1; + if (sg1 != NULL) { + assert(sg1->p.offset != sg->p.offset); + if (sg1->p.offset < sg->p.offset) + assert(smp_segend(sg1) <= sg->p.offset); + else + assert(smp_segend(sg) <= sg1->p.offset); + } + if (sg2 != NULL) { + assert(sg2->p.offset != sg->p.offset); + if (sg2->p.offset < sg->p.offset) + assert(smp_segend(sg2) <= sg->p.offset); + else + assert(smp_segend(sg) <= sg2->p.offset); + } + + /* XXX: check that they are inside silo */ + /* XXX: check that they don't overlap */ + /* XXX: check that they are serial */ + sg->sc = sc; + VTAILQ_INSERT_TAIL(&sc->segments, sg, list); + sg2 = sg; + if (sg1 == NULL) + sg1 = sg; + } + printf("Dropped %d segments to make free_reserve\n", n); + return (0); +} + +/*-------------------------------------------------------------------- + * Create a new segment + */ + +static void +smp_new_seg(struct smp_sc *sc) +{ + struct smp_seg *sg, *sg2; + + Lck_AssertHeld(&sc->mtx); + ALLOC_OBJ(sg, SMP_SEG_MAGIC); + AN(sg); + sg->sc = sc; + sg->lru = LRU_Alloc(); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + + /* XXX: find where it goes in silo */ + + sg->p.offset = sc->free_offset; + // XXX: align */ + assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]); + assert(sg->p.offset < sc->mediasize); + + sg->p.length = sc->aim_segl; + sg->p.length &= ~7; + + if (smp_segend(sg) > sc->mediasize) { + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + sg->p.offset = sc->free_offset; + sg2 = VTAILQ_FIRST(&sc->segments); + if (smp_segend(sg) > sg2->p.offset) { + printf("Out of space in persistent silo\n"); + printf("Committing suicide, restart will make space\n"); + exit (0); + } + } + + + assert(smp_segend(sg) <= sc->mediasize); + + sg2 = VTAILQ_FIRST(&sc->segments); + if (sg2 != NULL && sg2->p.offset > sc->free_offset) { + if (smp_segend(sg) > sg2->p.offset) { + printf("Out of space in persistent silo\n"); + printf("Committing suicide, restart will make space\n"); + exit (0); + } + assert(smp_segend(sg) <= sg2->p.offset); + } + + 
sg->p.offset = IRNUP(sc, sg->p.offset); + sg->p.length = IRNDN(sc, sg->p.length); + sc->free_offset = sg->p.offset + sg->p.length; + + VTAILQ_INSERT_TAIL(&sc->segments, sg, list); + + /* Neuter the new segment in case there is an old one there */ + AN(sg->p.offset); + smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Set up our allocation points */ + sc->cur_seg = sg; + sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); + sc->next_top = smp_segend(sg); + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + IASSERTALIGN(sc, sc->next_bot); + IASSERTALIGN(sc, sc->next_top); + sg->objs = (void*)(sc->base + sc->next_top); +} + +/*-------------------------------------------------------------------- + * Close a segment + */ + +static void +smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) +{ + uint64_t left, dst, len; + void *dp; + + Lck_AssertHeld(&sc->mtx); + + assert(sg == sc->cur_seg); + AN(sg->p.offset); + sc->cur_seg = NULL; + + if (sg->nalloc == 0) { + /* XXX: if segment is empty, delete instead */ + VTAILQ_REMOVE(&sc->segments, sg, list); + free(sg); + return; + } + + /* + * If there is enough space left, that we can move the smp_objects + * down without overwriting the present copy, we will do so to + * compact the segment. 
+ */ + left = smp_spaceleft(sc, sg); + len = sizeof(struct smp_object) * sg->p.lobjlist; + if (len < left) { + dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); + dp = sc->base + dst; + assert((uintptr_t)dp + len < (uintptr_t)sg->objs); + memcpy(dp, sg->objs, len); + sc->next_top = dst; + sg->objs = dp; + sg->p.length = (sc->next_top - sg->p.offset) + + len + IRNUP(sc, SMP_SIGN_SPACE); + (void)smp_spaceleft(sc, sg); /* for the asserts */ + + } + + /* Update the segment header */ + sg->p.objlist = sc->next_top; + + /* Write the (empty) OBJIDX signature */ + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + assert(sc->next_top >= sc->next_bot); + smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Write the (empty) SEGTAIL signature */ + smp_def_sign(sc, sg->ctx, + sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Save segment list */ + smp_save_segs(sc); + sc->free_offset = smp_segend(sg); +} + +/*-------------------------------------------------------------------- + * Silo worker thread + */ + +static void * +smp_thread(struct sess *sp, void *priv) +{ + struct smp_sc *sc; + struct smp_seg *sg; + + (void)sp; + CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC); + + /* First, load all the objects from all segments */ + VTAILQ_FOREACH(sg, &sc->segments, list) + if (sg->flags & SMP_SEG_MUSTLOAD) + smp_load_seg(sp, sc, sg); + + sc->flags |= SMP_SC_LOADED; + BAN_Deref(&sc->tailban); + sc->tailban = NULL; + printf("Silo completely loaded\n"); + while (1) + (void)sleep (1); + NEEDLESS_RETURN(NULL); +} + +/*-------------------------------------------------------------------- + * Open a silo in the worker process + */ + +static void +smp_open(const struct stevedore *st) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + + Lck_New(&sc->mtx, lck_smp); + Lck_Lock(&sc->mtx); + + sc->stevedore = st; + + /* We trust the 
parent to give us a valid silo, for good measure: */ + AZ(smp_valid_silo(sc)); + + AZ(mprotect(sc->base, 4096, PROT_READ)); + + sc->ident = SIGN_DATA(&sc->idn); + + /* We attempt ban1 first, and if that fails, try ban2 */ + if (smp_open_bans(sc, &sc->ban1)) + AZ(smp_open_bans(sc, &sc->ban2)); + + /* We attempt seg1 first, and if that fails, try seg2 */ + if (smp_open_segs(sc, &sc->seg1)) + AZ(smp_open_segs(sc, &sc->seg2)); + + sc->tailban = BAN_TailRef(); + AN(sc->tailban); + + /* XXX: save segments to ensure consistency between seg1 & seg2 ? */ + + /* XXX: abandon early segments to make sure we have free space ? */ + + /* Open a new segment, so we are ready to write */ + smp_new_seg(sc); + + /* Start the worker silo worker thread, it will load the objects */ + WRK_BgThread(&sc->thread, "persistence", smp_thread, sc); + + VTAILQ_INSERT_TAIL(&silos, sc, list); + Lck_Unlock(&sc->mtx); +} + +/*-------------------------------------------------------------------- + * Close a silo + */ + +static void +smp_close(const struct stevedore *st) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + Lck_Lock(&sc->mtx); + smp_close_seg(sc, sc->cur_seg); + Lck_Unlock(&sc->mtx); + + /* XXX: reap thread */ +} + +/*-------------------------------------------------------------------- + * Allocate a bite. + * + * Allocate [min_size...max_size] space from the bottom of the segment, + * as is convenient. + * + * If 'so' + 'idx' is given, also allocate a smp_object from the top + * of the segment. + * + * Return the segment in 'ssg' if given. 
+ */ + +static struct storage * +smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, + struct smp_object **so, unsigned *idx, struct smp_seg **ssg) +{ + struct smp_sc *sc; + struct storage *ss; + struct smp_seg *sg; + unsigned tries; + uint64_t left, extra; + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + assert(min_size <= max_size); + + max_size = IRNUP(sc, max_size); + min_size = IRNUP(sc, min_size); + + extra = IRNUP(sc, sizeof(*ss)); + if (so != NULL) { + extra += sizeof(**so); + AN(idx); + } + + Lck_Lock(&sc->mtx); + sg = NULL; + ss = NULL; + for (tries = 0; tries < 3; tries++) { + left = smp_spaceleft(sc, sc->cur_seg); + if (left >= extra + min_size) + break; + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } + if (left >= extra + min_size) { + if (left < extra + max_size) + max_size = IRNDN(sc, left - extra); + + sg = sc->cur_seg; + ss = (void*)(sc->base + sc->next_bot); + sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); + sg->nalloc++; + if (so != NULL) { + sc->next_top -= sizeof(**so); + *so = (void*)(sc->base + sc->next_top); + /* Render this smp_object mostly harmless */ + (*so)->ttl = 0.; + (*so)->ban = 0.; + (*so)->ptr = 0;; + sg->objs = *so; + *idx = ++sg->p.lobjlist; + } + (void)smp_spaceleft(sc, sg); /* for the assert */ + } + Lck_Unlock(&sc->mtx); + + if (ss == NULL) + return (ss); + AN(sg); + assert(max_size >= min_size); + + /* Fill the storage structure */ + memset(ss, 0, sizeof *ss); + ss->magic = STORAGE_MAGIC; + ss->ptr = PRNUP(sc, ss + 1); + ss->space = max_size; + ss->priv = sc; + ss->stevedore = st; + ss->fd = sc->fd; + if (ssg != NULL) + *ssg = sg; + return (ss); +} + +/*-------------------------------------------------------------------- + * Find the per-segment lru list for this object + */ + +static struct lru * +smp_getlru(const struct object *o) +{ + struct smp_seg *sg; + + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); + return (sg->lru); +} + 
+/*-------------------------------------------------------------------- + * Allocate an object + */ + +static struct object * +smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, + const struct stv_objsecrets *soc) +{ + struct object *o; + struct storage *st; + struct smp_sc *sc; + struct smp_seg *sg; + struct smp_object *so; + struct objcore *oc; + unsigned objidx; + + CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); + AN(sp->objcore); + AN(sp->wrk->ttl >= 0); + + ltot = IRNUP(sc, ltot); + + st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); + if (st == NULL) + return (NULL); + + assert(st->space >= ltot); + ltot = st->len = st->space; + + o = STV_MkObject(sp, st->ptr, ltot, soc); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + o->objstore = st; + + oc = o->objcore; + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + oc->flags |= OC_F_LRUDONTMOVE; + + Lck_Lock(&sc->mtx); + sg->nfixed++; + sg->nobj++; + + /* We have to do this somewhere, might as well be here... */ + assert(sizeof so->hash == DIGEST_LEN); + memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); + so->ttl = o->ttl; /* XXX: grace? */ + so->ptr = (uint8_t*)o - sc->base; + so->ban = o->ban_t; + + oc->priv = sg; + oc->priv2 = objidx; + oc->methods = &smp_oc_methods; + + Lck_Unlock(&sc->mtx); + return (o); +} + +/*-------------------------------------------------------------------- + * Allocate a bite + */ + +static struct storage * +smp_alloc(struct stevedore *st, size_t size) +{ + + return (smp_allocx(st, + size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); +} + +/*-------------------------------------------------------------------- + * Trim a bite + * XXX: We could trim the last allocation. 
+ */ + +static void +smp_trim(struct storage *ss, size_t size) +{ + + (void)ss; + (void)size; +} + +/*-------------------------------------------------------------------- + * We don't track frees of storage, we track the objects which own the + * storage and when there are no more objects in in the first segment, + * it can be reclaimed. + * XXX: We could free the last allocation, but does that happen ? + */ + +static void __match_proto__(storage_free_f) +smp_free(struct storage *st) +{ + + /* XXX */ + (void)st; +} + +/*-------------------------------------------------------------------- + * Pause until all silos have loaded. + */ + +void +SMP_Ready(void) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + do { + VTAILQ_FOREACH(sc, &silos, list) + if (!(sc->flags & SMP_SC_LOADED)) + break; + if (sc != NULL) + (void)sleep(1); + } while (sc != NULL); +} + +/*--------------------------------------------------------------------*/ + +const struct stevedore smp_stevedore = { + .magic = STEVEDORE_MAGIC, + .name = "persistent", + .init = smp_init, + .open = smp_open, + .close = smp_close, + .alloc = smp_alloc, + .allocobj = smp_allocobj, + .getlru = smp_getlru, + .free = smp_free, + .trim = smp_trim, +}; + +/*-------------------------------------------------------------------- + * Persistence is a bear to test unadultered, so we cheat by adding + * a cli command we can use to make it do tricks for us. + */ + +static void +debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) +{ + struct smp_seg *sg; + struct objcore *oc; + + cli_out(cli, "Silo: %s (%s)\n", + sc->stevedore->ident, sc->filename); + VTAILQ_FOREACH(sg, &sc->segments, list) { + cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", + (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); + if (sg == sc->cur_seg) + cli_out(cli, + " Alloc: [0x%jx ... 
0x%jx] = 0x%jx free\n", + (uintmax_t)(sc->next_bot), + (uintmax_t)(sc->next_top), + (uintmax_t)(sc->next_top - sc->next_bot)); + cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", + sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); + if (objs) { + VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) + cli_out(cli, " %s %p\n", + oc == &sg->lru->senteniel ? + "senteniel" : "OC: ", oc); + } + } +} + +static void +debug_persistent(struct cli *cli, const char * const * av, void *priv) +{ + struct smp_sc *sc; + + (void)priv; + + if (av[2] == NULL) { + VTAILQ_FOREACH(sc, &silos, list) + debug_report_silo(cli, sc, 0); + return; + } + VTAILQ_FOREACH(sc, &silos, list) + if (!strcmp(av[2], sc->stevedore->ident)) + break; + if (sc == NULL) { + cli_out(cli, "Silo <%s> not found\n", av[2]); + cli_result(cli, CLIS_PARAM); + return; + } + if (av[3] == NULL) { + debug_report_silo(cli, sc, 0); + return; + } + Lck_Lock(&sc->mtx); + if (!strcmp(av[3], "sync")) { + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } else if (!strcmp(av[3], "dump")) { + debug_report_silo(cli, sc, 1); + } else { + cli_out(cli, "Unknown operation\n"); + cli_result(cli, CLIS_PARAM); + } + Lck_Unlock(&sc->mtx); +} + +static struct cli_proto debug_cmds[] = { + { "debug.persistent", "debug.persistent", + "Persistent debugging magic:\n" + "\tdebug.persistent [stevedore [cmd]]\n" + "With no cmd arg, a summary of the silo is returned.\n" + "Possible commands:\n" + "\tsync\tClose current segment, open a new one\n" + "\tdump\tinclude objcores in silo summary\n" + "", + 0, 2, "d", debug_persistent }, + { NULL } +}; + +void +SMP_Init(void) +{ + CLI_AddFuncs(debug_cmds); +} From phk at varnish-cache.org Tue Feb 8 10:50:29 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 08 Feb 2011 11:50:29 +0100 Subject: [master] 20b88b1 Split .h material out from storage_persistent.c Message-ID: commit 20b88b19225de964f5381156e681df89b66a59e9 Author: Poul-Henning Kamp Date: Tue Feb 8 09:48:07 2011 +0000 
Split .h material out from storage_persistent.c diff --git a/bin/varnishd/Makefile.am b/bin/varnishd/Makefile.am index 7948977..af826a3 100644 --- a/bin/varnishd/Makefile.am +++ b/bin/varnishd/Makefile.am @@ -87,6 +87,7 @@ noinst_HEADERS = \ mgt_cli.h \ steps.h \ stevedore.h \ + storage_persistent.h \ vparam.h varnishd_CFLAGS = \ diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index aab14bb..9e51120 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -56,142 +56,7 @@ SVNID("$Id$") #include "cli_priv.h" #include "persistent.h" - -#ifndef MAP_NOCORE -#define MAP_NOCORE 0 /* XXX Linux */ -#endif - -#ifndef MAP_NOSYNC -#define MAP_NOSYNC 0 /* XXX Linux */ -#endif - -#define ASSERT_SILO_THREAD(sc) \ - do {assert(pthread_self() == (sc)->thread);} while (0) - -#define OC_F_NEEDFIXUP OC_F_PRIV - -/* - * Context for a signature. - * - * A signature is a sequence of bytes in the silo, signed by a SHA256 hash - * which follows the bytes. - * - * The context structure allows us to append to a signature without - * recalculating the entire SHA256 hash. - */ - -struct smp_signctx { - struct smp_sign *ss; - struct SHA256Context ctx; - uint32_t unique; - const char *id; -}; - -struct smp_sc; - -/* XXX: name confusion with on-media version ? 
*/ -struct smp_seg { - unsigned magic; -#define SMP_SEG_MAGIC 0x45c61895 - - struct smp_sc *sc; - struct lru *lru; - - VTAILQ_ENTRY(smp_seg) list; /* on smp_sc.smp_segments */ - - struct smp_segptr p; - - unsigned flags; -#define SMP_SEG_MUSTLOAD (1 << 0) -#define SMP_SEG_LOADED (1 << 1) - - uint32_t nobj; /* Number of objects */ - uint32_t nalloc; /* Allocations */ - uint32_t nfixed; /* How many fixed objects */ - - /* Only for open segment */ - struct smp_object *objs; /* objdesc array */ - struct smp_signctx ctx[1]; -}; - -VTAILQ_HEAD(smp_seghead, smp_seg); - -struct smp_sc { - unsigned magic; -#define SMP_SC_MAGIC 0x7b73af0a - struct stevedore *parent; - - unsigned flags; -#define SMP_SC_LOADED (1 << 0) - - const struct stevedore *stevedore; - int fd; - const char *filename; - off_t mediasize; - uint64_t align; /* 64b to avoid casts */ - uint32_t granularity; - uint32_t unique; - - uint8_t *base; - - struct smp_ident *ident; - - struct smp_seghead segments; - struct smp_seg *cur_seg; - uint64_t next_bot; /* next alloc address bottom */ - uint64_t next_top; /* next alloc address top */ - - uint64_t free_offset; - - pthread_t thread; - - VTAILQ_ENTRY(smp_sc) list; - - struct smp_signctx idn; - struct smp_signctx ban1; - struct smp_signctx ban2; - struct smp_signctx seg1; - struct smp_signctx seg2; - - struct ban *tailban; - - struct lock mtx; - - /* Cleaner metrics */ - - unsigned min_nseg; - unsigned aim_nseg; - unsigned max_nseg; - - uint64_t min_segl; - uint64_t aim_segl; - uint64_t max_segl; - - uint64_t free_reserve; -}; - -/*--------------------------------------------------------------------*/ - -/* Generic power-2 rounding */ -#define PWR2(x) ((((x)-1)&(x))==0) /* Is a power of two */ -#define RDN2(x, y) ((x)&(~((y)-1))) /* if y is powers of two */ -#define RUP2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ - -/* Pointer round up/down & assert */ -#define PRNDN(sc, x) ((void*)RDN2((uintptr_t)(x), sc->align)) -#define PRNUP(sc, x) 
((void*)RUP2((uintptr_t)(x), sc->align)) -#define PASSERTALIGN(sc, x) assert(PRNDN(sc, x) == (x)) - -/* Integer round up/down & assert */ -#define IRNDN(sc, x) RDN2(x, sc->align) -#define IRNUP(sc, x) RUP2(x, sc->align) -#define IASSERTALIGN(sc, x) assert(IRNDN(sc, x) == (x)) - -/*--------------------------------------------------------------------*/ - -#define ASSERT_PTR_IN_SILO(sc, ptr) \ - assert((const void*)(ptr) >= (const void*)((sc)->base) && \ - (const void*)(ptr) < (const void *)((sc)->base + (sc)->mediasize)) +#include "storage_persistent.h" /*--------------------------------------------------------------------*/ @@ -210,9 +75,6 @@ static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); * The signature is stored after the byte-range from step 2. */ -#define SIGN_DATA(ctx) ((void *)((ctx)->ss + 1)) -#define SIGN_END(ctx) ((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length)) - /*-------------------------------------------------------------------- * Define a signature by location and identifier. */ diff --git a/bin/varnishd/storage_persistent.h b/bin/varnishd/storage_persistent.h index aab14bb..5c7af37 100644 --- a/bin/varnishd/storage_persistent.h +++ b/bin/varnishd/storage_persistent.h @@ -33,30 +33,6 @@ * XXX: Do we ever free the LRU-lists ? 
*/ -#include "config.h" - -#include "svnid.h" -SVNID("$Id$") - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cache.h" -#include "stevedore.h" -#include "hash_slinger.h" -#include "vsha256.h" -#include "cli.h" -#include "cli_priv.h" - -#include "persistent.h" - #ifndef MAP_NOCORE #define MAP_NOCORE 0 /* XXX Linux */ #endif @@ -195,1524 +171,5 @@ struct smp_sc { /*--------------------------------------------------------------------*/ -/* - * silos is unlocked, it only changes during startup when we are - * single-threaded - */ -static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); - -/*-------------------------------------------------------------------- - * SIGNATURE functions - * The signature is SHA256 over: - * 1. The smp_sign struct up to but not including the length field. - * 2. smp_sign->length bytes, starting after the smp_sign structure - * 3. The smp-sign->length field. - * The signature is stored after the byte-range from step 2. - */ - #define SIGN_DATA(ctx) ((void *)((ctx)->ss + 1)) #define SIGN_END(ctx) ((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length)) - -/*-------------------------------------------------------------------- - * Define a signature by location and identifier. 
- */ - -static void -smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, - uint64_t off, const char *id) -{ - - AZ(off & 7); /* Alignment */ - assert(strlen(id) < sizeof ctx->ss->ident); - - memset(ctx, 0, sizeof ctx); - ctx->ss = (void*)(sc->base + off); - ctx->unique = sc->unique; - ctx->id = id; -} - -/*-------------------------------------------------------------------- - * Check that a signature is good, leave state ready for append - */ -static int -smp_chk_sign(struct smp_signctx *ctx) -{ - struct SHA256Context cx; - unsigned char sign[SHA256_LEN]; - int r = 0; - - if (strncmp(ctx->id, ctx->ss->ident, sizeof ctx->ss->ident)) - r = 1; - else if (ctx->unique != ctx->ss->unique) - r = 2; - else if ((uintptr_t)ctx->ss != ctx->ss->mapped) - r = 3; - else { - SHA256_Init(&ctx->ctx); - SHA256_Update(&ctx->ctx, ctx->ss, - offsetof(struct smp_sign, length)); - SHA256_Update(&ctx->ctx, SIGN_DATA(ctx), ctx->ss->length); - cx = ctx->ctx; - SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); - SHA256_Final(sign, &cx); - if (memcmp(sign, SIGN_END(ctx), sizeof sign)) - r = 4; - } - if (r) { - fprintf(stderr, "CHK(%p %s %p %s) = %d\n", - ctx, ctx->id, ctx->ss, - r > 1 ? ctx->ss->ident : "", r); - } - return (r); -} - -/*-------------------------------------------------------------------- - * Append data to a signature - */ -static void -smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len) -{ - struct SHA256Context cx; - unsigned char sign[SHA256_LEN]; - - if (len != 0) { - SHA256_Update(&ctx->ctx, ptr, len); - ctx->ss->length += len; - } - cx = ctx->ctx; - SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); - SHA256_Final(sign, &cx); - memcpy(SIGN_END(ctx), sign, sizeof sign); -XXXAZ(smp_chk_sign(ctx)); -} - -/*-------------------------------------------------------------------- - * Reset a signature to empty, prepare for appending. 
- */ - -static void -smp_reset_sign(struct smp_signctx *ctx) -{ - - memset(ctx->ss, 0, sizeof *ctx->ss); - strcpy(ctx->ss->ident, ctx->id); - ctx->ss->unique = ctx->unique; - ctx->ss->mapped = (uintptr_t)ctx->ss; - SHA256_Init(&ctx->ctx); - SHA256_Update(&ctx->ctx, ctx->ss, - offsetof(struct smp_sign, length)); - smp_append_sign(ctx, NULL, 0); -} - -/*-------------------------------------------------------------------- - * Force a write of a signature block to the backing store. - */ - -static void -smp_sync_sign(const struct smp_signctx *ctx) -{ - int i; - - /* XXX: round to pages */ - i = msync((void*)ctx->ss, ctx->ss->length + SHA256_LEN, MS_SYNC); - if (i && 0) - fprintf(stderr, "SyncSign(%p %s) = %d %s\n", - ctx->ss, ctx->id, i, strerror(errno)); -} - -/*-------------------------------------------------------------------- - * Create and force a new signature to backing store - */ - -static void -smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, - uint64_t off, const char *id) -{ - smp_def_sign(sc, ctx, off, id); - smp_reset_sign(ctx); - smp_sync_sign(ctx); -} - -/*-------------------------------------------------------------------- - * Caculate payload of some stuff - */ - -static uint64_t -smp_stuff_len(const struct smp_sc *sc, unsigned stuff) -{ - uint64_t l; - - assert(stuff < SMP_END_STUFF); - l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff]; - l -= SMP_SIGN_SPACE; - return (l); -} - -/*-------------------------------------------------------------------- - * Initialize a Silo with a valid but empty structure. - * - * XXX: more intelligent sizing of things. 
- */ - -static void -smp_newsilo(struct smp_sc *sc) -{ - struct smp_ident *si; - - ASSERT_MGT(); - assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); - - /* Choose a new random number */ - sc->unique = random(); - - smp_reset_sign(&sc->idn); - si = sc->ident; - - memset(si, 0, sizeof *si); - strcpy(si->ident, SMP_IDENT_STRING); - si->byte_order = 0x12345678; - si->size = sizeof *si; - si->major_version = 2; - si->unique = sc->unique; - si->mediasize = sc->mediasize; - si->granularity = sc->granularity; - /* - * Aim for cache-line-width - */ - si->align = sizeof(void*) * 2; - sc->align = si->align; - - si->stuff[SMP_BAN1_STUFF] = sc->granularity; - si->stuff[SMP_BAN2_STUFF] = si->stuff[SMP_BAN1_STUFF] + 1024*1024; - si->stuff[SMP_SEG1_STUFF] = si->stuff[SMP_BAN2_STUFF] + 1024*1024; - si->stuff[SMP_SEG2_STUFF] = si->stuff[SMP_SEG1_STUFF] + 1024*1024; - si->stuff[SMP_SPC_STUFF] = si->stuff[SMP_SEG2_STUFF] + 1024*1024; - si->stuff[SMP_END_STUFF] = si->mediasize; - assert(si->stuff[SMP_SPC_STUFF] < si->stuff[SMP_END_STUFF]); - - smp_new_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); - smp_new_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); - smp_new_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); - smp_new_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); - - smp_append_sign(&sc->idn, si, sizeof *si); - smp_sync_sign(&sc->idn); -} - -/*-------------------------------------------------------------------- - * Check if a silo is valid. 
- */ - -static int -smp_valid_silo(struct smp_sc *sc) -{ - struct smp_ident *si; - int i, j; - - assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); - - if (smp_chk_sign(&sc->idn)) - return (1); - - si = sc->ident; - if (strcmp(si->ident, SMP_IDENT_STRING)) - return (2); - if (si->byte_order != 0x12345678) - return (3); - if (si->size != sizeof *si) - return (4); - if (si->major_version != 2) - return (5); - if (si->mediasize != sc->mediasize) - return (7); - if (si->granularity != sc->granularity) - return (8); - if (si->align < sizeof(void*)) - return (9); - if (!PWR2(si->align)) - return (10); - sc->align = si->align; - sc->unique = si->unique; - - /* XXX: Sanity check stuff[6] */ - - assert(si->stuff[SMP_BAN1_STUFF] > sizeof *si + SHA256_LEN); - assert(si->stuff[SMP_BAN2_STUFF] > si->stuff[SMP_BAN1_STUFF]); - assert(si->stuff[SMP_SEG1_STUFF] > si->stuff[SMP_BAN2_STUFF]); - assert(si->stuff[SMP_SEG2_STUFF] > si->stuff[SMP_SEG1_STUFF]); - assert(si->stuff[SMP_SPC_STUFF] > si->stuff[SMP_SEG2_STUFF]); - assert(si->stuff[SMP_END_STUFF] == sc->mediasize); - - assert(smp_stuff_len(sc, SMP_SEG1_STUFF) > 65536); - assert(smp_stuff_len(sc, SMP_SEG1_STUFF) == - smp_stuff_len(sc, SMP_SEG2_STUFF)); - - assert(smp_stuff_len(sc, SMP_BAN1_STUFF) > 65536); - assert(smp_stuff_len(sc, SMP_BAN1_STUFF) == - smp_stuff_len(sc, SMP_BAN2_STUFF)); - - smp_def_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); - smp_def_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); - smp_def_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); - smp_def_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); - - /* We must have one valid BAN table */ - i = smp_chk_sign(&sc->ban1); - j = smp_chk_sign(&sc->ban2); - if (i && j) - return (100 + i * 10 + j); - - /* We must have one valid SEG table */ - i = smp_chk_sign(&sc->seg1); - j = smp_chk_sign(&sc->seg2); - if (i && j) - return (200 + i * 10 + j); - return (0); -} - 
-/*-------------------------------------------------------------------- - * Calculate cleaner metrics from silo dimensions - */ - -static void -smp_metrics(struct smp_sc *sc) -{ - - /* - * We do not want to loose too big chunks of the silos - * content when we are forced to clean a segment. - * - * For now insist that a segment covers no more than 1% of the silo. - * - * XXX: This should possibly depend on the size of the silo so - * XXX: trivially small silos do not run into trouble along - * XXX: the lines of "one object per segment". - */ - - sc->min_nseg = 10; - sc->max_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->min_nseg; - - fprintf(stderr, "min_nseg = %u, max_segl = %ju\n", - sc->min_nseg, (uintmax_t)sc->max_segl); - - /* - * The number of segments are limited by the size of the segment - * table(s) and from that follows the minimum size of a segmement. - */ - - sc->max_nseg = smp_stuff_len(sc, SMP_SEG1_STUFF) / sc->min_nseg; - sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg; - - while (sc->min_segl < sizeof(struct object)) { - sc->max_nseg /= 2; - sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg; - } - - fprintf(stderr, "max_nseg = %u, min_segl = %ju\n", - sc->max_nseg, (uintmax_t)sc->min_segl); - - /* - * Set our initial aim point at the exponential average of the - * two extremes. - * - * XXX: This is a pretty arbitrary choice, but having no idea - * XXX: object count, size distribution or ttl pattern at this - * XXX: point, we have to do something. - */ - - sc->aim_nseg = - (unsigned) exp((log(sc->min_nseg) + log(sc->max_nseg))*.5); - sc->aim_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->aim_nseg; - - fprintf(stderr, "aim_nseg = %u, aim_segl = %ju\n", - sc->aim_nseg, (uintmax_t)sc->aim_segl); - - /* - * How much space in the free reserve pool ? 
- */ - sc->free_reserve = sc->aim_segl * 10; - - fprintf(stderr, "free_reserve = %ju\n", sc->free_reserve); -} - -/*-------------------------------------------------------------------- - * Set up persistent storage silo in the master process. - */ - -static void -smp_init(struct stevedore *parent, int ac, char * const *av) -{ - struct smp_sc *sc; - int i; - - ASSERT_MGT(); - - AZ(av[ac]); -#define SIZOF(foo) fprintf(stderr, \ - "sizeof(%s) = %zu = 0x%zx\n", #foo, sizeof(foo), sizeof(foo)); - SIZOF(struct smp_ident); - SIZOF(struct smp_sign); - SIZOF(struct smp_segptr); - SIZOF(struct smp_object); -#undef SIZOF - - /* See comments in persistent.h */ - assert(sizeof(struct smp_ident) == SMP_IDENT_SIZE); - - /* Allocate softc */ - ALLOC_OBJ(sc, SMP_SC_MAGIC); - XXXAN(sc); - sc->parent = parent; - sc->fd = -1; - VTAILQ_INIT(&sc->segments); - - /* Argument processing */ - if (ac != 2) - ARGV_ERR("(-spersistent) wrong number of arguments\n"); - - i = STV_GetFile(av[0], &sc->fd, &sc->filename, "-spersistent"); - if (i == 2) - ARGV_ERR("(-spersistent) need filename (not directory)\n"); - - sc->align = sizeof(void*) * 2; - sc->granularity = getpagesize(); - sc->mediasize = STV_FileSize(sc->fd, av[1], &sc->granularity, - "-spersistent"); - - AZ(ftruncate(sc->fd, sc->mediasize)); - - sc->base = mmap(NULL, sc->mediasize, PROT_READ|PROT_WRITE, - MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, 0); - - if (sc->base == MAP_FAILED) - ARGV_ERR("(-spersistent) failed to mmap (%s)\n", - strerror(errno)); - - smp_def_sign(sc, &sc->idn, 0, "SILO"); - sc->ident = SIGN_DATA(&sc->idn); - - i = smp_valid_silo(sc); - if (i) - smp_newsilo(sc); - AZ(smp_valid_silo(sc)); - - smp_metrics(sc); - - parent->priv = sc; - - /* XXX: only for sendfile I guess... */ - mgt_child_inherit(sc->fd, "storage_persistent"); -} - - -/*-------------------------------------------------------------------- - * Write the segmentlist back to the silo. 
- * - * We write the first copy, sync it synchronously, then write the - * second copy and sync it synchronously. - * - * Provided the kernel doesn't lie, that means we will always have - * at least one valid copy on in the silo. - */ - -static void -smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx) -{ - struct smp_segptr *ss; - struct smp_seg *sg; - uint64_t length; - - Lck_AssertHeld(&sc->mtx); - smp_reset_sign(ctx); - ss = SIGN_DATA(ctx); - length = 0; - VTAILQ_FOREACH(sg, &sc->segments, list) { - assert(sg->p.offset < sc->mediasize); - assert(sg->p.offset + sg->p.length <= sc->mediasize); - *ss = sg->p; - ss++; - length += sizeof *ss; - } - smp_append_sign(ctx, SIGN_DATA(ctx), length); - smp_sync_sign(ctx); -} - -static void -smp_save_segs(struct smp_sc *sc) -{ - struct smp_seg *sg, *sg2; - - Lck_AssertHeld(&sc->mtx); - - /* - * Remove empty segments from the front of the list - * before we write the segments to disk. - */ - VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) { - if (sg->nobj > 0) - break; - if (sg == sc->cur_seg) - continue; - VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); - } - smp_save_seg(sc, &sc->seg1); - smp_save_seg(sc, &sc->seg2); -} - - -/*--------------------------------------------------------------------- - */ - -static struct smp_object * -smp_find_so(const struct smp_seg *sg, const struct objcore *oc) -{ - struct smp_object *so; - unsigned smp_idx; - - smp_idx = oc->priv2; - assert(smp_idx > 0); - assert(smp_idx <= sg->p.lobjlist); - so = &sg->objs[sg->p.lobjlist - smp_idx]; - return (so); -} - -/*--------------------------------------------------------------------- - * Check if a given storage structure is valid to use - */ - -static int -smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, - const struct storage *st) -{ - struct smp_seg *sg2; - const uint8_t *pst; - uint64_t o; - - (void)sg; /* XXX: faster: Start search from here */ - pst = (const void *)st; - - if (pst < (sc->base + 
sc->ident->stuff[SMP_SPC_STUFF])) - return (0x01); /* Before silo payload start */ - if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) - return (0x02); /* After silo end */ - - o = pst - sc->base; - - /* Find which segment contains the storage structure */ - VTAILQ_FOREACH(sg2, &sc->segments, list) - if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) - break; - if (sg2 == NULL) - return (0x04); /* No claiming segment */ - if (!(sg2->flags & SMP_SEG_LOADED)) - return (0x08); /* Claiming segment not loaded */ - - /* It is now safe to access the storage structure */ - if (st->magic != STORAGE_MAGIC) - return (0x10); /* Not enough magic */ - - if (o + st->space >= sg2->p.objlist) - return (0x20); /* Allocation not inside segment */ - - if (st->len > st->space) - return (0x40); /* Plain bad... */ - - /* - * XXX: We could patch up st->stevedore and st->priv here - * XXX: but if things go right, we will never need them. - */ - return (0); -} - -/*--------------------------------------------------------------------- - * objcore methods for persistent objects - */ - -static struct object * -smp_oc_getobj(struct worker *wrk, struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - struct storage *st; - uint64_t l; - int bad; - - /* Some calls are direct, but they should match anyway */ - assert(oc->methods->getobj == smp_oc_getobj); - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - if (wrk == NULL) - AZ(oc->flags & OC_F_NEEDFIXUP); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - o = (void*)(sg->sc->base + so->ptr); - /* - * The object may not be in this segment since we allocate it - * In a separate operation than the smp_object. We could check - * that it is in a later segment, but that would be complicated. 
- * XXX: For now, be happy if it is inside th silo - */ - ASSERT_PTR_IN_SILO(sg->sc, o); - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - - /* - * If this flag is not set, it will not be, and the lock is not - * needed to test it. - */ - if (!(oc->flags & OC_F_NEEDFIXUP)) - return (o); - - AN(wrk); - Lck_Lock(&sg->sc->mtx); - /* Check again, we might have raced. */ - if (oc->flags & OC_F_NEEDFIXUP) { - /* We trust caller to have a refcnt for us */ - o->objcore = oc; - - bad = 0; - l = 0; - VTAILQ_FOREACH(st, &o->store, list) { - bad |= smp_loaded_st(sg->sc, sg, st); - if (bad) - break; - l += st->len; - } - if (l != o->len) - bad |= 0x100; - - if(bad) { - o->ttl = 0; - o->grace = 0; - so->ttl = 0; - } - - sg->nfixed++; - wrk->stats.n_object++; - wrk->stats.n_vampireobject--; - oc->flags &= ~OC_F_NEEDFIXUP; - } - Lck_Unlock(&sg->sc->mtx); - return (o); -} - -static void -smp_oc_updatemeta(struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - double mttl; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - o = smp_oc_getobj(NULL, oc); - AN(o); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); - so = smp_find_so(sg, oc); - - if (isnan(o->grace)) - mttl = o->ttl; - else - mttl = - (o->ttl + o->grace); - - if (sg == sg->sc->cur_seg) { - /* Lock necessary, we might race close_seg */ - Lck_Lock(&sg->sc->mtx); - so->ban = o->ban_t; - so->ttl = mttl; - Lck_Unlock(&sg->sc->mtx); - } else { - so->ban = o->ban_t; - so->ttl = mttl; - } -} - -static void __match_proto__() -smp_oc_freeobj(struct objcore *oc) -{ - struct smp_seg *sg; - struct smp_object *so; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - Lck_Lock(&sg->sc->mtx); - so->ttl = 0; - so->ptr = 0; - - assert(sg->nobj > 0); - assert(sg->nfixed > 0); - sg->nobj--; - sg->nfixed--; - - Lck_Unlock(&sg->sc->mtx); -} - -static struct objcore_methods smp_oc_methods = { - .getobj = 
smp_oc_getobj, - .updatemeta = smp_oc_updatemeta, - .freeobj = smp_oc_freeobj, -}; - -/*-------------------------------------------------------------------- - * Add a new ban to all silos - */ - -static void -smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, - uint32_t flags, uint32_t len, const char *ban) -{ - uint8_t *ptr, *ptr2; - - (void)sc; - ptr = ptr2 = SIGN_END(ctx); - - memcpy(ptr, "BAN", 4); - ptr += 4; - - memcpy(ptr, &t0, sizeof t0); - ptr += sizeof t0; - - memcpy(ptr, &flags, sizeof flags); - ptr += sizeof flags; - - memcpy(ptr, &len, sizeof len); - ptr += sizeof len; - - memcpy(ptr, ban, len); - ptr += len; - - smp_append_sign(ctx, ptr2, ptr - ptr2); -} - -void -SMP_NewBan(double t0, const char *ban) -{ - struct smp_sc *sc; - uint32_t l = strlen(ban) + 1; - - VTAILQ_FOREACH(sc, &silos, list) { - smp_appendban(sc, &sc->ban1, t0, 0, l, ban); - smp_appendban(sc, &sc->ban2, t0, 0, l, ban); - } -} - -/*-------------------------------------------------------------------- - * Attempt to open and read in a ban list - */ - -static int -smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) -{ - uint8_t *ptr, *pe; - double t0; - uint32_t flags, length; - int i, retval = 0; - - ASSERT_CLI(); - (void)sc; - i = smp_chk_sign(ctx); - if (i) - return (i); - ptr = SIGN_DATA(ctx); - pe = ptr + ctx->ss->length; - - while (ptr < pe) { - if (memcmp(ptr, "BAN", 4)) { - retval = 1001; - break; - } - ptr += 4; - - memcpy(&t0, ptr, sizeof t0); - ptr += sizeof t0; - - memcpy(&flags, ptr, sizeof flags); - ptr += sizeof flags; - if (flags != 0) { - retval = 1002; - break; - } - - memcpy(&length, ptr, sizeof length); - ptr += sizeof length; - if (ptr + length > pe) { - retval = 1003; - break; - } - - if (ptr[length - 1] != '\0') { - retval = 1004; - break; - } - - BAN_Reload(t0, flags, (const char *)ptr); - - ptr += length; - } - assert(ptr <= pe); - return (retval); -} - - -/*--------------------------------------------------------------------*/ - -static 
uint64_t -smp_segend(const struct smp_seg *sg) -{ - - return (sg->p.offset + sg->p.length); -} - -static uint64_t -smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) -{ - - IASSERTALIGN(sc, sc->next_bot); - assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); - assert(sc->next_bot >= sg->p.offset); - assert(sc->next_top < sg->p.offset + sg->p.length); - return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); -} - -/*-------------------------------------------------------------------- - * Load segments - * - * The overall objective is to register the existence of an object, based - * only on the minimally sized struct smp_object, without causing the - * main object to be faulted in. - * - * XXX: We can test this by mprotecting the main body of the segment - * XXX: until the first fixup happens, or even just over this loop, - * XXX: However: the requires that the smp_objects starter further - * XXX: into the segment than a page so that they do not get hit - * XXX: by the protection. 
- */ - -static void -smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) -{ - struct smp_object *so; - struct objcore *oc; - uint32_t no; - double t_now = TIM_real(); - struct smp_signctx ctx[1]; - - ASSERT_SILO_THREAD(sc); - CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); - CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - assert(sg->flags & SMP_SEG_MUSTLOAD); - sg->flags &= ~SMP_SEG_MUSTLOAD; - AN(sg->p.offset); - if (sg->p.objlist == 0) - return; - smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); - if (smp_chk_sign(ctx)) - return; - - /* test SEGTAIL */ - /* test OBJIDX */ - so = (void*)(sc->base + sg->p.objlist); - sg->objs = so; - no = sg->p.lobjlist; - /* Clear the bogus "hold" count */ - sg->nobj = 0; - for (;no > 0; so++,no--) { - if (so->ttl > 0 && so->ttl < t_now) - continue; - if (so->ttl < 0 && -so->ttl < t_now) - continue; - HSH_Prealloc(sp); - oc = sp->wrk->nobjcore; - oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; - oc->flags &= ~OC_F_BUSY; - oc->priv = sg; - oc->priv2 = no; - oc->methods = &smp_oc_methods; - oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); - memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); - (void)HSH_Insert(sp); - AZ(sp->wrk->nobjcore); - EXP_Inject(oc, sg->lru, fabs(so->ttl)); - sg->nobj++; - } - WRK_SumStat(sp->wrk); - sg->flags |= SMP_SEG_LOADED; -} - -/*-------------------------------------------------------------------- - * Attempt to open and read in a segment list - */ - -static int -smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) -{ - uint64_t length, l; - struct smp_segptr *ss, *se; - struct smp_seg *sg, *sg1, *sg2; - int i, n = 0; - - ASSERT_CLI(); - i = smp_chk_sign(ctx); - if (i) - return (i); - - ss = SIGN_DATA(ctx); - length = ctx->ss->length; - - if (length == 0) { - /* No segments */ - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - return (0); - } - se = ss + length / sizeof *ss; - se--; - assert(ss <= se); - - /* - * Locate the free reserve, 
there are only two basic cases, - * but once we start dropping segments, things gets more complicated. - */ - - sc->free_offset = se->offset + se->length; - l = sc->mediasize - sc->free_offset; - if (se->offset > ss->offset && l >= sc->free_reserve) { - /* - * [__xxxxyyyyzzzz___] - * Plenty of space at tail, do nothing. - */ - } else if (ss->offset > se->offset) { - /* - * [zzzz____xxxxyyyy_] - * (make) space between ends - * We might nuke the entire tail end without getting - * enough space, in which case we fall through to the - * last check. - */ - while (ss < se && ss->offset > se->offset) { - l = ss->offset - (se->offset + se->length); - if (l > sc->free_reserve) - break; - ss++; - n++; - } - } - - if (l < sc->free_reserve) { - /* - * [__xxxxyyyyzzzz___] - * (make) space at front - */ - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - while (ss < se) { - l = ss->offset - sc->free_offset; - if (l > sc->free_reserve) - break; - ss++; - n++; - } - } - - assert (l >= sc->free_reserve); - - - sg1 = NULL; - sg2 = NULL; - for(; ss <= se; ss++) { - ALLOC_OBJ(sg, SMP_SEG_MAGIC); - AN(sg); - sg->lru = LRU_Alloc(); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - sg->p = *ss; - - sg->flags |= SMP_SEG_MUSTLOAD; - - /* - * HACK: prevent save_segs from nuking segment until we have - * HACK: loaded it. 
- */ - sg->nobj = 1; - if (sg1 != NULL) { - assert(sg1->p.offset != sg->p.offset); - if (sg1->p.offset < sg->p.offset) - assert(smp_segend(sg1) <= sg->p.offset); - else - assert(smp_segend(sg) <= sg1->p.offset); - } - if (sg2 != NULL) { - assert(sg2->p.offset != sg->p.offset); - if (sg2->p.offset < sg->p.offset) - assert(smp_segend(sg2) <= sg->p.offset); - else - assert(smp_segend(sg) <= sg2->p.offset); - } - - /* XXX: check that they are inside silo */ - /* XXX: check that they don't overlap */ - /* XXX: check that they are serial */ - sg->sc = sc; - VTAILQ_INSERT_TAIL(&sc->segments, sg, list); - sg2 = sg; - if (sg1 == NULL) - sg1 = sg; - } - printf("Dropped %d segments to make free_reserve\n", n); - return (0); -} - -/*-------------------------------------------------------------------- - * Create a new segment - */ - -static void -smp_new_seg(struct smp_sc *sc) -{ - struct smp_seg *sg, *sg2; - - Lck_AssertHeld(&sc->mtx); - ALLOC_OBJ(sg, SMP_SEG_MAGIC); - AN(sg); - sg->sc = sc; - sg->lru = LRU_Alloc(); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - - /* XXX: find where it goes in silo */ - - sg->p.offset = sc->free_offset; - // XXX: align */ - assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]); - assert(sg->p.offset < sc->mediasize); - - sg->p.length = sc->aim_segl; - sg->p.length &= ~7; - - if (smp_segend(sg) > sc->mediasize) { - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - sg->p.offset = sc->free_offset; - sg2 = VTAILQ_FIRST(&sc->segments); - if (smp_segend(sg) > sg2->p.offset) { - printf("Out of space in persistent silo\n"); - printf("Committing suicide, restart will make space\n"); - exit (0); - } - } - - - assert(smp_segend(sg) <= sc->mediasize); - - sg2 = VTAILQ_FIRST(&sc->segments); - if (sg2 != NULL && sg2->p.offset > sc->free_offset) { - if (smp_segend(sg) > sg2->p.offset) { - printf("Out of space in persistent silo\n"); - printf("Committing suicide, restart will make space\n"); - exit (0); - } - assert(smp_segend(sg) <= sg2->p.offset); - } - - 
sg->p.offset = IRNUP(sc, sg->p.offset); - sg->p.length = IRNDN(sc, sg->p.length); - sc->free_offset = sg->p.offset + sg->p.length; - - VTAILQ_INSERT_TAIL(&sc->segments, sg, list); - - /* Neuter the new segment in case there is an old one there */ - AN(sg->p.offset); - smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Set up our allocation points */ - sc->cur_seg = sg; - sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); - sc->next_top = smp_segend(sg); - sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - IASSERTALIGN(sc, sc->next_bot); - IASSERTALIGN(sc, sc->next_top); - sg->objs = (void*)(sc->base + sc->next_top); -} - -/*-------------------------------------------------------------------- - * Close a segment - */ - -static void -smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) -{ - uint64_t left, dst, len; - void *dp; - - Lck_AssertHeld(&sc->mtx); - - assert(sg == sc->cur_seg); - AN(sg->p.offset); - sc->cur_seg = NULL; - - if (sg->nalloc == 0) { - /* XXX: if segment is empty, delete instead */ - VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); - return; - } - - /* - * If there is enough space left, that we can move the smp_objects - * down without overwriting the present copy, we will do so to - * compact the segment. 
- */ - left = smp_spaceleft(sc, sg); - len = sizeof(struct smp_object) * sg->p.lobjlist; - if (len < left) { - dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); - dp = sc->base + dst; - assert((uintptr_t)dp + len < (uintptr_t)sg->objs); - memcpy(dp, sg->objs, len); - sc->next_top = dst; - sg->objs = dp; - sg->p.length = (sc->next_top - sg->p.offset) - + len + IRNUP(sc, SMP_SIGN_SPACE); - (void)smp_spaceleft(sc, sg); /* for the asserts */ - - } - - /* Update the segment header */ - sg->p.objlist = sc->next_top; - - /* Write the (empty) OBJIDX signature */ - sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - assert(sc->next_top >= sc->next_bot); - smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Write the (empty) SEGTAIL signature */ - smp_def_sign(sc, sg->ctx, - sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Save segment list */ - smp_save_segs(sc); - sc->free_offset = smp_segend(sg); -} - -/*-------------------------------------------------------------------- - * Silo worker thread - */ - -static void * -smp_thread(struct sess *sp, void *priv) -{ - struct smp_sc *sc; - struct smp_seg *sg; - - (void)sp; - CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC); - - /* First, load all the objects from all segments */ - VTAILQ_FOREACH(sg, &sc->segments, list) - if (sg->flags & SMP_SEG_MUSTLOAD) - smp_load_seg(sp, sc, sg); - - sc->flags |= SMP_SC_LOADED; - BAN_Deref(&sc->tailban); - sc->tailban = NULL; - printf("Silo completely loaded\n"); - while (1) - (void)sleep (1); - NEEDLESS_RETURN(NULL); -} - -/*-------------------------------------------------------------------- - * Open a silo in the worker process - */ - -static void -smp_open(const struct stevedore *st) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - - Lck_New(&sc->mtx, lck_smp); - Lck_Lock(&sc->mtx); - - sc->stevedore = st; - - /* We trust the 
parent to give us a valid silo, for good measure: */ - AZ(smp_valid_silo(sc)); - - AZ(mprotect(sc->base, 4096, PROT_READ)); - - sc->ident = SIGN_DATA(&sc->idn); - - /* We attempt ban1 first, and if that fails, try ban2 */ - if (smp_open_bans(sc, &sc->ban1)) - AZ(smp_open_bans(sc, &sc->ban2)); - - /* We attempt seg1 first, and if that fails, try seg2 */ - if (smp_open_segs(sc, &sc->seg1)) - AZ(smp_open_segs(sc, &sc->seg2)); - - sc->tailban = BAN_TailRef(); - AN(sc->tailban); - - /* XXX: save segments to ensure consistency between seg1 & seg2 ? */ - - /* XXX: abandon early segments to make sure we have free space ? */ - - /* Open a new segment, so we are ready to write */ - smp_new_seg(sc); - - /* Start the worker silo worker thread, it will load the objects */ - WRK_BgThread(&sc->thread, "persistence", smp_thread, sc); - - VTAILQ_INSERT_TAIL(&silos, sc, list); - Lck_Unlock(&sc->mtx); -} - -/*-------------------------------------------------------------------- - * Close a silo - */ - -static void -smp_close(const struct stevedore *st) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - Lck_Lock(&sc->mtx); - smp_close_seg(sc, sc->cur_seg); - Lck_Unlock(&sc->mtx); - - /* XXX: reap thread */ -} - -/*-------------------------------------------------------------------- - * Allocate a bite. - * - * Allocate [min_size...max_size] space from the bottom of the segment, - * as is convenient. - * - * If 'so' + 'idx' is given, also allocate a smp_object from the top - * of the segment. - * - * Return the segment in 'ssg' if given. 
- */ - -static struct storage * -smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, - struct smp_object **so, unsigned *idx, struct smp_seg **ssg) -{ - struct smp_sc *sc; - struct storage *ss; - struct smp_seg *sg; - unsigned tries; - uint64_t left, extra; - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - assert(min_size <= max_size); - - max_size = IRNUP(sc, max_size); - min_size = IRNUP(sc, min_size); - - extra = IRNUP(sc, sizeof(*ss)); - if (so != NULL) { - extra += sizeof(**so); - AN(idx); - } - - Lck_Lock(&sc->mtx); - sg = NULL; - ss = NULL; - for (tries = 0; tries < 3; tries++) { - left = smp_spaceleft(sc, sc->cur_seg); - if (left >= extra + min_size) - break; - smp_close_seg(sc, sc->cur_seg); - smp_new_seg(sc); - } - if (left >= extra + min_size) { - if (left < extra + max_size) - max_size = IRNDN(sc, left - extra); - - sg = sc->cur_seg; - ss = (void*)(sc->base + sc->next_bot); - sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); - sg->nalloc++; - if (so != NULL) { - sc->next_top -= sizeof(**so); - *so = (void*)(sc->base + sc->next_top); - /* Render this smp_object mostly harmless */ - (*so)->ttl = 0.; - (*so)->ban = 0.; - (*so)->ptr = 0;; - sg->objs = *so; - *idx = ++sg->p.lobjlist; - } - (void)smp_spaceleft(sc, sg); /* for the assert */ - } - Lck_Unlock(&sc->mtx); - - if (ss == NULL) - return (ss); - AN(sg); - assert(max_size >= min_size); - - /* Fill the storage structure */ - memset(ss, 0, sizeof *ss); - ss->magic = STORAGE_MAGIC; - ss->ptr = PRNUP(sc, ss + 1); - ss->space = max_size; - ss->priv = sc; - ss->stevedore = st; - ss->fd = sc->fd; - if (ssg != NULL) - *ssg = sg; - return (ss); -} - -/*-------------------------------------------------------------------- - * Find the per-segment lru list for this object - */ - -static struct lru * -smp_getlru(const struct object *o) -{ - struct smp_seg *sg; - - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); - return (sg->lru); -} - 
-/*-------------------------------------------------------------------- - * Allocate an object - */ - -static struct object * -smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, - const struct stv_objsecrets *soc) -{ - struct object *o; - struct storage *st; - struct smp_sc *sc; - struct smp_seg *sg; - struct smp_object *so; - struct objcore *oc; - unsigned objidx; - - CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); - AN(sp->objcore); - AN(sp->wrk->ttl >= 0); - - ltot = IRNUP(sc, ltot); - - st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); - if (st == NULL) - return (NULL); - - assert(st->space >= ltot); - ltot = st->len = st->space; - - o = STV_MkObject(sp, st->ptr, ltot, soc); - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - o->objstore = st; - - oc = o->objcore; - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - oc->flags |= OC_F_LRUDONTMOVE; - - Lck_Lock(&sc->mtx); - sg->nfixed++; - sg->nobj++; - - /* We have to do this somewhere, might as well be here... */ - assert(sizeof so->hash == DIGEST_LEN); - memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); - so->ttl = o->ttl; /* XXX: grace? */ - so->ptr = (uint8_t*)o - sc->base; - so->ban = o->ban_t; - - oc->priv = sg; - oc->priv2 = objidx; - oc->methods = &smp_oc_methods; - - Lck_Unlock(&sc->mtx); - return (o); -} - -/*-------------------------------------------------------------------- - * Allocate a bite - */ - -static struct storage * -smp_alloc(struct stevedore *st, size_t size) -{ - - return (smp_allocx(st, - size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); -} - -/*-------------------------------------------------------------------- - * Trim a bite - * XXX: We could trim the last allocation. 
- */ - -static void -smp_trim(struct storage *ss, size_t size) -{ - - (void)ss; - (void)size; -} - -/*-------------------------------------------------------------------- - * We don't track frees of storage, we track the objects which own the - * storage and when there are no more objects in in the first segment, - * it can be reclaimed. - * XXX: We could free the last allocation, but does that happen ? - */ - -static void __match_proto__(storage_free_f) -smp_free(struct storage *st) -{ - - /* XXX */ - (void)st; -} - -/*-------------------------------------------------------------------- - * Pause until all silos have loaded. - */ - -void -SMP_Ready(void) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - do { - VTAILQ_FOREACH(sc, &silos, list) - if (!(sc->flags & SMP_SC_LOADED)) - break; - if (sc != NULL) - (void)sleep(1); - } while (sc != NULL); -} - -/*--------------------------------------------------------------------*/ - -const struct stevedore smp_stevedore = { - .magic = STEVEDORE_MAGIC, - .name = "persistent", - .init = smp_init, - .open = smp_open, - .close = smp_close, - .alloc = smp_alloc, - .allocobj = smp_allocobj, - .getlru = smp_getlru, - .free = smp_free, - .trim = smp_trim, -}; - -/*-------------------------------------------------------------------- - * Persistence is a bear to test unadultered, so we cheat by adding - * a cli command we can use to make it do tricks for us. - */ - -static void -debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) -{ - struct smp_seg *sg; - struct objcore *oc; - - cli_out(cli, "Silo: %s (%s)\n", - sc->stevedore->ident, sc->filename); - VTAILQ_FOREACH(sg, &sc->segments, list) { - cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", - (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); - if (sg == sc->cur_seg) - cli_out(cli, - " Alloc: [0x%jx ... 
0x%jx] = 0x%jx free\n", - (uintmax_t)(sc->next_bot), - (uintmax_t)(sc->next_top), - (uintmax_t)(sc->next_top - sc->next_bot)); - cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", - sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); - if (objs) { - VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) - cli_out(cli, " %s %p\n", - oc == &sg->lru->senteniel ? - "senteniel" : "OC: ", oc); - } - } -} - -static void -debug_persistent(struct cli *cli, const char * const * av, void *priv) -{ - struct smp_sc *sc; - - (void)priv; - - if (av[2] == NULL) { - VTAILQ_FOREACH(sc, &silos, list) - debug_report_silo(cli, sc, 0); - return; - } - VTAILQ_FOREACH(sc, &silos, list) - if (!strcmp(av[2], sc->stevedore->ident)) - break; - if (sc == NULL) { - cli_out(cli, "Silo <%s> not found\n", av[2]); - cli_result(cli, CLIS_PARAM); - return; - } - if (av[3] == NULL) { - debug_report_silo(cli, sc, 0); - return; - } - Lck_Lock(&sc->mtx); - if (!strcmp(av[3], "sync")) { - smp_close_seg(sc, sc->cur_seg); - smp_new_seg(sc); - } else if (!strcmp(av[3], "dump")) { - debug_report_silo(cli, sc, 1); - } else { - cli_out(cli, "Unknown operation\n"); - cli_result(cli, CLIS_PARAM); - } - Lck_Unlock(&sc->mtx); -} - -static struct cli_proto debug_cmds[] = { - { "debug.persistent", "debug.persistent", - "Persistent debugging magic:\n" - "\tdebug.persistent [stevedore [cmd]]\n" - "With no cmd arg, a summary of the silo is returned.\n" - "Possible commands:\n" - "\tsync\tClose current segment, open a new one\n" - "\tdump\tinclude objcores in silo summary\n" - "", - 0, 2, "d", debug_persistent }, - { NULL } -}; - -void -SMP_Init(void) -{ - CLI_AddFuncs(debug_cmds); -} From phk at varnish-cache.org Tue Feb 8 10:50:30 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 08 Feb 2011 11:50:30 +0100 Subject: [master] 0cca0bf clone from storage_persistent.c Message-ID: commit 0cca0bf8e940850abdf083460141d1eae0e33da0 Author: Poul-Henning Kamp Date: Tue Feb 8 09:50:21 2011 +0000 clone from 
storage_persistent.c diff --git a/bin/varnishd/storage_persistent_mgt.c b/bin/varnishd/storage_persistent_mgt.c new file mode 100644 index 0000000..9e51120 --- /dev/null +++ b/bin/varnishd/storage_persistent_mgt.c @@ -0,0 +1,1580 @@ +/*- + * Copyright (c) 2008-2010 Linpro AS + * All rights reserved. + * + * Author: Poul-Henning Kamp + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Persistent storage method + * + * XXX: Before we start the client or maybe after it stops, we should give the + * XXX: stevedores a chance to examine their storage for consistency. + * + * XXX: Do we ever free the LRU-lists ? 
+ */ + +#include "config.h" + +#include "svnid.h" +SVNID("$Id$") + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cache.h" +#include "stevedore.h" +#include "hash_slinger.h" +#include "vsha256.h" +#include "cli.h" +#include "cli_priv.h" + +#include "persistent.h" +#include "storage_persistent.h" + +/*--------------------------------------------------------------------*/ + +/* + * silos is unlocked, it only changes during startup when we are + * single-threaded + */ +static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); + +/*-------------------------------------------------------------------- + * SIGNATURE functions + * The signature is SHA256 over: + * 1. The smp_sign struct up to but not including the length field. + * 2. smp_sign->length bytes, starting after the smp_sign structure + * 3. The smp-sign->length field. + * The signature is stored after the byte-range from step 2. + */ + +/*-------------------------------------------------------------------- + * Define a signature by location and identifier. 
+ */ + +static void +smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id) +{ + + AZ(off & 7); /* Alignment */ + assert(strlen(id) < sizeof ctx->ss->ident); + + memset(ctx, 0, sizeof ctx); + ctx->ss = (void*)(sc->base + off); + ctx->unique = sc->unique; + ctx->id = id; +} + +/*-------------------------------------------------------------------- + * Check that a signature is good, leave state ready for append + */ +static int +smp_chk_sign(struct smp_signctx *ctx) +{ + struct SHA256Context cx; + unsigned char sign[SHA256_LEN]; + int r = 0; + + if (strncmp(ctx->id, ctx->ss->ident, sizeof ctx->ss->ident)) + r = 1; + else if (ctx->unique != ctx->ss->unique) + r = 2; + else if ((uintptr_t)ctx->ss != ctx->ss->mapped) + r = 3; + else { + SHA256_Init(&ctx->ctx); + SHA256_Update(&ctx->ctx, ctx->ss, + offsetof(struct smp_sign, length)); + SHA256_Update(&ctx->ctx, SIGN_DATA(ctx), ctx->ss->length); + cx = ctx->ctx; + SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); + SHA256_Final(sign, &cx); + if (memcmp(sign, SIGN_END(ctx), sizeof sign)) + r = 4; + } + if (r) { + fprintf(stderr, "CHK(%p %s %p %s) = %d\n", + ctx, ctx->id, ctx->ss, + r > 1 ? ctx->ss->ident : "", r); + } + return (r); +} + +/*-------------------------------------------------------------------- + * Append data to a signature + */ +static void +smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len) +{ + struct SHA256Context cx; + unsigned char sign[SHA256_LEN]; + + if (len != 0) { + SHA256_Update(&ctx->ctx, ptr, len); + ctx->ss->length += len; + } + cx = ctx->ctx; + SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); + SHA256_Final(sign, &cx); + memcpy(SIGN_END(ctx), sign, sizeof sign); +XXXAZ(smp_chk_sign(ctx)); +} + +/*-------------------------------------------------------------------- + * Reset a signature to empty, prepare for appending. 
+ */ + +static void +smp_reset_sign(struct smp_signctx *ctx) +{ + + memset(ctx->ss, 0, sizeof *ctx->ss); + strcpy(ctx->ss->ident, ctx->id); + ctx->ss->unique = ctx->unique; + ctx->ss->mapped = (uintptr_t)ctx->ss; + SHA256_Init(&ctx->ctx); + SHA256_Update(&ctx->ctx, ctx->ss, + offsetof(struct smp_sign, length)); + smp_append_sign(ctx, NULL, 0); +} + +/*-------------------------------------------------------------------- + * Force a write of a signature block to the backing store. + */ + +static void +smp_sync_sign(const struct smp_signctx *ctx) +{ + int i; + + /* XXX: round to pages */ + i = msync((void*)ctx->ss, ctx->ss->length + SHA256_LEN, MS_SYNC); + if (i && 0) + fprintf(stderr, "SyncSign(%p %s) = %d %s\n", + ctx->ss, ctx->id, i, strerror(errno)); +} + +/*-------------------------------------------------------------------- + * Create and force a new signature to backing store + */ + +static void +smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id) +{ + smp_def_sign(sc, ctx, off, id); + smp_reset_sign(ctx); + smp_sync_sign(ctx); +} + +/*-------------------------------------------------------------------- + * Caculate payload of some stuff + */ + +static uint64_t +smp_stuff_len(const struct smp_sc *sc, unsigned stuff) +{ + uint64_t l; + + assert(stuff < SMP_END_STUFF); + l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff]; + l -= SMP_SIGN_SPACE; + return (l); +} + +/*-------------------------------------------------------------------- + * Initialize a Silo with a valid but empty structure. + * + * XXX: more intelligent sizing of things. 
+ */ + +static void +smp_newsilo(struct smp_sc *sc) +{ + struct smp_ident *si; + + ASSERT_MGT(); + assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); + + /* Choose a new random number */ + sc->unique = random(); + + smp_reset_sign(&sc->idn); + si = sc->ident; + + memset(si, 0, sizeof *si); + strcpy(si->ident, SMP_IDENT_STRING); + si->byte_order = 0x12345678; + si->size = sizeof *si; + si->major_version = 2; + si->unique = sc->unique; + si->mediasize = sc->mediasize; + si->granularity = sc->granularity; + /* + * Aim for cache-line-width + */ + si->align = sizeof(void*) * 2; + sc->align = si->align; + + si->stuff[SMP_BAN1_STUFF] = sc->granularity; + si->stuff[SMP_BAN2_STUFF] = si->stuff[SMP_BAN1_STUFF] + 1024*1024; + si->stuff[SMP_SEG1_STUFF] = si->stuff[SMP_BAN2_STUFF] + 1024*1024; + si->stuff[SMP_SEG2_STUFF] = si->stuff[SMP_SEG1_STUFF] + 1024*1024; + si->stuff[SMP_SPC_STUFF] = si->stuff[SMP_SEG2_STUFF] + 1024*1024; + si->stuff[SMP_END_STUFF] = si->mediasize; + assert(si->stuff[SMP_SPC_STUFF] < si->stuff[SMP_END_STUFF]); + + smp_new_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); + smp_new_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); + smp_new_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); + smp_new_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); + + smp_append_sign(&sc->idn, si, sizeof *si); + smp_sync_sign(&sc->idn); +} + +/*-------------------------------------------------------------------- + * Check if a silo is valid. 
+ */ + +static int +smp_valid_silo(struct smp_sc *sc) +{ + struct smp_ident *si; + int i, j; + + assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); + + if (smp_chk_sign(&sc->idn)) + return (1); + + si = sc->ident; + if (strcmp(si->ident, SMP_IDENT_STRING)) + return (2); + if (si->byte_order != 0x12345678) + return (3); + if (si->size != sizeof *si) + return (4); + if (si->major_version != 2) + return (5); + if (si->mediasize != sc->mediasize) + return (7); + if (si->granularity != sc->granularity) + return (8); + if (si->align < sizeof(void*)) + return (9); + if (!PWR2(si->align)) + return (10); + sc->align = si->align; + sc->unique = si->unique; + + /* XXX: Sanity check stuff[6] */ + + assert(si->stuff[SMP_BAN1_STUFF] > sizeof *si + SHA256_LEN); + assert(si->stuff[SMP_BAN2_STUFF] > si->stuff[SMP_BAN1_STUFF]); + assert(si->stuff[SMP_SEG1_STUFF] > si->stuff[SMP_BAN2_STUFF]); + assert(si->stuff[SMP_SEG2_STUFF] > si->stuff[SMP_SEG1_STUFF]); + assert(si->stuff[SMP_SPC_STUFF] > si->stuff[SMP_SEG2_STUFF]); + assert(si->stuff[SMP_END_STUFF] == sc->mediasize); + + assert(smp_stuff_len(sc, SMP_SEG1_STUFF) > 65536); + assert(smp_stuff_len(sc, SMP_SEG1_STUFF) == + smp_stuff_len(sc, SMP_SEG2_STUFF)); + + assert(smp_stuff_len(sc, SMP_BAN1_STUFF) > 65536); + assert(smp_stuff_len(sc, SMP_BAN1_STUFF) == + smp_stuff_len(sc, SMP_BAN2_STUFF)); + + smp_def_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); + smp_def_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); + smp_def_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); + smp_def_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); + + /* We must have one valid BAN table */ + i = smp_chk_sign(&sc->ban1); + j = smp_chk_sign(&sc->ban2); + if (i && j) + return (100 + i * 10 + j); + + /* We must have one valid SEG table */ + i = smp_chk_sign(&sc->seg1); + j = smp_chk_sign(&sc->seg2); + if (i && j) + return (200 + i * 10 + j); + return (0); +} + 
+/*-------------------------------------------------------------------- + * Calculate cleaner metrics from silo dimensions + */ + +static void +smp_metrics(struct smp_sc *sc) +{ + + /* + * We do not want to loose too big chunks of the silos + * content when we are forced to clean a segment. + * + * For now insist that a segment covers no more than 1% of the silo. + * + * XXX: This should possibly depend on the size of the silo so + * XXX: trivially small silos do not run into trouble along + * XXX: the lines of "one object per segment". + */ + + sc->min_nseg = 10; + sc->max_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->min_nseg; + + fprintf(stderr, "min_nseg = %u, max_segl = %ju\n", + sc->min_nseg, (uintmax_t)sc->max_segl); + + /* + * The number of segments are limited by the size of the segment + * table(s) and from that follows the minimum size of a segmement. + */ + + sc->max_nseg = smp_stuff_len(sc, SMP_SEG1_STUFF) / sc->min_nseg; + sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg; + + while (sc->min_segl < sizeof(struct object)) { + sc->max_nseg /= 2; + sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg; + } + + fprintf(stderr, "max_nseg = %u, min_segl = %ju\n", + sc->max_nseg, (uintmax_t)sc->min_segl); + + /* + * Set our initial aim point at the exponential average of the + * two extremes. + * + * XXX: This is a pretty arbitrary choice, but having no idea + * XXX: object count, size distribution or ttl pattern at this + * XXX: point, we have to do something. + */ + + sc->aim_nseg = + (unsigned) exp((log(sc->min_nseg) + log(sc->max_nseg))*.5); + sc->aim_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->aim_nseg; + + fprintf(stderr, "aim_nseg = %u, aim_segl = %ju\n", + sc->aim_nseg, (uintmax_t)sc->aim_segl); + + /* + * How much space in the free reserve pool ? 
+ */ + sc->free_reserve = sc->aim_segl * 10; + + fprintf(stderr, "free_reserve = %ju\n", sc->free_reserve); +} + +/*-------------------------------------------------------------------- + * Set up persistent storage silo in the master process. + */ + +static void +smp_init(struct stevedore *parent, int ac, char * const *av) +{ + struct smp_sc *sc; + int i; + + ASSERT_MGT(); + + AZ(av[ac]); +#define SIZOF(foo) fprintf(stderr, \ + "sizeof(%s) = %zu = 0x%zx\n", #foo, sizeof(foo), sizeof(foo)); + SIZOF(struct smp_ident); + SIZOF(struct smp_sign); + SIZOF(struct smp_segptr); + SIZOF(struct smp_object); +#undef SIZOF + + /* See comments in persistent.h */ + assert(sizeof(struct smp_ident) == SMP_IDENT_SIZE); + + /* Allocate softc */ + ALLOC_OBJ(sc, SMP_SC_MAGIC); + XXXAN(sc); + sc->parent = parent; + sc->fd = -1; + VTAILQ_INIT(&sc->segments); + + /* Argument processing */ + if (ac != 2) + ARGV_ERR("(-spersistent) wrong number of arguments\n"); + + i = STV_GetFile(av[0], &sc->fd, &sc->filename, "-spersistent"); + if (i == 2) + ARGV_ERR("(-spersistent) need filename (not directory)\n"); + + sc->align = sizeof(void*) * 2; + sc->granularity = getpagesize(); + sc->mediasize = STV_FileSize(sc->fd, av[1], &sc->granularity, + "-spersistent"); + + AZ(ftruncate(sc->fd, sc->mediasize)); + + sc->base = mmap(NULL, sc->mediasize, PROT_READ|PROT_WRITE, + MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, 0); + + if (sc->base == MAP_FAILED) + ARGV_ERR("(-spersistent) failed to mmap (%s)\n", + strerror(errno)); + + smp_def_sign(sc, &sc->idn, 0, "SILO"); + sc->ident = SIGN_DATA(&sc->idn); + + i = smp_valid_silo(sc); + if (i) + smp_newsilo(sc); + AZ(smp_valid_silo(sc)); + + smp_metrics(sc); + + parent->priv = sc; + + /* XXX: only for sendfile I guess... */ + mgt_child_inherit(sc->fd, "storage_persistent"); +} + + +/*-------------------------------------------------------------------- + * Write the segmentlist back to the silo. 
+ * + * We write the first copy, sync it synchronously, then write the + * second copy and sync it synchronously. + * + * Provided the kernel doesn't lie, that means we will always have + * at least one valid copy on in the silo. + */ + +static void +smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx) +{ + struct smp_segptr *ss; + struct smp_seg *sg; + uint64_t length; + + Lck_AssertHeld(&sc->mtx); + smp_reset_sign(ctx); + ss = SIGN_DATA(ctx); + length = 0; + VTAILQ_FOREACH(sg, &sc->segments, list) { + assert(sg->p.offset < sc->mediasize); + assert(sg->p.offset + sg->p.length <= sc->mediasize); + *ss = sg->p; + ss++; + length += sizeof *ss; + } + smp_append_sign(ctx, SIGN_DATA(ctx), length); + smp_sync_sign(ctx); +} + +static void +smp_save_segs(struct smp_sc *sc) +{ + struct smp_seg *sg, *sg2; + + Lck_AssertHeld(&sc->mtx); + + /* + * Remove empty segments from the front of the list + * before we write the segments to disk. + */ + VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) { + if (sg->nobj > 0) + break; + if (sg == sc->cur_seg) + continue; + VTAILQ_REMOVE(&sc->segments, sg, list); + free(sg); + } + smp_save_seg(sc, &sc->seg1); + smp_save_seg(sc, &sc->seg2); +} + + +/*--------------------------------------------------------------------- + */ + +static struct smp_object * +smp_find_so(const struct smp_seg *sg, const struct objcore *oc) +{ + struct smp_object *so; + unsigned smp_idx; + + smp_idx = oc->priv2; + assert(smp_idx > 0); + assert(smp_idx <= sg->p.lobjlist); + so = &sg->objs[sg->p.lobjlist - smp_idx]; + return (so); +} + +/*--------------------------------------------------------------------- + * Check if a given storage structure is valid to use + */ + +static int +smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, + const struct storage *st) +{ + struct smp_seg *sg2; + const uint8_t *pst; + uint64_t o; + + (void)sg; /* XXX: faster: Start search from here */ + pst = (const void *)st; + + if (pst < (sc->base + 
sc->ident->stuff[SMP_SPC_STUFF])) + return (0x01); /* Before silo payload start */ + if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) + return (0x02); /* After silo end */ + + o = pst - sc->base; + + /* Find which segment contains the storage structure */ + VTAILQ_FOREACH(sg2, &sc->segments, list) + if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) + break; + if (sg2 == NULL) + return (0x04); /* No claiming segment */ + if (!(sg2->flags & SMP_SEG_LOADED)) + return (0x08); /* Claiming segment not loaded */ + + /* It is now safe to access the storage structure */ + if (st->magic != STORAGE_MAGIC) + return (0x10); /* Not enough magic */ + + if (o + st->space >= sg2->p.objlist) + return (0x20); /* Allocation not inside segment */ + + if (st->len > st->space) + return (0x40); /* Plain bad... */ + + /* + * XXX: We could patch up st->stevedore and st->priv here + * XXX: but if things go right, we will never need them. + */ + return (0); +} + +/*--------------------------------------------------------------------- + * objcore methods for persistent objects + */ + +static struct object * +smp_oc_getobj(struct worker *wrk, struct objcore *oc) +{ + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + struct storage *st; + uint64_t l; + int bad; + + /* Some calls are direct, but they should match anyway */ + assert(oc->methods->getobj == smp_oc_getobj); + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + if (wrk == NULL) + AZ(oc->flags & OC_F_NEEDFIXUP); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); + + o = (void*)(sg->sc->base + so->ptr); + /* + * The object may not be in this segment since we allocate it + * In a separate operation than the smp_object. We could check + * that it is in a later segment, but that would be complicated. 
+ * XXX: For now, be happy if it is inside th silo + */ + ASSERT_PTR_IN_SILO(sg->sc, o); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + + /* + * If this flag is not set, it will not be, and the lock is not + * needed to test it. + */ + if (!(oc->flags & OC_F_NEEDFIXUP)) + return (o); + + AN(wrk); + Lck_Lock(&sg->sc->mtx); + /* Check again, we might have raced. */ + if (oc->flags & OC_F_NEEDFIXUP) { + /* We trust caller to have a refcnt for us */ + o->objcore = oc; + + bad = 0; + l = 0; + VTAILQ_FOREACH(st, &o->store, list) { + bad |= smp_loaded_st(sg->sc, sg, st); + if (bad) + break; + l += st->len; + } + if (l != o->len) + bad |= 0x100; + + if(bad) { + o->ttl = 0; + o->grace = 0; + so->ttl = 0; + } + + sg->nfixed++; + wrk->stats.n_object++; + wrk->stats.n_vampireobject--; + oc->flags &= ~OC_F_NEEDFIXUP; + } + Lck_Unlock(&sg->sc->mtx); + return (o); +} + +static void +smp_oc_updatemeta(struct objcore *oc) +{ + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + double mttl; + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + o = smp_oc_getobj(NULL, oc); + AN(o); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); + so = smp_find_so(sg, oc); + + if (isnan(o->grace)) + mttl = o->ttl; + else + mttl = - (o->ttl + o->grace); + + if (sg == sg->sc->cur_seg) { + /* Lock necessary, we might race close_seg */ + Lck_Lock(&sg->sc->mtx); + so->ban = o->ban_t; + so->ttl = mttl; + Lck_Unlock(&sg->sc->mtx); + } else { + so->ban = o->ban_t; + so->ttl = mttl; + } +} + +static void __match_proto__() +smp_oc_freeobj(struct objcore *oc) +{ + struct smp_seg *sg; + struct smp_object *so; + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); + + Lck_Lock(&sg->sc->mtx); + so->ttl = 0; + so->ptr = 0; + + assert(sg->nobj > 0); + assert(sg->nfixed > 0); + sg->nobj--; + sg->nfixed--; + + Lck_Unlock(&sg->sc->mtx); +} + +static struct objcore_methods smp_oc_methods = { + .getobj = 
smp_oc_getobj, + .updatemeta = smp_oc_updatemeta, + .freeobj = smp_oc_freeobj, +}; + +/*-------------------------------------------------------------------- + * Add a new ban to all silos + */ + +static void +smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, + uint32_t flags, uint32_t len, const char *ban) +{ + uint8_t *ptr, *ptr2; + + (void)sc; + ptr = ptr2 = SIGN_END(ctx); + + memcpy(ptr, "BAN", 4); + ptr += 4; + + memcpy(ptr, &t0, sizeof t0); + ptr += sizeof t0; + + memcpy(ptr, &flags, sizeof flags); + ptr += sizeof flags; + + memcpy(ptr, &len, sizeof len); + ptr += sizeof len; + + memcpy(ptr, ban, len); + ptr += len; + + smp_append_sign(ctx, ptr2, ptr - ptr2); +} + +void +SMP_NewBan(double t0, const char *ban) +{ + struct smp_sc *sc; + uint32_t l = strlen(ban) + 1; + + VTAILQ_FOREACH(sc, &silos, list) { + smp_appendban(sc, &sc->ban1, t0, 0, l, ban); + smp_appendban(sc, &sc->ban2, t0, 0, l, ban); + } +} + +/*-------------------------------------------------------------------- + * Attempt to open and read in a ban list + */ + +static int +smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) +{ + uint8_t *ptr, *pe; + double t0; + uint32_t flags, length; + int i, retval = 0; + + ASSERT_CLI(); + (void)sc; + i = smp_chk_sign(ctx); + if (i) + return (i); + ptr = SIGN_DATA(ctx); + pe = ptr + ctx->ss->length; + + while (ptr < pe) { + if (memcmp(ptr, "BAN", 4)) { + retval = 1001; + break; + } + ptr += 4; + + memcpy(&t0, ptr, sizeof t0); + ptr += sizeof t0; + + memcpy(&flags, ptr, sizeof flags); + ptr += sizeof flags; + if (flags != 0) { + retval = 1002; + break; + } + + memcpy(&length, ptr, sizeof length); + ptr += sizeof length; + if (ptr + length > pe) { + retval = 1003; + break; + } + + if (ptr[length - 1] != '\0') { + retval = 1004; + break; + } + + BAN_Reload(t0, flags, (const char *)ptr); + + ptr += length; + } + assert(ptr <= pe); + return (retval); +} + + +/*--------------------------------------------------------------------*/ + +static 
uint64_t +smp_segend(const struct smp_seg *sg) +{ + + return (sg->p.offset + sg->p.length); +} + +static uint64_t +smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) +{ + + IASSERTALIGN(sc, sc->next_bot); + assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); + assert(sc->next_bot >= sg->p.offset); + assert(sc->next_top < sg->p.offset + sg->p.length); + return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); +} + +/*-------------------------------------------------------------------- + * Load segments + * + * The overall objective is to register the existence of an object, based + * only on the minimally sized struct smp_object, without causing the + * main object to be faulted in. + * + * XXX: We can test this by mprotecting the main body of the segment + * XXX: until the first fixup happens, or even just over this loop, + * XXX: However: this requires that the smp_objects start further + * XXX: into the segment than a page so that they do not get hit + * XXX: by the protection. 
+ */ + +static void +smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) +{ + struct smp_object *so; + struct objcore *oc; + uint32_t no; + double t_now = TIM_real(); + struct smp_signctx ctx[1]; + + ASSERT_SILO_THREAD(sc); + CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); + CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + assert(sg->flags & SMP_SEG_MUSTLOAD); + sg->flags &= ~SMP_SEG_MUSTLOAD; + AN(sg->p.offset); + if (sg->p.objlist == 0) + return; + smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); + if (smp_chk_sign(ctx)) + return; + + /* test SEGTAIL */ + /* test OBJIDX */ + so = (void*)(sc->base + sg->p.objlist); + sg->objs = so; + no = sg->p.lobjlist; + /* Clear the bogus "hold" count */ + sg->nobj = 0; + for (;no > 0; so++,no--) { + if (so->ttl > 0 && so->ttl < t_now) + continue; + if (so->ttl < 0 && -so->ttl < t_now) + continue; + HSH_Prealloc(sp); + oc = sp->wrk->nobjcore; + oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; + oc->flags &= ~OC_F_BUSY; + oc->priv = sg; + oc->priv2 = no; + oc->methods = &smp_oc_methods; + oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); + memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); + (void)HSH_Insert(sp); + AZ(sp->wrk->nobjcore); + EXP_Inject(oc, sg->lru, fabs(so->ttl)); + sg->nobj++; + } + WRK_SumStat(sp->wrk); + sg->flags |= SMP_SEG_LOADED; +} + +/*-------------------------------------------------------------------- + * Attempt to open and read in a segment list + */ + +static int +smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) +{ + uint64_t length, l; + struct smp_segptr *ss, *se; + struct smp_seg *sg, *sg1, *sg2; + int i, n = 0; + + ASSERT_CLI(); + i = smp_chk_sign(ctx); + if (i) + return (i); + + ss = SIGN_DATA(ctx); + length = ctx->ss->length; + + if (length == 0) { + /* No segments */ + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + return (0); + } + se = ss + length / sizeof *ss; + se--; + assert(ss <= se); + + /* + * Locate the free reserve, 
there are only two basic cases, + * but once we start dropping segments, things gets more complicated. + */ + + sc->free_offset = se->offset + se->length; + l = sc->mediasize - sc->free_offset; + if (se->offset > ss->offset && l >= sc->free_reserve) { + /* + * [__xxxxyyyyzzzz___] + * Plenty of space at tail, do nothing. + */ + } else if (ss->offset > se->offset) { + /* + * [zzzz____xxxxyyyy_] + * (make) space between ends + * We might nuke the entire tail end without getting + * enough space, in which case we fall through to the + * last check. + */ + while (ss < se && ss->offset > se->offset) { + l = ss->offset - (se->offset + se->length); + if (l > sc->free_reserve) + break; + ss++; + n++; + } + } + + if (l < sc->free_reserve) { + /* + * [__xxxxyyyyzzzz___] + * (make) space at front + */ + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + while (ss < se) { + l = ss->offset - sc->free_offset; + if (l > sc->free_reserve) + break; + ss++; + n++; + } + } + + assert (l >= sc->free_reserve); + + + sg1 = NULL; + sg2 = NULL; + for(; ss <= se; ss++) { + ALLOC_OBJ(sg, SMP_SEG_MAGIC); + AN(sg); + sg->lru = LRU_Alloc(); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + sg->p = *ss; + + sg->flags |= SMP_SEG_MUSTLOAD; + + /* + * HACK: prevent save_segs from nuking segment until we have + * HACK: loaded it. 
+ */ + sg->nobj = 1; + if (sg1 != NULL) { + assert(sg1->p.offset != sg->p.offset); + if (sg1->p.offset < sg->p.offset) + assert(smp_segend(sg1) <= sg->p.offset); + else + assert(smp_segend(sg) <= sg1->p.offset); + } + if (sg2 != NULL) { + assert(sg2->p.offset != sg->p.offset); + if (sg2->p.offset < sg->p.offset) + assert(smp_segend(sg2) <= sg->p.offset); + else + assert(smp_segend(sg) <= sg2->p.offset); + } + + /* XXX: check that they are inside silo */ + /* XXX: check that they don't overlap */ + /* XXX: check that they are serial */ + sg->sc = sc; + VTAILQ_INSERT_TAIL(&sc->segments, sg, list); + sg2 = sg; + if (sg1 == NULL) + sg1 = sg; + } + printf("Dropped %d segments to make free_reserve\n", n); + return (0); +} + +/*-------------------------------------------------------------------- + * Create a new segment + */ + +static void +smp_new_seg(struct smp_sc *sc) +{ + struct smp_seg *sg, *sg2; + + Lck_AssertHeld(&sc->mtx); + ALLOC_OBJ(sg, SMP_SEG_MAGIC); + AN(sg); + sg->sc = sc; + sg->lru = LRU_Alloc(); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + + /* XXX: find where it goes in silo */ + + sg->p.offset = sc->free_offset; + // XXX: align */ + assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]); + assert(sg->p.offset < sc->mediasize); + + sg->p.length = sc->aim_segl; + sg->p.length &= ~7; + + if (smp_segend(sg) > sc->mediasize) { + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + sg->p.offset = sc->free_offset; + sg2 = VTAILQ_FIRST(&sc->segments); + if (smp_segend(sg) > sg2->p.offset) { + printf("Out of space in persistent silo\n"); + printf("Committing suicide, restart will make space\n"); + exit (0); + } + } + + + assert(smp_segend(sg) <= sc->mediasize); + + sg2 = VTAILQ_FIRST(&sc->segments); + if (sg2 != NULL && sg2->p.offset > sc->free_offset) { + if (smp_segend(sg) > sg2->p.offset) { + printf("Out of space in persistent silo\n"); + printf("Committing suicide, restart will make space\n"); + exit (0); + } + assert(smp_segend(sg) <= sg2->p.offset); + } + + 
sg->p.offset = IRNUP(sc, sg->p.offset); + sg->p.length = IRNDN(sc, sg->p.length); + sc->free_offset = sg->p.offset + sg->p.length; + + VTAILQ_INSERT_TAIL(&sc->segments, sg, list); + + /* Neuter the new segment in case there is an old one there */ + AN(sg->p.offset); + smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Set up our allocation points */ + sc->cur_seg = sg; + sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); + sc->next_top = smp_segend(sg); + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + IASSERTALIGN(sc, sc->next_bot); + IASSERTALIGN(sc, sc->next_top); + sg->objs = (void*)(sc->base + sc->next_top); +} + +/*-------------------------------------------------------------------- + * Close a segment + */ + +static void +smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) +{ + uint64_t left, dst, len; + void *dp; + + Lck_AssertHeld(&sc->mtx); + + assert(sg == sc->cur_seg); + AN(sg->p.offset); + sc->cur_seg = NULL; + + if (sg->nalloc == 0) { + /* XXX: if segment is empty, delete instead */ + VTAILQ_REMOVE(&sc->segments, sg, list); + free(sg); + return; + } + + /* + * If there is enough space left, that we can move the smp_objects + * down without overwriting the present copy, we will do so to + * compact the segment. 
+ */ + left = smp_spaceleft(sc, sg); + len = sizeof(struct smp_object) * sg->p.lobjlist; + if (len < left) { + dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); + dp = sc->base + dst; + assert((uintptr_t)dp + len < (uintptr_t)sg->objs); + memcpy(dp, sg->objs, len); + sc->next_top = dst; + sg->objs = dp; + sg->p.length = (sc->next_top - sg->p.offset) + + len + IRNUP(sc, SMP_SIGN_SPACE); + (void)smp_spaceleft(sc, sg); /* for the asserts */ + + } + + /* Update the segment header */ + sg->p.objlist = sc->next_top; + + /* Write the (empty) OBJIDX signature */ + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + assert(sc->next_top >= sc->next_bot); + smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Write the (empty) SEGTAIL signature */ + smp_def_sign(sc, sg->ctx, + sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Save segment list */ + smp_save_segs(sc); + sc->free_offset = smp_segend(sg); +} + +/*-------------------------------------------------------------------- + * Silo worker thread + */ + +static void * +smp_thread(struct sess *sp, void *priv) +{ + struct smp_sc *sc; + struct smp_seg *sg; + + (void)sp; + CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC); + + /* First, load all the objects from all segments */ + VTAILQ_FOREACH(sg, &sc->segments, list) + if (sg->flags & SMP_SEG_MUSTLOAD) + smp_load_seg(sp, sc, sg); + + sc->flags |= SMP_SC_LOADED; + BAN_Deref(&sc->tailban); + sc->tailban = NULL; + printf("Silo completely loaded\n"); + while (1) + (void)sleep (1); + NEEDLESS_RETURN(NULL); +} + +/*-------------------------------------------------------------------- + * Open a silo in the worker process + */ + +static void +smp_open(const struct stevedore *st) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + + Lck_New(&sc->mtx, lck_smp); + Lck_Lock(&sc->mtx); + + sc->stevedore = st; + + /* We trust the 
parent to give us a valid silo, for good measure: */ + AZ(smp_valid_silo(sc)); + + AZ(mprotect(sc->base, 4096, PROT_READ)); + + sc->ident = SIGN_DATA(&sc->idn); + + /* We attempt ban1 first, and if that fails, try ban2 */ + if (smp_open_bans(sc, &sc->ban1)) + AZ(smp_open_bans(sc, &sc->ban2)); + + /* We attempt seg1 first, and if that fails, try seg2 */ + if (smp_open_segs(sc, &sc->seg1)) + AZ(smp_open_segs(sc, &sc->seg2)); + + sc->tailban = BAN_TailRef(); + AN(sc->tailban); + + /* XXX: save segments to ensure consistency between seg1 & seg2 ? */ + + /* XXX: abandon early segments to make sure we have free space ? */ + + /* Open a new segment, so we are ready to write */ + smp_new_seg(sc); + + /* Start the worker silo worker thread, it will load the objects */ + WRK_BgThread(&sc->thread, "persistence", smp_thread, sc); + + VTAILQ_INSERT_TAIL(&silos, sc, list); + Lck_Unlock(&sc->mtx); +} + +/*-------------------------------------------------------------------- + * Close a silo + */ + +static void +smp_close(const struct stevedore *st) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + Lck_Lock(&sc->mtx); + smp_close_seg(sc, sc->cur_seg); + Lck_Unlock(&sc->mtx); + + /* XXX: reap thread */ +} + +/*-------------------------------------------------------------------- + * Allocate a bite. + * + * Allocate [min_size...max_size] space from the bottom of the segment, + * as is convenient. + * + * If 'so' + 'idx' is given, also allocate a smp_object from the top + * of the segment. + * + * Return the segment in 'ssg' if given. 
+ */ + +static struct storage * +smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, + struct smp_object **so, unsigned *idx, struct smp_seg **ssg) +{ + struct smp_sc *sc; + struct storage *ss; + struct smp_seg *sg; + unsigned tries; + uint64_t left, extra; + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + assert(min_size <= max_size); + + max_size = IRNUP(sc, max_size); + min_size = IRNUP(sc, min_size); + + extra = IRNUP(sc, sizeof(*ss)); + if (so != NULL) { + extra += sizeof(**so); + AN(idx); + } + + Lck_Lock(&sc->mtx); + sg = NULL; + ss = NULL; + for (tries = 0; tries < 3; tries++) { + left = smp_spaceleft(sc, sc->cur_seg); + if (left >= extra + min_size) + break; + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } + if (left >= extra + min_size) { + if (left < extra + max_size) + max_size = IRNDN(sc, left - extra); + + sg = sc->cur_seg; + ss = (void*)(sc->base + sc->next_bot); + sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); + sg->nalloc++; + if (so != NULL) { + sc->next_top -= sizeof(**so); + *so = (void*)(sc->base + sc->next_top); + /* Render this smp_object mostly harmless */ + (*so)->ttl = 0.; + (*so)->ban = 0.; + (*so)->ptr = 0;; + sg->objs = *so; + *idx = ++sg->p.lobjlist; + } + (void)smp_spaceleft(sc, sg); /* for the assert */ + } + Lck_Unlock(&sc->mtx); + + if (ss == NULL) + return (ss); + AN(sg); + assert(max_size >= min_size); + + /* Fill the storage structure */ + memset(ss, 0, sizeof *ss); + ss->magic = STORAGE_MAGIC; + ss->ptr = PRNUP(sc, ss + 1); + ss->space = max_size; + ss->priv = sc; + ss->stevedore = st; + ss->fd = sc->fd; + if (ssg != NULL) + *ssg = sg; + return (ss); +} + +/*-------------------------------------------------------------------- + * Find the per-segment lru list for this object + */ + +static struct lru * +smp_getlru(const struct object *o) +{ + struct smp_seg *sg; + + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); + return (sg->lru); +} + 
+/*-------------------------------------------------------------------- + * Allocate an object + */ + +static struct object * +smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, + const struct stv_objsecrets *soc) +{ + struct object *o; + struct storage *st; + struct smp_sc *sc; + struct smp_seg *sg; + struct smp_object *so; + struct objcore *oc; + unsigned objidx; + + CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); + AN(sp->objcore); + AN(sp->wrk->ttl >= 0); + + ltot = IRNUP(sc, ltot); + + st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); + if (st == NULL) + return (NULL); + + assert(st->space >= ltot); + ltot = st->len = st->space; + + o = STV_MkObject(sp, st->ptr, ltot, soc); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + o->objstore = st; + + oc = o->objcore; + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + oc->flags |= OC_F_LRUDONTMOVE; + + Lck_Lock(&sc->mtx); + sg->nfixed++; + sg->nobj++; + + /* We have to do this somewhere, might as well be here... */ + assert(sizeof so->hash == DIGEST_LEN); + memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); + so->ttl = o->ttl; /* XXX: grace? */ + so->ptr = (uint8_t*)o - sc->base; + so->ban = o->ban_t; + + oc->priv = sg; + oc->priv2 = objidx; + oc->methods = &smp_oc_methods; + + Lck_Unlock(&sc->mtx); + return (o); +} + +/*-------------------------------------------------------------------- + * Allocate a bite + */ + +static struct storage * +smp_alloc(struct stevedore *st, size_t size) +{ + + return (smp_allocx(st, + size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); +} + +/*-------------------------------------------------------------------- + * Trim a bite + * XXX: We could trim the last allocation. 
+ */ + +static void +smp_trim(struct storage *ss, size_t size) +{ + + (void)ss; + (void)size; +} + +/*-------------------------------------------------------------------- + * We don't track frees of storage, we track the objects which own the + * storage and when there are no more objects in in the first segment, + * it can be reclaimed. + * XXX: We could free the last allocation, but does that happen ? + */ + +static void __match_proto__(storage_free_f) +smp_free(struct storage *st) +{ + + /* XXX */ + (void)st; +} + +/*-------------------------------------------------------------------- + * Pause until all silos have loaded. + */ + +void +SMP_Ready(void) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + do { + VTAILQ_FOREACH(sc, &silos, list) + if (!(sc->flags & SMP_SC_LOADED)) + break; + if (sc != NULL) + (void)sleep(1); + } while (sc != NULL); +} + +/*--------------------------------------------------------------------*/ + +const struct stevedore smp_stevedore = { + .magic = STEVEDORE_MAGIC, + .name = "persistent", + .init = smp_init, + .open = smp_open, + .close = smp_close, + .alloc = smp_alloc, + .allocobj = smp_allocobj, + .getlru = smp_getlru, + .free = smp_free, + .trim = smp_trim, +}; + +/*-------------------------------------------------------------------- + * Persistence is a bear to test unadultered, so we cheat by adding + * a cli command we can use to make it do tricks for us. + */ + +static void +debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) +{ + struct smp_seg *sg; + struct objcore *oc; + + cli_out(cli, "Silo: %s (%s)\n", + sc->stevedore->ident, sc->filename); + VTAILQ_FOREACH(sg, &sc->segments, list) { + cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", + (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); + if (sg == sc->cur_seg) + cli_out(cli, + " Alloc: [0x%jx ... 
0x%jx] = 0x%jx free\n", + (uintmax_t)(sc->next_bot), + (uintmax_t)(sc->next_top), + (uintmax_t)(sc->next_top - sc->next_bot)); + cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", + sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); + if (objs) { + VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) + cli_out(cli, " %s %p\n", + oc == &sg->lru->senteniel ? + "senteniel" : "OC: ", oc); + } + } +} + +static void +debug_persistent(struct cli *cli, const char * const * av, void *priv) +{ + struct smp_sc *sc; + + (void)priv; + + if (av[2] == NULL) { + VTAILQ_FOREACH(sc, &silos, list) + debug_report_silo(cli, sc, 0); + return; + } + VTAILQ_FOREACH(sc, &silos, list) + if (!strcmp(av[2], sc->stevedore->ident)) + break; + if (sc == NULL) { + cli_out(cli, "Silo <%s> not found\n", av[2]); + cli_result(cli, CLIS_PARAM); + return; + } + if (av[3] == NULL) { + debug_report_silo(cli, sc, 0); + return; + } + Lck_Lock(&sc->mtx); + if (!strcmp(av[3], "sync")) { + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } else if (!strcmp(av[3], "dump")) { + debug_report_silo(cli, sc, 1); + } else { + cli_out(cli, "Unknown operation\n"); + cli_result(cli, CLIS_PARAM); + } + Lck_Unlock(&sc->mtx); +} + +static struct cli_proto debug_cmds[] = { + { "debug.persistent", "debug.persistent", + "Persistent debugging magic:\n" + "\tdebug.persistent [stevedore [cmd]]\n" + "With no cmd arg, a summary of the silo is returned.\n" + "Possible commands:\n" + "\tsync\tClose current segment, open a new one\n" + "\tdump\tinclude objcores in silo summary\n" + "", + 0, 2, "d", debug_persistent }, + { NULL } +}; + +void +SMP_Init(void) +{ + CLI_AddFuncs(debug_cmds); +} From phk at varnish-cache.org Tue Feb 8 10:50:31 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 08 Feb 2011 11:50:31 +0100 Subject: [master] c4c9db0 Split management process functions into their own source file Message-ID: commit c4c9db060c23b86603ce6650803eb1431a61caf9 Author: Poul-Henning Kamp Date: Tue Feb 8 10:05:12 
2011 +0000 Split management process functions into their own source file diff --git a/bin/varnishd/Makefile.am b/bin/varnishd/Makefile.am index af826a3..ab7b1d6 100644 --- a/bin/varnishd/Makefile.am +++ b/bin/varnishd/Makefile.am @@ -64,6 +64,7 @@ varnishd_SOURCES = \ storage_file.c \ storage_malloc.c \ storage_persistent.c \ + storage_persistent_mgt.c \ storage_synth.c \ storage_umem.c \ stevedore_utils.c \ diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 9e51120..eeee9ae 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -79,7 +79,7 @@ static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); * Define a signature by location and identifier. */ -static void +void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, uint64_t off, const char *id) { @@ -195,28 +195,14 @@ smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, smp_sync_sign(ctx); } -/*-------------------------------------------------------------------- - * Caculate payload of some stuff - */ - -static uint64_t -smp_stuff_len(const struct smp_sc *sc, unsigned stuff) -{ - uint64_t l; - - assert(stuff < SMP_END_STUFF); - l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff]; - l -= SMP_SIGN_SPACE; - return (l); -} -/*-------------------------------------------------------------------- +/*-------------------------------------------------------------------:e * Initialize a Silo with a valid but empty structure. * * XXX: more intelligent sizing of things. */ -static void +void smp_newsilo(struct smp_sc *sc) { struct smp_ident *si; @@ -265,7 +251,7 @@ smp_newsilo(struct smp_sc *sc) * Check if a silo is valid. 
*/ -static int +int smp_valid_silo(struct smp_sc *sc) { struct smp_ident *si; @@ -333,141 +319,6 @@ smp_valid_silo(struct smp_sc *sc) } /*-------------------------------------------------------------------- - * Calculate cleaner metrics from silo dimensions - */ - -static void -smp_metrics(struct smp_sc *sc) -{ - - /* - * We do not want to loose too big chunks of the silos - * content when we are forced to clean a segment. - * - * For now insist that a segment covers no more than 1% of the silo. - * - * XXX: This should possibly depend on the size of the silo so - * XXX: trivially small silos do not run into trouble along - * XXX: the lines of "one object per segment". - */ - - sc->min_nseg = 10; - sc->max_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->min_nseg; - - fprintf(stderr, "min_nseg = %u, max_segl = %ju\n", - sc->min_nseg, (uintmax_t)sc->max_segl); - - /* - * The number of segments are limited by the size of the segment - * table(s) and from that follows the minimum size of a segmement. - */ - - sc->max_nseg = smp_stuff_len(sc, SMP_SEG1_STUFF) / sc->min_nseg; - sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg; - - while (sc->min_segl < sizeof(struct object)) { - sc->max_nseg /= 2; - sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg; - } - - fprintf(stderr, "max_nseg = %u, min_segl = %ju\n", - sc->max_nseg, (uintmax_t)sc->min_segl); - - /* - * Set our initial aim point at the exponential average of the - * two extremes. - * - * XXX: This is a pretty arbitrary choice, but having no idea - * XXX: object count, size distribution or ttl pattern at this - * XXX: point, we have to do something. - */ - - sc->aim_nseg = - (unsigned) exp((log(sc->min_nseg) + log(sc->max_nseg))*.5); - sc->aim_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->aim_nseg; - - fprintf(stderr, "aim_nseg = %u, aim_segl = %ju\n", - sc->aim_nseg, (uintmax_t)sc->aim_segl); - - /* - * How much space in the free reserve pool ? 
- */ - sc->free_reserve = sc->aim_segl * 10; - - fprintf(stderr, "free_reserve = %ju\n", sc->free_reserve); -} - -/*-------------------------------------------------------------------- - * Set up persistent storage silo in the master process. - */ - -static void -smp_init(struct stevedore *parent, int ac, char * const *av) -{ - struct smp_sc *sc; - int i; - - ASSERT_MGT(); - - AZ(av[ac]); -#define SIZOF(foo) fprintf(stderr, \ - "sizeof(%s) = %zu = 0x%zx\n", #foo, sizeof(foo), sizeof(foo)); - SIZOF(struct smp_ident); - SIZOF(struct smp_sign); - SIZOF(struct smp_segptr); - SIZOF(struct smp_object); -#undef SIZOF - - /* See comments in persistent.h */ - assert(sizeof(struct smp_ident) == SMP_IDENT_SIZE); - - /* Allocate softc */ - ALLOC_OBJ(sc, SMP_SC_MAGIC); - XXXAN(sc); - sc->parent = parent; - sc->fd = -1; - VTAILQ_INIT(&sc->segments); - - /* Argument processing */ - if (ac != 2) - ARGV_ERR("(-spersistent) wrong number of arguments\n"); - - i = STV_GetFile(av[0], &sc->fd, &sc->filename, "-spersistent"); - if (i == 2) - ARGV_ERR("(-spersistent) need filename (not directory)\n"); - - sc->align = sizeof(void*) * 2; - sc->granularity = getpagesize(); - sc->mediasize = STV_FileSize(sc->fd, av[1], &sc->granularity, - "-spersistent"); - - AZ(ftruncate(sc->fd, sc->mediasize)); - - sc->base = mmap(NULL, sc->mediasize, PROT_READ|PROT_WRITE, - MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, 0); - - if (sc->base == MAP_FAILED) - ARGV_ERR("(-spersistent) failed to mmap (%s)\n", - strerror(errno)); - - smp_def_sign(sc, &sc->idn, 0, "SILO"); - sc->ident = SIGN_DATA(&sc->idn); - - i = smp_valid_silo(sc); - if (i) - smp_newsilo(sc); - AZ(smp_valid_silo(sc)); - - smp_metrics(sc); - - parent->priv = sc; - - /* XXX: only for sendfile I guess... */ - mgt_child_inherit(sc->fd, "storage_persistent"); -} - - -/*-------------------------------------------------------------------- * Write the segmentlist back to the silo. 
* * We write the first copy, sync it synchronously, then write the @@ -1480,7 +1331,7 @@ SMP_Ready(void) const struct stevedore smp_stevedore = { .magic = STEVEDORE_MAGIC, .name = "persistent", - .init = smp_init, + .init = smp_mgt_init, .open = smp_open, .close = smp_close, .alloc = smp_alloc, diff --git a/bin/varnishd/storage_persistent.h b/bin/varnishd/storage_persistent.h index 5c7af37..45bf5d3 100644 --- a/bin/varnishd/storage_persistent.h +++ b/bin/varnishd/storage_persistent.h @@ -173,3 +173,34 @@ struct smp_sc { #define SIGN_DATA(ctx) ((void *)((ctx)->ss + 1)) #define SIGN_END(ctx) ((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length)) + +/* storage_persistent.c */ +void smp_newsilo(struct smp_sc *sc); +int smp_valid_silo(struct smp_sc *sc); + +void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id); + +/* storage_persistent_mgt.c */ +void smp_mgt_init(struct stevedore *parent, int ac, char * const *av); + +/*--------------------------------------------------------------------*/ + +/*-------------------------------------------------------------------- + * Caculate payload of some stuff + */ + +static inline uint64_t +smp_stuff_len(const struct smp_sc *sc, unsigned stuff) +{ + uint64_t l; + + assert(stuff < SMP_END_STUFF); + l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff]; + l -= SMP_SIGN_SPACE; + return (l); +} + + + + diff --git a/bin/varnishd/storage_persistent_mgt.c b/bin/varnishd/storage_persistent_mgt.c index 9e51120..e206f1a 100644 --- a/bin/varnishd/storage_persistent_mgt.c +++ b/bin/varnishd/storage_persistent_mgt.c @@ -38,300 +38,18 @@ #include "svnid.h" SVNID("$Id$") -#include #include #include -#include -#include #include -#include -#include #include #include "cache.h" #include "stevedore.h" -#include "hash_slinger.h" #include "vsha256.h" -#include "cli.h" -#include "cli_priv.h" #include "persistent.h" #include "storage_persistent.h" 
-/*--------------------------------------------------------------------*/ - -/* - * silos is unlocked, it only changes during startup when we are - * single-threaded - */ -static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); - -/*-------------------------------------------------------------------- - * SIGNATURE functions - * The signature is SHA256 over: - * 1. The smp_sign struct up to but not including the length field. - * 2. smp_sign->length bytes, starting after the smp_sign structure - * 3. The smp-sign->length field. - * The signature is stored after the byte-range from step 2. - */ - -/*-------------------------------------------------------------------- - * Define a signature by location and identifier. - */ - -static void -smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, - uint64_t off, const char *id) -{ - - AZ(off & 7); /* Alignment */ - assert(strlen(id) < sizeof ctx->ss->ident); - - memset(ctx, 0, sizeof ctx); - ctx->ss = (void*)(sc->base + off); - ctx->unique = sc->unique; - ctx->id = id; -} - -/*-------------------------------------------------------------------- - * Check that a signature is good, leave state ready for append - */ -static int -smp_chk_sign(struct smp_signctx *ctx) -{ - struct SHA256Context cx; - unsigned char sign[SHA256_LEN]; - int r = 0; - - if (strncmp(ctx->id, ctx->ss->ident, sizeof ctx->ss->ident)) - r = 1; - else if (ctx->unique != ctx->ss->unique) - r = 2; - else if ((uintptr_t)ctx->ss != ctx->ss->mapped) - r = 3; - else { - SHA256_Init(&ctx->ctx); - SHA256_Update(&ctx->ctx, ctx->ss, - offsetof(struct smp_sign, length)); - SHA256_Update(&ctx->ctx, SIGN_DATA(ctx), ctx->ss->length); - cx = ctx->ctx; - SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); - SHA256_Final(sign, &cx); - if (memcmp(sign, SIGN_END(ctx), sizeof sign)) - r = 4; - } - if (r) { - fprintf(stderr, "CHK(%p %s %p %s) = %d\n", - ctx, ctx->id, ctx->ss, - r > 1 ? 
ctx->ss->ident : "", r); - } - return (r); -} - -/*-------------------------------------------------------------------- - * Append data to a signature - */ -static void -smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len) -{ - struct SHA256Context cx; - unsigned char sign[SHA256_LEN]; - - if (len != 0) { - SHA256_Update(&ctx->ctx, ptr, len); - ctx->ss->length += len; - } - cx = ctx->ctx; - SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); - SHA256_Final(sign, &cx); - memcpy(SIGN_END(ctx), sign, sizeof sign); -XXXAZ(smp_chk_sign(ctx)); -} - -/*-------------------------------------------------------------------- - * Reset a signature to empty, prepare for appending. - */ - -static void -smp_reset_sign(struct smp_signctx *ctx) -{ - - memset(ctx->ss, 0, sizeof *ctx->ss); - strcpy(ctx->ss->ident, ctx->id); - ctx->ss->unique = ctx->unique; - ctx->ss->mapped = (uintptr_t)ctx->ss; - SHA256_Init(&ctx->ctx); - SHA256_Update(&ctx->ctx, ctx->ss, - offsetof(struct smp_sign, length)); - smp_append_sign(ctx, NULL, 0); -} - -/*-------------------------------------------------------------------- - * Force a write of a signature block to the backing store. 
- */ - -static void -smp_sync_sign(const struct smp_signctx *ctx) -{ - int i; - - /* XXX: round to pages */ - i = msync((void*)ctx->ss, ctx->ss->length + SHA256_LEN, MS_SYNC); - if (i && 0) - fprintf(stderr, "SyncSign(%p %s) = %d %s\n", - ctx->ss, ctx->id, i, strerror(errno)); -} - -/*-------------------------------------------------------------------- - * Create and force a new signature to backing store - */ - -static void -smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, - uint64_t off, const char *id) -{ - smp_def_sign(sc, ctx, off, id); - smp_reset_sign(ctx); - smp_sync_sign(ctx); -} - -/*-------------------------------------------------------------------- - * Caculate payload of some stuff - */ - -static uint64_t -smp_stuff_len(const struct smp_sc *sc, unsigned stuff) -{ - uint64_t l; - - assert(stuff < SMP_END_STUFF); - l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff]; - l -= SMP_SIGN_SPACE; - return (l); -} - -/*-------------------------------------------------------------------- - * Initialize a Silo with a valid but empty structure. - * - * XXX: more intelligent sizing of things. 
- */ - -static void -smp_newsilo(struct smp_sc *sc) -{ - struct smp_ident *si; - - ASSERT_MGT(); - assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); - - /* Choose a new random number */ - sc->unique = random(); - - smp_reset_sign(&sc->idn); - si = sc->ident; - - memset(si, 0, sizeof *si); - strcpy(si->ident, SMP_IDENT_STRING); - si->byte_order = 0x12345678; - si->size = sizeof *si; - si->major_version = 2; - si->unique = sc->unique; - si->mediasize = sc->mediasize; - si->granularity = sc->granularity; - /* - * Aim for cache-line-width - */ - si->align = sizeof(void*) * 2; - sc->align = si->align; - - si->stuff[SMP_BAN1_STUFF] = sc->granularity; - si->stuff[SMP_BAN2_STUFF] = si->stuff[SMP_BAN1_STUFF] + 1024*1024; - si->stuff[SMP_SEG1_STUFF] = si->stuff[SMP_BAN2_STUFF] + 1024*1024; - si->stuff[SMP_SEG2_STUFF] = si->stuff[SMP_SEG1_STUFF] + 1024*1024; - si->stuff[SMP_SPC_STUFF] = si->stuff[SMP_SEG2_STUFF] + 1024*1024; - si->stuff[SMP_END_STUFF] = si->mediasize; - assert(si->stuff[SMP_SPC_STUFF] < si->stuff[SMP_END_STUFF]); - - smp_new_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); - smp_new_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); - smp_new_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); - smp_new_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); - - smp_append_sign(&sc->idn, si, sizeof *si); - smp_sync_sign(&sc->idn); -} - -/*-------------------------------------------------------------------- - * Check if a silo is valid. 
- */ - -static int -smp_valid_silo(struct smp_sc *sc) -{ - struct smp_ident *si; - int i, j; - - assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); - - if (smp_chk_sign(&sc->idn)) - return (1); - - si = sc->ident; - if (strcmp(si->ident, SMP_IDENT_STRING)) - return (2); - if (si->byte_order != 0x12345678) - return (3); - if (si->size != sizeof *si) - return (4); - if (si->major_version != 2) - return (5); - if (si->mediasize != sc->mediasize) - return (7); - if (si->granularity != sc->granularity) - return (8); - if (si->align < sizeof(void*)) - return (9); - if (!PWR2(si->align)) - return (10); - sc->align = si->align; - sc->unique = si->unique; - - /* XXX: Sanity check stuff[6] */ - - assert(si->stuff[SMP_BAN1_STUFF] > sizeof *si + SHA256_LEN); - assert(si->stuff[SMP_BAN2_STUFF] > si->stuff[SMP_BAN1_STUFF]); - assert(si->stuff[SMP_SEG1_STUFF] > si->stuff[SMP_BAN2_STUFF]); - assert(si->stuff[SMP_SEG2_STUFF] > si->stuff[SMP_SEG1_STUFF]); - assert(si->stuff[SMP_SPC_STUFF] > si->stuff[SMP_SEG2_STUFF]); - assert(si->stuff[SMP_END_STUFF] == sc->mediasize); - - assert(smp_stuff_len(sc, SMP_SEG1_STUFF) > 65536); - assert(smp_stuff_len(sc, SMP_SEG1_STUFF) == - smp_stuff_len(sc, SMP_SEG2_STUFF)); - - assert(smp_stuff_len(sc, SMP_BAN1_STUFF) > 65536); - assert(smp_stuff_len(sc, SMP_BAN1_STUFF) == - smp_stuff_len(sc, SMP_BAN2_STUFF)); - - smp_def_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); - smp_def_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); - smp_def_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); - smp_def_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); - - /* We must have one valid BAN table */ - i = smp_chk_sign(&sc->ban1); - j = smp_chk_sign(&sc->ban2); - if (i && j) - return (100 + i * 10 + j); - - /* We must have one valid SEG table */ - i = smp_chk_sign(&sc->seg1); - j = smp_chk_sign(&sc->seg2); - if (i && j) - return (200 + i * 10 + j); - return (0); -} - 
/*-------------------------------------------------------------------- * Calculate cleaner metrics from silo dimensions */ @@ -401,8 +119,8 @@ smp_metrics(struct smp_sc *sc) * Set up persistent storage silo in the master process. */ -static void -smp_init(struct stevedore *parent, int ac, char * const *av) +void +smp_mgt_init(struct stevedore *parent, int ac, char * const *av) { struct smp_sc *sc; int i; @@ -465,1116 +183,3 @@ smp_init(struct stevedore *parent, int ac, char * const *av) /* XXX: only for sendfile I guess... */ mgt_child_inherit(sc->fd, "storage_persistent"); } - - -/*-------------------------------------------------------------------- - * Write the segmentlist back to the silo. - * - * We write the first copy, sync it synchronously, then write the - * second copy and sync it synchronously. - * - * Provided the kernel doesn't lie, that means we will always have - * at least one valid copy on in the silo. - */ - -static void -smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx) -{ - struct smp_segptr *ss; - struct smp_seg *sg; - uint64_t length; - - Lck_AssertHeld(&sc->mtx); - smp_reset_sign(ctx); - ss = SIGN_DATA(ctx); - length = 0; - VTAILQ_FOREACH(sg, &sc->segments, list) { - assert(sg->p.offset < sc->mediasize); - assert(sg->p.offset + sg->p.length <= sc->mediasize); - *ss = sg->p; - ss++; - length += sizeof *ss; - } - smp_append_sign(ctx, SIGN_DATA(ctx), length); - smp_sync_sign(ctx); -} - -static void -smp_save_segs(struct smp_sc *sc) -{ - struct smp_seg *sg, *sg2; - - Lck_AssertHeld(&sc->mtx); - - /* - * Remove empty segments from the front of the list - * before we write the segments to disk. 
- */ - VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) { - if (sg->nobj > 0) - break; - if (sg == sc->cur_seg) - continue; - VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); - } - smp_save_seg(sc, &sc->seg1); - smp_save_seg(sc, &sc->seg2); -} - - -/*--------------------------------------------------------------------- - */ - -static struct smp_object * -smp_find_so(const struct smp_seg *sg, const struct objcore *oc) -{ - struct smp_object *so; - unsigned smp_idx; - - smp_idx = oc->priv2; - assert(smp_idx > 0); - assert(smp_idx <= sg->p.lobjlist); - so = &sg->objs[sg->p.lobjlist - smp_idx]; - return (so); -} - -/*--------------------------------------------------------------------- - * Check if a given storage structure is valid to use - */ - -static int -smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, - const struct storage *st) -{ - struct smp_seg *sg2; - const uint8_t *pst; - uint64_t o; - - (void)sg; /* XXX: faster: Start search from here */ - pst = (const void *)st; - - if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF])) - return (0x01); /* Before silo payload start */ - if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) - return (0x02); /* After silo end */ - - o = pst - sc->base; - - /* Find which segment contains the storage structure */ - VTAILQ_FOREACH(sg2, &sc->segments, list) - if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) - break; - if (sg2 == NULL) - return (0x04); /* No claiming segment */ - if (!(sg2->flags & SMP_SEG_LOADED)) - return (0x08); /* Claiming segment not loaded */ - - /* It is now safe to access the storage structure */ - if (st->magic != STORAGE_MAGIC) - return (0x10); /* Not enough magic */ - - if (o + st->space >= sg2->p.objlist) - return (0x20); /* Allocation not inside segment */ - - if (st->len > st->space) - return (0x40); /* Plain bad... */ - - /* - * XXX: We could patch up st->stevedore and st->priv here - * XXX: but if things go right, we will never need them. 
- */ - return (0); -} - -/*--------------------------------------------------------------------- - * objcore methods for persistent objects - */ - -static struct object * -smp_oc_getobj(struct worker *wrk, struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - struct storage *st; - uint64_t l; - int bad; - - /* Some calls are direct, but they should match anyway */ - assert(oc->methods->getobj == smp_oc_getobj); - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - if (wrk == NULL) - AZ(oc->flags & OC_F_NEEDFIXUP); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - o = (void*)(sg->sc->base + so->ptr); - /* - * The object may not be in this segment since we allocate it - * In a separate operation than the smp_object. We could check - * that it is in a later segment, but that would be complicated. - * XXX: For now, be happy if it is inside th silo - */ - ASSERT_PTR_IN_SILO(sg->sc, o); - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - - /* - * If this flag is not set, it will not be, and the lock is not - * needed to test it. - */ - if (!(oc->flags & OC_F_NEEDFIXUP)) - return (o); - - AN(wrk); - Lck_Lock(&sg->sc->mtx); - /* Check again, we might have raced. 
*/ - if (oc->flags & OC_F_NEEDFIXUP) { - /* We trust caller to have a refcnt for us */ - o->objcore = oc; - - bad = 0; - l = 0; - VTAILQ_FOREACH(st, &o->store, list) { - bad |= smp_loaded_st(sg->sc, sg, st); - if (bad) - break; - l += st->len; - } - if (l != o->len) - bad |= 0x100; - - if(bad) { - o->ttl = 0; - o->grace = 0; - so->ttl = 0; - } - - sg->nfixed++; - wrk->stats.n_object++; - wrk->stats.n_vampireobject--; - oc->flags &= ~OC_F_NEEDFIXUP; - } - Lck_Unlock(&sg->sc->mtx); - return (o); -} - -static void -smp_oc_updatemeta(struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - double mttl; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - o = smp_oc_getobj(NULL, oc); - AN(o); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); - so = smp_find_so(sg, oc); - - if (isnan(o->grace)) - mttl = o->ttl; - else - mttl = - (o->ttl + o->grace); - - if (sg == sg->sc->cur_seg) { - /* Lock necessary, we might race close_seg */ - Lck_Lock(&sg->sc->mtx); - so->ban = o->ban_t; - so->ttl = mttl; - Lck_Unlock(&sg->sc->mtx); - } else { - so->ban = o->ban_t; - so->ttl = mttl; - } -} - -static void __match_proto__() -smp_oc_freeobj(struct objcore *oc) -{ - struct smp_seg *sg; - struct smp_object *so; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - Lck_Lock(&sg->sc->mtx); - so->ttl = 0; - so->ptr = 0; - - assert(sg->nobj > 0); - assert(sg->nfixed > 0); - sg->nobj--; - sg->nfixed--; - - Lck_Unlock(&sg->sc->mtx); -} - -static struct objcore_methods smp_oc_methods = { - .getobj = smp_oc_getobj, - .updatemeta = smp_oc_updatemeta, - .freeobj = smp_oc_freeobj, -}; - -/*-------------------------------------------------------------------- - * Add a new ban to all silos - */ - -static void -smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, - uint32_t flags, uint32_t len, const char *ban) -{ - uint8_t *ptr, *ptr2; - - 
(void)sc; - ptr = ptr2 = SIGN_END(ctx); - - memcpy(ptr, "BAN", 4); - ptr += 4; - - memcpy(ptr, &t0, sizeof t0); - ptr += sizeof t0; - - memcpy(ptr, &flags, sizeof flags); - ptr += sizeof flags; - - memcpy(ptr, &len, sizeof len); - ptr += sizeof len; - - memcpy(ptr, ban, len); - ptr += len; - - smp_append_sign(ctx, ptr2, ptr - ptr2); -} - -void -SMP_NewBan(double t0, const char *ban) -{ - struct smp_sc *sc; - uint32_t l = strlen(ban) + 1; - - VTAILQ_FOREACH(sc, &silos, list) { - smp_appendban(sc, &sc->ban1, t0, 0, l, ban); - smp_appendban(sc, &sc->ban2, t0, 0, l, ban); - } -} - -/*-------------------------------------------------------------------- - * Attempt to open and read in a ban list - */ - -static int -smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) -{ - uint8_t *ptr, *pe; - double t0; - uint32_t flags, length; - int i, retval = 0; - - ASSERT_CLI(); - (void)sc; - i = smp_chk_sign(ctx); - if (i) - return (i); - ptr = SIGN_DATA(ctx); - pe = ptr + ctx->ss->length; - - while (ptr < pe) { - if (memcmp(ptr, "BAN", 4)) { - retval = 1001; - break; - } - ptr += 4; - - memcpy(&t0, ptr, sizeof t0); - ptr += sizeof t0; - - memcpy(&flags, ptr, sizeof flags); - ptr += sizeof flags; - if (flags != 0) { - retval = 1002; - break; - } - - memcpy(&length, ptr, sizeof length); - ptr += sizeof length; - if (ptr + length > pe) { - retval = 1003; - break; - } - - if (ptr[length - 1] != '\0') { - retval = 1004; - break; - } - - BAN_Reload(t0, flags, (const char *)ptr); - - ptr += length; - } - assert(ptr <= pe); - return (retval); -} - - -/*--------------------------------------------------------------------*/ - -static uint64_t -smp_segend(const struct smp_seg *sg) -{ - - return (sg->p.offset + sg->p.length); -} - -static uint64_t -smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) -{ - - IASSERTALIGN(sc, sc->next_bot); - assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); - assert(sc->next_bot >= sg->p.offset); - assert(sc->next_top < 
sg->p.offset + sg->p.length); - return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); -} - -/*-------------------------------------------------------------------- - * Load segments - * - * The overall objective is to register the existence of an object, based - * only on the minimally sized struct smp_object, without causing the - * main object to be faulted in. - * - * XXX: We can test this by mprotecting the main body of the segment - * XXX: until the first fixup happens, or even just over this loop, - * XXX: However: the requires that the smp_objects starter further - * XXX: into the segment than a page so that they do not get hit - * XXX: by the protection. - */ - -static void -smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) -{ - struct smp_object *so; - struct objcore *oc; - uint32_t no; - double t_now = TIM_real(); - struct smp_signctx ctx[1]; - - ASSERT_SILO_THREAD(sc); - CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); - CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - assert(sg->flags & SMP_SEG_MUSTLOAD); - sg->flags &= ~SMP_SEG_MUSTLOAD; - AN(sg->p.offset); - if (sg->p.objlist == 0) - return; - smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); - if (smp_chk_sign(ctx)) - return; - - /* test SEGTAIL */ - /* test OBJIDX */ - so = (void*)(sc->base + sg->p.objlist); - sg->objs = so; - no = sg->p.lobjlist; - /* Clear the bogus "hold" count */ - sg->nobj = 0; - for (;no > 0; so++,no--) { - if (so->ttl > 0 && so->ttl < t_now) - continue; - if (so->ttl < 0 && -so->ttl < t_now) - continue; - HSH_Prealloc(sp); - oc = sp->wrk->nobjcore; - oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; - oc->flags &= ~OC_F_BUSY; - oc->priv = sg; - oc->priv2 = no; - oc->methods = &smp_oc_methods; - oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); - memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); - (void)HSH_Insert(sp); - AZ(sp->wrk->nobjcore); - EXP_Inject(oc, sg->lru, fabs(so->ttl)); - sg->nobj++; - } - 
WRK_SumStat(sp->wrk); - sg->flags |= SMP_SEG_LOADED; -} - -/*-------------------------------------------------------------------- - * Attempt to open and read in a segment list - */ - -static int -smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) -{ - uint64_t length, l; - struct smp_segptr *ss, *se; - struct smp_seg *sg, *sg1, *sg2; - int i, n = 0; - - ASSERT_CLI(); - i = smp_chk_sign(ctx); - if (i) - return (i); - - ss = SIGN_DATA(ctx); - length = ctx->ss->length; - - if (length == 0) { - /* No segments */ - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - return (0); - } - se = ss + length / sizeof *ss; - se--; - assert(ss <= se); - - /* - * Locate the free reserve, there are only two basic cases, - * but once we start dropping segments, things gets more complicated. - */ - - sc->free_offset = se->offset + se->length; - l = sc->mediasize - sc->free_offset; - if (se->offset > ss->offset && l >= sc->free_reserve) { - /* - * [__xxxxyyyyzzzz___] - * Plenty of space at tail, do nothing. - */ - } else if (ss->offset > se->offset) { - /* - * [zzzz____xxxxyyyy_] - * (make) space between ends - * We might nuke the entire tail end without getting - * enough space, in which case we fall through to the - * last check. 
- */ - while (ss < se && ss->offset > se->offset) { - l = ss->offset - (se->offset + se->length); - if (l > sc->free_reserve) - break; - ss++; - n++; - } - } - - if (l < sc->free_reserve) { - /* - * [__xxxxyyyyzzzz___] - * (make) space at front - */ - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - while (ss < se) { - l = ss->offset - sc->free_offset; - if (l > sc->free_reserve) - break; - ss++; - n++; - } - } - - assert (l >= sc->free_reserve); - - - sg1 = NULL; - sg2 = NULL; - for(; ss <= se; ss++) { - ALLOC_OBJ(sg, SMP_SEG_MAGIC); - AN(sg); - sg->lru = LRU_Alloc(); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - sg->p = *ss; - - sg->flags |= SMP_SEG_MUSTLOAD; - - /* - * HACK: prevent save_segs from nuking segment until we have - * HACK: loaded it. - */ - sg->nobj = 1; - if (sg1 != NULL) { - assert(sg1->p.offset != sg->p.offset); - if (sg1->p.offset < sg->p.offset) - assert(smp_segend(sg1) <= sg->p.offset); - else - assert(smp_segend(sg) <= sg1->p.offset); - } - if (sg2 != NULL) { - assert(sg2->p.offset != sg->p.offset); - if (sg2->p.offset < sg->p.offset) - assert(smp_segend(sg2) <= sg->p.offset); - else - assert(smp_segend(sg) <= sg2->p.offset); - } - - /* XXX: check that they are inside silo */ - /* XXX: check that they don't overlap */ - /* XXX: check that they are serial */ - sg->sc = sc; - VTAILQ_INSERT_TAIL(&sc->segments, sg, list); - sg2 = sg; - if (sg1 == NULL) - sg1 = sg; - } - printf("Dropped %d segments to make free_reserve\n", n); - return (0); -} - -/*-------------------------------------------------------------------- - * Create a new segment - */ - -static void -smp_new_seg(struct smp_sc *sc) -{ - struct smp_seg *sg, *sg2; - - Lck_AssertHeld(&sc->mtx); - ALLOC_OBJ(sg, SMP_SEG_MAGIC); - AN(sg); - sg->sc = sc; - sg->lru = LRU_Alloc(); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - - /* XXX: find where it goes in silo */ - - sg->p.offset = sc->free_offset; - // XXX: align */ - assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]); - assert(sg->p.offset 
< sc->mediasize); - - sg->p.length = sc->aim_segl; - sg->p.length &= ~7; - - if (smp_segend(sg) > sc->mediasize) { - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - sg->p.offset = sc->free_offset; - sg2 = VTAILQ_FIRST(&sc->segments); - if (smp_segend(sg) > sg2->p.offset) { - printf("Out of space in persistent silo\n"); - printf("Committing suicide, restart will make space\n"); - exit (0); - } - } - - - assert(smp_segend(sg) <= sc->mediasize); - - sg2 = VTAILQ_FIRST(&sc->segments); - if (sg2 != NULL && sg2->p.offset > sc->free_offset) { - if (smp_segend(sg) > sg2->p.offset) { - printf("Out of space in persistent silo\n"); - printf("Committing suicide, restart will make space\n"); - exit (0); - } - assert(smp_segend(sg) <= sg2->p.offset); - } - - sg->p.offset = IRNUP(sc, sg->p.offset); - sg->p.length = IRNDN(sc, sg->p.length); - sc->free_offset = sg->p.offset + sg->p.length; - - VTAILQ_INSERT_TAIL(&sc->segments, sg, list); - - /* Neuter the new segment in case there is an old one there */ - AN(sg->p.offset); - smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Set up our allocation points */ - sc->cur_seg = sg; - sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); - sc->next_top = smp_segend(sg); - sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - IASSERTALIGN(sc, sc->next_bot); - IASSERTALIGN(sc, sc->next_top); - sg->objs = (void*)(sc->base + sc->next_top); -} - -/*-------------------------------------------------------------------- - * Close a segment - */ - -static void -smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) -{ - uint64_t left, dst, len; - void *dp; - - Lck_AssertHeld(&sc->mtx); - - assert(sg == sc->cur_seg); - AN(sg->p.offset); - sc->cur_seg = NULL; - - if (sg->nalloc == 0) { - /* XXX: if segment is empty, delete instead */ - VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); - return; - } - - /* - * If there is enough space left, that we can move the smp_objects - * down without 
overwriting the present copy, we will do so to - * compact the segment. - */ - left = smp_spaceleft(sc, sg); - len = sizeof(struct smp_object) * sg->p.lobjlist; - if (len < left) { - dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); - dp = sc->base + dst; - assert((uintptr_t)dp + len < (uintptr_t)sg->objs); - memcpy(dp, sg->objs, len); - sc->next_top = dst; - sg->objs = dp; - sg->p.length = (sc->next_top - sg->p.offset) - + len + IRNUP(sc, SMP_SIGN_SPACE); - (void)smp_spaceleft(sc, sg); /* for the asserts */ - - } - - /* Update the segment header */ - sg->p.objlist = sc->next_top; - - /* Write the (empty) OBJIDX signature */ - sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - assert(sc->next_top >= sc->next_bot); - smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Write the (empty) SEGTAIL signature */ - smp_def_sign(sc, sg->ctx, - sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Save segment list */ - smp_save_segs(sc); - sc->free_offset = smp_segend(sg); -} - -/*-------------------------------------------------------------------- - * Silo worker thread - */ - -static void * -smp_thread(struct sess *sp, void *priv) -{ - struct smp_sc *sc; - struct smp_seg *sg; - - (void)sp; - CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC); - - /* First, load all the objects from all segments */ - VTAILQ_FOREACH(sg, &sc->segments, list) - if (sg->flags & SMP_SEG_MUSTLOAD) - smp_load_seg(sp, sc, sg); - - sc->flags |= SMP_SC_LOADED; - BAN_Deref(&sc->tailban); - sc->tailban = NULL; - printf("Silo completely loaded\n"); - while (1) - (void)sleep (1); - NEEDLESS_RETURN(NULL); -} - -/*-------------------------------------------------------------------- - * Open a silo in the worker process - */ - -static void -smp_open(const struct stevedore *st) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - - Lck_New(&sc->mtx, 
lck_smp); - Lck_Lock(&sc->mtx); - - sc->stevedore = st; - - /* We trust the parent to give us a valid silo, for good measure: */ - AZ(smp_valid_silo(sc)); - - AZ(mprotect(sc->base, 4096, PROT_READ)); - - sc->ident = SIGN_DATA(&sc->idn); - - /* We attempt ban1 first, and if that fails, try ban2 */ - if (smp_open_bans(sc, &sc->ban1)) - AZ(smp_open_bans(sc, &sc->ban2)); - - /* We attempt seg1 first, and if that fails, try seg2 */ - if (smp_open_segs(sc, &sc->seg1)) - AZ(smp_open_segs(sc, &sc->seg2)); - - sc->tailban = BAN_TailRef(); - AN(sc->tailban); - - /* XXX: save segments to ensure consistency between seg1 & seg2 ? */ - - /* XXX: abandon early segments to make sure we have free space ? */ - - /* Open a new segment, so we are ready to write */ - smp_new_seg(sc); - - /* Start the worker silo worker thread, it will load the objects */ - WRK_BgThread(&sc->thread, "persistence", smp_thread, sc); - - VTAILQ_INSERT_TAIL(&silos, sc, list); - Lck_Unlock(&sc->mtx); -} - -/*-------------------------------------------------------------------- - * Close a silo - */ - -static void -smp_close(const struct stevedore *st) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - Lck_Lock(&sc->mtx); - smp_close_seg(sc, sc->cur_seg); - Lck_Unlock(&sc->mtx); - - /* XXX: reap thread */ -} - -/*-------------------------------------------------------------------- - * Allocate a bite. - * - * Allocate [min_size...max_size] space from the bottom of the segment, - * as is convenient. - * - * If 'so' + 'idx' is given, also allocate a smp_object from the top - * of the segment. - * - * Return the segment in 'ssg' if given. 
- */ - -static struct storage * -smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, - struct smp_object **so, unsigned *idx, struct smp_seg **ssg) -{ - struct smp_sc *sc; - struct storage *ss; - struct smp_seg *sg; - unsigned tries; - uint64_t left, extra; - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - assert(min_size <= max_size); - - max_size = IRNUP(sc, max_size); - min_size = IRNUP(sc, min_size); - - extra = IRNUP(sc, sizeof(*ss)); - if (so != NULL) { - extra += sizeof(**so); - AN(idx); - } - - Lck_Lock(&sc->mtx); - sg = NULL; - ss = NULL; - for (tries = 0; tries < 3; tries++) { - left = smp_spaceleft(sc, sc->cur_seg); - if (left >= extra + min_size) - break; - smp_close_seg(sc, sc->cur_seg); - smp_new_seg(sc); - } - if (left >= extra + min_size) { - if (left < extra + max_size) - max_size = IRNDN(sc, left - extra); - - sg = sc->cur_seg; - ss = (void*)(sc->base + sc->next_bot); - sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); - sg->nalloc++; - if (so != NULL) { - sc->next_top -= sizeof(**so); - *so = (void*)(sc->base + sc->next_top); - /* Render this smp_object mostly harmless */ - (*so)->ttl = 0.; - (*so)->ban = 0.; - (*so)->ptr = 0;; - sg->objs = *so; - *idx = ++sg->p.lobjlist; - } - (void)smp_spaceleft(sc, sg); /* for the assert */ - } - Lck_Unlock(&sc->mtx); - - if (ss == NULL) - return (ss); - AN(sg); - assert(max_size >= min_size); - - /* Fill the storage structure */ - memset(ss, 0, sizeof *ss); - ss->magic = STORAGE_MAGIC; - ss->ptr = PRNUP(sc, ss + 1); - ss->space = max_size; - ss->priv = sc; - ss->stevedore = st; - ss->fd = sc->fd; - if (ssg != NULL) - *ssg = sg; - return (ss); -} - -/*-------------------------------------------------------------------- - * Find the per-segment lru list for this object - */ - -static struct lru * -smp_getlru(const struct object *o) -{ - struct smp_seg *sg; - - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); - return (sg->lru); -} - 
-/*-------------------------------------------------------------------- - * Allocate an object - */ - -static struct object * -smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, - const struct stv_objsecrets *soc) -{ - struct object *o; - struct storage *st; - struct smp_sc *sc; - struct smp_seg *sg; - struct smp_object *so; - struct objcore *oc; - unsigned objidx; - - CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); - AN(sp->objcore); - AN(sp->wrk->ttl >= 0); - - ltot = IRNUP(sc, ltot); - - st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); - if (st == NULL) - return (NULL); - - assert(st->space >= ltot); - ltot = st->len = st->space; - - o = STV_MkObject(sp, st->ptr, ltot, soc); - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - o->objstore = st; - - oc = o->objcore; - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - oc->flags |= OC_F_LRUDONTMOVE; - - Lck_Lock(&sc->mtx); - sg->nfixed++; - sg->nobj++; - - /* We have to do this somewhere, might as well be here... */ - assert(sizeof so->hash == DIGEST_LEN); - memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); - so->ttl = o->ttl; /* XXX: grace? */ - so->ptr = (uint8_t*)o - sc->base; - so->ban = o->ban_t; - - oc->priv = sg; - oc->priv2 = objidx; - oc->methods = &smp_oc_methods; - - Lck_Unlock(&sc->mtx); - return (o); -} - -/*-------------------------------------------------------------------- - * Allocate a bite - */ - -static struct storage * -smp_alloc(struct stevedore *st, size_t size) -{ - - return (smp_allocx(st, - size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); -} - -/*-------------------------------------------------------------------- - * Trim a bite - * XXX: We could trim the last allocation. 
- */ - -static void -smp_trim(struct storage *ss, size_t size) -{ - - (void)ss; - (void)size; -} - -/*-------------------------------------------------------------------- - * We don't track frees of storage, we track the objects which own the - * storage and when there are no more objects in in the first segment, - * it can be reclaimed. - * XXX: We could free the last allocation, but does that happen ? - */ - -static void __match_proto__(storage_free_f) -smp_free(struct storage *st) -{ - - /* XXX */ - (void)st; -} - -/*-------------------------------------------------------------------- - * Pause until all silos have loaded. - */ - -void -SMP_Ready(void) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - do { - VTAILQ_FOREACH(sc, &silos, list) - if (!(sc->flags & SMP_SC_LOADED)) - break; - if (sc != NULL) - (void)sleep(1); - } while (sc != NULL); -} - -/*--------------------------------------------------------------------*/ - -const struct stevedore smp_stevedore = { - .magic = STEVEDORE_MAGIC, - .name = "persistent", - .init = smp_init, - .open = smp_open, - .close = smp_close, - .alloc = smp_alloc, - .allocobj = smp_allocobj, - .getlru = smp_getlru, - .free = smp_free, - .trim = smp_trim, -}; - -/*-------------------------------------------------------------------- - * Persistence is a bear to test unadultered, so we cheat by adding - * a cli command we can use to make it do tricks for us. - */ - -static void -debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) -{ - struct smp_seg *sg; - struct objcore *oc; - - cli_out(cli, "Silo: %s (%s)\n", - sc->stevedore->ident, sc->filename); - VTAILQ_FOREACH(sg, &sc->segments, list) { - cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", - (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); - if (sg == sc->cur_seg) - cli_out(cli, - " Alloc: [0x%jx ... 
0x%jx] = 0x%jx free\n", - (uintmax_t)(sc->next_bot), - (uintmax_t)(sc->next_top), - (uintmax_t)(sc->next_top - sc->next_bot)); - cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", - sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); - if (objs) { - VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) - cli_out(cli, " %s %p\n", - oc == &sg->lru->senteniel ? - "senteniel" : "OC: ", oc); - } - } -} - -static void -debug_persistent(struct cli *cli, const char * const * av, void *priv) -{ - struct smp_sc *sc; - - (void)priv; - - if (av[2] == NULL) { - VTAILQ_FOREACH(sc, &silos, list) - debug_report_silo(cli, sc, 0); - return; - } - VTAILQ_FOREACH(sc, &silos, list) - if (!strcmp(av[2], sc->stevedore->ident)) - break; - if (sc == NULL) { - cli_out(cli, "Silo <%s> not found\n", av[2]); - cli_result(cli, CLIS_PARAM); - return; - } - if (av[3] == NULL) { - debug_report_silo(cli, sc, 0); - return; - } - Lck_Lock(&sc->mtx); - if (!strcmp(av[3], "sync")) { - smp_close_seg(sc, sc->cur_seg); - smp_new_seg(sc); - } else if (!strcmp(av[3], "dump")) { - debug_report_silo(cli, sc, 1); - } else { - cli_out(cli, "Unknown operation\n"); - cli_result(cli, CLIS_PARAM); - } - Lck_Unlock(&sc->mtx); -} - -static struct cli_proto debug_cmds[] = { - { "debug.persistent", "debug.persistent", - "Persistent debugging magic:\n" - "\tdebug.persistent [stevedore [cmd]]\n" - "With no cmd arg, a summary of the silo is returned.\n" - "Possible commands:\n" - "\tsync\tClose current segment, open a new one\n" - "\tdump\tinclude objcores in silo summary\n" - "", - 0, 2, "d", debug_persistent }, - { NULL } -}; - -void -SMP_Init(void) -{ - CLI_AddFuncs(debug_cmds); -} From phk at varnish-cache.org Tue Feb 8 10:50:33 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 08 Feb 2011 11:50:33 +0100 Subject: [master] 1b33b0f clone from storage_persistent.c Message-ID: commit 1b33b0f41bc883136c0367cb151abcc738593924 Author: Poul-Henning Kamp Date: Tue Feb 8 10:07:06 2011 +0000 clone from 
storage_persistent.c diff --git a/bin/varnishd/storage_persistent_subr.c b/bin/varnishd/storage_persistent_subr.c new file mode 100644 index 0000000..eeee9ae --- /dev/null +++ b/bin/varnishd/storage_persistent_subr.c @@ -0,0 +1,1431 @@ +/*- + * Copyright (c) 2008-2010 Linpro AS + * All rights reserved. + * + * Author: Poul-Henning Kamp + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Persistent storage method + * + * XXX: Before we start the client or maybe after it stops, we should give the + * XXX: stevedores a chance to examine their storage for consistency. + * + * XXX: Do we ever free the LRU-lists ? 
+ */ + +#include "config.h" + +#include "svnid.h" +SVNID("$Id$") + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cache.h" +#include "stevedore.h" +#include "hash_slinger.h" +#include "vsha256.h" +#include "cli.h" +#include "cli_priv.h" + +#include "persistent.h" +#include "storage_persistent.h" + +/*--------------------------------------------------------------------*/ + +/* + * silos is unlocked, it only changes during startup when we are + * single-threaded + */ +static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); + +/*-------------------------------------------------------------------- + * SIGNATURE functions + * The signature is SHA256 over: + * 1. The smp_sign struct up to but not including the length field. + * 2. smp_sign->length bytes, starting after the smp_sign structure + * 3. The smp-sign->length field. + * The signature is stored after the byte-range from step 2. + */ + +/*-------------------------------------------------------------------- + * Define a signature by location and identifier. 
+ */ + +void +smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id) +{ + + AZ(off & 7); /* Alignment */ + assert(strlen(id) < sizeof ctx->ss->ident); + + memset(ctx, 0, sizeof ctx); + ctx->ss = (void*)(sc->base + off); + ctx->unique = sc->unique; + ctx->id = id; +} + +/*-------------------------------------------------------------------- + * Check that a signature is good, leave state ready for append + */ +static int +smp_chk_sign(struct smp_signctx *ctx) +{ + struct SHA256Context cx; + unsigned char sign[SHA256_LEN]; + int r = 0; + + if (strncmp(ctx->id, ctx->ss->ident, sizeof ctx->ss->ident)) + r = 1; + else if (ctx->unique != ctx->ss->unique) + r = 2; + else if ((uintptr_t)ctx->ss != ctx->ss->mapped) + r = 3; + else { + SHA256_Init(&ctx->ctx); + SHA256_Update(&ctx->ctx, ctx->ss, + offsetof(struct smp_sign, length)); + SHA256_Update(&ctx->ctx, SIGN_DATA(ctx), ctx->ss->length); + cx = ctx->ctx; + SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); + SHA256_Final(sign, &cx); + if (memcmp(sign, SIGN_END(ctx), sizeof sign)) + r = 4; + } + if (r) { + fprintf(stderr, "CHK(%p %s %p %s) = %d\n", + ctx, ctx->id, ctx->ss, + r > 1 ? ctx->ss->ident : "", r); + } + return (r); +} + +/*-------------------------------------------------------------------- + * Append data to a signature + */ +static void +smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len) +{ + struct SHA256Context cx; + unsigned char sign[SHA256_LEN]; + + if (len != 0) { + SHA256_Update(&ctx->ctx, ptr, len); + ctx->ss->length += len; + } + cx = ctx->ctx; + SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); + SHA256_Final(sign, &cx); + memcpy(SIGN_END(ctx), sign, sizeof sign); +XXXAZ(smp_chk_sign(ctx)); +} + +/*-------------------------------------------------------------------- + * Reset a signature to empty, prepare for appending. 
+ */ + +static void +smp_reset_sign(struct smp_signctx *ctx) +{ + + memset(ctx->ss, 0, sizeof *ctx->ss); + strcpy(ctx->ss->ident, ctx->id); + ctx->ss->unique = ctx->unique; + ctx->ss->mapped = (uintptr_t)ctx->ss; + SHA256_Init(&ctx->ctx); + SHA256_Update(&ctx->ctx, ctx->ss, + offsetof(struct smp_sign, length)); + smp_append_sign(ctx, NULL, 0); +} + +/*-------------------------------------------------------------------- + * Force a write of a signature block to the backing store. + */ + +static void +smp_sync_sign(const struct smp_signctx *ctx) +{ + int i; + + /* XXX: round to pages */ + i = msync((void*)ctx->ss, ctx->ss->length + SHA256_LEN, MS_SYNC); + if (i && 0) + fprintf(stderr, "SyncSign(%p %s) = %d %s\n", + ctx->ss, ctx->id, i, strerror(errno)); +} + +/*-------------------------------------------------------------------- + * Create and force a new signature to backing store + */ + +static void +smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id) +{ + smp_def_sign(sc, ctx, off, id); + smp_reset_sign(ctx); + smp_sync_sign(ctx); +} + + +/*-------------------------------------------------------------------:e + * Initialize a Silo with a valid but empty structure. + * + * XXX: more intelligent sizing of things. 
+ */ + +void +smp_newsilo(struct smp_sc *sc) +{ + struct smp_ident *si; + + ASSERT_MGT(); + assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); + + /* Choose a new random number */ + sc->unique = random(); + + smp_reset_sign(&sc->idn); + si = sc->ident; + + memset(si, 0, sizeof *si); + strcpy(si->ident, SMP_IDENT_STRING); + si->byte_order = 0x12345678; + si->size = sizeof *si; + si->major_version = 2; + si->unique = sc->unique; + si->mediasize = sc->mediasize; + si->granularity = sc->granularity; + /* + * Aim for cache-line-width + */ + si->align = sizeof(void*) * 2; + sc->align = si->align; + + si->stuff[SMP_BAN1_STUFF] = sc->granularity; + si->stuff[SMP_BAN2_STUFF] = si->stuff[SMP_BAN1_STUFF] + 1024*1024; + si->stuff[SMP_SEG1_STUFF] = si->stuff[SMP_BAN2_STUFF] + 1024*1024; + si->stuff[SMP_SEG2_STUFF] = si->stuff[SMP_SEG1_STUFF] + 1024*1024; + si->stuff[SMP_SPC_STUFF] = si->stuff[SMP_SEG2_STUFF] + 1024*1024; + si->stuff[SMP_END_STUFF] = si->mediasize; + assert(si->stuff[SMP_SPC_STUFF] < si->stuff[SMP_END_STUFF]); + + smp_new_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); + smp_new_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); + smp_new_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); + smp_new_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); + + smp_append_sign(&sc->idn, si, sizeof *si); + smp_sync_sign(&sc->idn); +} + +/*-------------------------------------------------------------------- + * Check if a silo is valid. 
+ */ + +int +smp_valid_silo(struct smp_sc *sc) +{ + struct smp_ident *si; + int i, j; + + assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); + + if (smp_chk_sign(&sc->idn)) + return (1); + + si = sc->ident; + if (strcmp(si->ident, SMP_IDENT_STRING)) + return (2); + if (si->byte_order != 0x12345678) + return (3); + if (si->size != sizeof *si) + return (4); + if (si->major_version != 2) + return (5); + if (si->mediasize != sc->mediasize) + return (7); + if (si->granularity != sc->granularity) + return (8); + if (si->align < sizeof(void*)) + return (9); + if (!PWR2(si->align)) + return (10); + sc->align = si->align; + sc->unique = si->unique; + + /* XXX: Sanity check stuff[6] */ + + assert(si->stuff[SMP_BAN1_STUFF] > sizeof *si + SHA256_LEN); + assert(si->stuff[SMP_BAN2_STUFF] > si->stuff[SMP_BAN1_STUFF]); + assert(si->stuff[SMP_SEG1_STUFF] > si->stuff[SMP_BAN2_STUFF]); + assert(si->stuff[SMP_SEG2_STUFF] > si->stuff[SMP_SEG1_STUFF]); + assert(si->stuff[SMP_SPC_STUFF] > si->stuff[SMP_SEG2_STUFF]); + assert(si->stuff[SMP_END_STUFF] == sc->mediasize); + + assert(smp_stuff_len(sc, SMP_SEG1_STUFF) > 65536); + assert(smp_stuff_len(sc, SMP_SEG1_STUFF) == + smp_stuff_len(sc, SMP_SEG2_STUFF)); + + assert(smp_stuff_len(sc, SMP_BAN1_STUFF) > 65536); + assert(smp_stuff_len(sc, SMP_BAN1_STUFF) == + smp_stuff_len(sc, SMP_BAN2_STUFF)); + + smp_def_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); + smp_def_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); + smp_def_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); + smp_def_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); + + /* We must have one valid BAN table */ + i = smp_chk_sign(&sc->ban1); + j = smp_chk_sign(&sc->ban2); + if (i && j) + return (100 + i * 10 + j); + + /* We must have one valid SEG table */ + i = smp_chk_sign(&sc->seg1); + j = smp_chk_sign(&sc->seg2); + if (i && j) + return (200 + i * 10 + j); + return (0); +} + +/*-------------------------------------------------------------------- 
+ * Write the segmentlist back to the silo. + * + * We write the first copy, sync it synchronously, then write the + * second copy and sync it synchronously. + * + * Provided the kernel doesn't lie, that means we will always have + * at least one valid copy on in the silo. + */ + +static void +smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx) +{ + struct smp_segptr *ss; + struct smp_seg *sg; + uint64_t length; + + Lck_AssertHeld(&sc->mtx); + smp_reset_sign(ctx); + ss = SIGN_DATA(ctx); + length = 0; + VTAILQ_FOREACH(sg, &sc->segments, list) { + assert(sg->p.offset < sc->mediasize); + assert(sg->p.offset + sg->p.length <= sc->mediasize); + *ss = sg->p; + ss++; + length += sizeof *ss; + } + smp_append_sign(ctx, SIGN_DATA(ctx), length); + smp_sync_sign(ctx); +} + +static void +smp_save_segs(struct smp_sc *sc) +{ + struct smp_seg *sg, *sg2; + + Lck_AssertHeld(&sc->mtx); + + /* + * Remove empty segments from the front of the list + * before we write the segments to disk. + */ + VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) { + if (sg->nobj > 0) + break; + if (sg == sc->cur_seg) + continue; + VTAILQ_REMOVE(&sc->segments, sg, list); + free(sg); + } + smp_save_seg(sc, &sc->seg1); + smp_save_seg(sc, &sc->seg2); +} + + +/*--------------------------------------------------------------------- + */ + +static struct smp_object * +smp_find_so(const struct smp_seg *sg, const struct objcore *oc) +{ + struct smp_object *so; + unsigned smp_idx; + + smp_idx = oc->priv2; + assert(smp_idx > 0); + assert(smp_idx <= sg->p.lobjlist); + so = &sg->objs[sg->p.lobjlist - smp_idx]; + return (so); +} + +/*--------------------------------------------------------------------- + * Check if a given storage structure is valid to use + */ + +static int +smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, + const struct storage *st) +{ + struct smp_seg *sg2; + const uint8_t *pst; + uint64_t o; + + (void)sg; /* XXX: faster: Start search from here */ + pst = (const void *)st; 
+ + if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF])) + return (0x01); /* Before silo payload start */ + if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) + return (0x02); /* After silo end */ + + o = pst - sc->base; + + /* Find which segment contains the storage structure */ + VTAILQ_FOREACH(sg2, &sc->segments, list) + if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) + break; + if (sg2 == NULL) + return (0x04); /* No claiming segment */ + if (!(sg2->flags & SMP_SEG_LOADED)) + return (0x08); /* Claiming segment not loaded */ + + /* It is now safe to access the storage structure */ + if (st->magic != STORAGE_MAGIC) + return (0x10); /* Not enough magic */ + + if (o + st->space >= sg2->p.objlist) + return (0x20); /* Allocation not inside segment */ + + if (st->len > st->space) + return (0x40); /* Plain bad... */ + + /* + * XXX: We could patch up st->stevedore and st->priv here + * XXX: but if things go right, we will never need them. + */ + return (0); +} + +/*--------------------------------------------------------------------- + * objcore methods for persistent objects + */ + +static struct object * +smp_oc_getobj(struct worker *wrk, struct objcore *oc) +{ + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + struct storage *st; + uint64_t l; + int bad; + + /* Some calls are direct, but they should match anyway */ + assert(oc->methods->getobj == smp_oc_getobj); + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + if (wrk == NULL) + AZ(oc->flags & OC_F_NEEDFIXUP); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); + + o = (void*)(sg->sc->base + so->ptr); + /* + * The object may not be in this segment since we allocate it + * In a separate operation than the smp_object. We could check + * that it is in a later segment, but that would be complicated. 
+ * XXX: For now, be happy if it is inside th silo + */ + ASSERT_PTR_IN_SILO(sg->sc, o); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + + /* + * If this flag is not set, it will not be, and the lock is not + * needed to test it. + */ + if (!(oc->flags & OC_F_NEEDFIXUP)) + return (o); + + AN(wrk); + Lck_Lock(&sg->sc->mtx); + /* Check again, we might have raced. */ + if (oc->flags & OC_F_NEEDFIXUP) { + /* We trust caller to have a refcnt for us */ + o->objcore = oc; + + bad = 0; + l = 0; + VTAILQ_FOREACH(st, &o->store, list) { + bad |= smp_loaded_st(sg->sc, sg, st); + if (bad) + break; + l += st->len; + } + if (l != o->len) + bad |= 0x100; + + if(bad) { + o->ttl = 0; + o->grace = 0; + so->ttl = 0; + } + + sg->nfixed++; + wrk->stats.n_object++; + wrk->stats.n_vampireobject--; + oc->flags &= ~OC_F_NEEDFIXUP; + } + Lck_Unlock(&sg->sc->mtx); + return (o); +} + +static void +smp_oc_updatemeta(struct objcore *oc) +{ + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + double mttl; + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + o = smp_oc_getobj(NULL, oc); + AN(o); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); + so = smp_find_so(sg, oc); + + if (isnan(o->grace)) + mttl = o->ttl; + else + mttl = - (o->ttl + o->grace); + + if (sg == sg->sc->cur_seg) { + /* Lock necessary, we might race close_seg */ + Lck_Lock(&sg->sc->mtx); + so->ban = o->ban_t; + so->ttl = mttl; + Lck_Unlock(&sg->sc->mtx); + } else { + so->ban = o->ban_t; + so->ttl = mttl; + } +} + +static void __match_proto__() +smp_oc_freeobj(struct objcore *oc) +{ + struct smp_seg *sg; + struct smp_object *so; + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); + + Lck_Lock(&sg->sc->mtx); + so->ttl = 0; + so->ptr = 0; + + assert(sg->nobj > 0); + assert(sg->nfixed > 0); + sg->nobj--; + sg->nfixed--; + + Lck_Unlock(&sg->sc->mtx); +} + +static struct objcore_methods smp_oc_methods = { + .getobj = 
smp_oc_getobj, + .updatemeta = smp_oc_updatemeta, + .freeobj = smp_oc_freeobj, +}; + +/*-------------------------------------------------------------------- + * Add a new ban to all silos + */ + +static void +smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, + uint32_t flags, uint32_t len, const char *ban) +{ + uint8_t *ptr, *ptr2; + + (void)sc; + ptr = ptr2 = SIGN_END(ctx); + + memcpy(ptr, "BAN", 4); + ptr += 4; + + memcpy(ptr, &t0, sizeof t0); + ptr += sizeof t0; + + memcpy(ptr, &flags, sizeof flags); + ptr += sizeof flags; + + memcpy(ptr, &len, sizeof len); + ptr += sizeof len; + + memcpy(ptr, ban, len); + ptr += len; + + smp_append_sign(ctx, ptr2, ptr - ptr2); +} + +void +SMP_NewBan(double t0, const char *ban) +{ + struct smp_sc *sc; + uint32_t l = strlen(ban) + 1; + + VTAILQ_FOREACH(sc, &silos, list) { + smp_appendban(sc, &sc->ban1, t0, 0, l, ban); + smp_appendban(sc, &sc->ban2, t0, 0, l, ban); + } +} + +/*-------------------------------------------------------------------- + * Attempt to open and read in a ban list + */ + +static int +smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) +{ + uint8_t *ptr, *pe; + double t0; + uint32_t flags, length; + int i, retval = 0; + + ASSERT_CLI(); + (void)sc; + i = smp_chk_sign(ctx); + if (i) + return (i); + ptr = SIGN_DATA(ctx); + pe = ptr + ctx->ss->length; + + while (ptr < pe) { + if (memcmp(ptr, "BAN", 4)) { + retval = 1001; + break; + } + ptr += 4; + + memcpy(&t0, ptr, sizeof t0); + ptr += sizeof t0; + + memcpy(&flags, ptr, sizeof flags); + ptr += sizeof flags; + if (flags != 0) { + retval = 1002; + break; + } + + memcpy(&length, ptr, sizeof length); + ptr += sizeof length; + if (ptr + length > pe) { + retval = 1003; + break; + } + + if (ptr[length - 1] != '\0') { + retval = 1004; + break; + } + + BAN_Reload(t0, flags, (const char *)ptr); + + ptr += length; + } + assert(ptr <= pe); + return (retval); +} + + +/*--------------------------------------------------------------------*/ + +static 
uint64_t +smp_segend(const struct smp_seg *sg) +{ + + return (sg->p.offset + sg->p.length); +} + +static uint64_t +smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) +{ + + IASSERTALIGN(sc, sc->next_bot); + assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); + assert(sc->next_bot >= sg->p.offset); + assert(sc->next_top < sg->p.offset + sg->p.length); + return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); +} + +/*-------------------------------------------------------------------- + * Load segments + * + * The overall objective is to register the existence of an object, based + * only on the minimally sized struct smp_object, without causing the + * main object to be faulted in. + * + * XXX: We can test this by mprotecting the main body of the segment + * XXX: until the first fixup happens, or even just over this loop, + * XXX: However: the requires that the smp_objects starter further + * XXX: into the segment than a page so that they do not get hit + * XXX: by the protection. 
+ */ + +static void +smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) +{ + struct smp_object *so; + struct objcore *oc; + uint32_t no; + double t_now = TIM_real(); + struct smp_signctx ctx[1]; + + ASSERT_SILO_THREAD(sc); + CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); + CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + assert(sg->flags & SMP_SEG_MUSTLOAD); + sg->flags &= ~SMP_SEG_MUSTLOAD; + AN(sg->p.offset); + if (sg->p.objlist == 0) + return; + smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); + if (smp_chk_sign(ctx)) + return; + + /* test SEGTAIL */ + /* test OBJIDX */ + so = (void*)(sc->base + sg->p.objlist); + sg->objs = so; + no = sg->p.lobjlist; + /* Clear the bogus "hold" count */ + sg->nobj = 0; + for (;no > 0; so++,no--) { + if (so->ttl > 0 && so->ttl < t_now) + continue; + if (so->ttl < 0 && -so->ttl < t_now) + continue; + HSH_Prealloc(sp); + oc = sp->wrk->nobjcore; + oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; + oc->flags &= ~OC_F_BUSY; + oc->priv = sg; + oc->priv2 = no; + oc->methods = &smp_oc_methods; + oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); + memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); + (void)HSH_Insert(sp); + AZ(sp->wrk->nobjcore); + EXP_Inject(oc, sg->lru, fabs(so->ttl)); + sg->nobj++; + } + WRK_SumStat(sp->wrk); + sg->flags |= SMP_SEG_LOADED; +} + +/*-------------------------------------------------------------------- + * Attempt to open and read in a segment list + */ + +static int +smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) +{ + uint64_t length, l; + struct smp_segptr *ss, *se; + struct smp_seg *sg, *sg1, *sg2; + int i, n = 0; + + ASSERT_CLI(); + i = smp_chk_sign(ctx); + if (i) + return (i); + + ss = SIGN_DATA(ctx); + length = ctx->ss->length; + + if (length == 0) { + /* No segments */ + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + return (0); + } + se = ss + length / sizeof *ss; + se--; + assert(ss <= se); + + /* + * Locate the free reserve, 
there are only two basic cases, + * but once we start dropping segments, things gets more complicated. + */ + + sc->free_offset = se->offset + se->length; + l = sc->mediasize - sc->free_offset; + if (se->offset > ss->offset && l >= sc->free_reserve) { + /* + * [__xxxxyyyyzzzz___] + * Plenty of space at tail, do nothing. + */ + } else if (ss->offset > se->offset) { + /* + * [zzzz____xxxxyyyy_] + * (make) space between ends + * We might nuke the entire tail end without getting + * enough space, in which case we fall through to the + * last check. + */ + while (ss < se && ss->offset > se->offset) { + l = ss->offset - (se->offset + se->length); + if (l > sc->free_reserve) + break; + ss++; + n++; + } + } + + if (l < sc->free_reserve) { + /* + * [__xxxxyyyyzzzz___] + * (make) space at front + */ + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + while (ss < se) { + l = ss->offset - sc->free_offset; + if (l > sc->free_reserve) + break; + ss++; + n++; + } + } + + assert (l >= sc->free_reserve); + + + sg1 = NULL; + sg2 = NULL; + for(; ss <= se; ss++) { + ALLOC_OBJ(sg, SMP_SEG_MAGIC); + AN(sg); + sg->lru = LRU_Alloc(); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + sg->p = *ss; + + sg->flags |= SMP_SEG_MUSTLOAD; + + /* + * HACK: prevent save_segs from nuking segment until we have + * HACK: loaded it. 
+ */ + sg->nobj = 1; + if (sg1 != NULL) { + assert(sg1->p.offset != sg->p.offset); + if (sg1->p.offset < sg->p.offset) + assert(smp_segend(sg1) <= sg->p.offset); + else + assert(smp_segend(sg) <= sg1->p.offset); + } + if (sg2 != NULL) { + assert(sg2->p.offset != sg->p.offset); + if (sg2->p.offset < sg->p.offset) + assert(smp_segend(sg2) <= sg->p.offset); + else + assert(smp_segend(sg) <= sg2->p.offset); + } + + /* XXX: check that they are inside silo */ + /* XXX: check that they don't overlap */ + /* XXX: check that they are serial */ + sg->sc = sc; + VTAILQ_INSERT_TAIL(&sc->segments, sg, list); + sg2 = sg; + if (sg1 == NULL) + sg1 = sg; + } + printf("Dropped %d segments to make free_reserve\n", n); + return (0); +} + +/*-------------------------------------------------------------------- + * Create a new segment + */ + +static void +smp_new_seg(struct smp_sc *sc) +{ + struct smp_seg *sg, *sg2; + + Lck_AssertHeld(&sc->mtx); + ALLOC_OBJ(sg, SMP_SEG_MAGIC); + AN(sg); + sg->sc = sc; + sg->lru = LRU_Alloc(); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + + /* XXX: find where it goes in silo */ + + sg->p.offset = sc->free_offset; + // XXX: align */ + assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]); + assert(sg->p.offset < sc->mediasize); + + sg->p.length = sc->aim_segl; + sg->p.length &= ~7; + + if (smp_segend(sg) > sc->mediasize) { + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + sg->p.offset = sc->free_offset; + sg2 = VTAILQ_FIRST(&sc->segments); + if (smp_segend(sg) > sg2->p.offset) { + printf("Out of space in persistent silo\n"); + printf("Committing suicide, restart will make space\n"); + exit (0); + } + } + + + assert(smp_segend(sg) <= sc->mediasize); + + sg2 = VTAILQ_FIRST(&sc->segments); + if (sg2 != NULL && sg2->p.offset > sc->free_offset) { + if (smp_segend(sg) > sg2->p.offset) { + printf("Out of space in persistent silo\n"); + printf("Committing suicide, restart will make space\n"); + exit (0); + } + assert(smp_segend(sg) <= sg2->p.offset); + } + + 
sg->p.offset = IRNUP(sc, sg->p.offset); + sg->p.length = IRNDN(sc, sg->p.length); + sc->free_offset = sg->p.offset + sg->p.length; + + VTAILQ_INSERT_TAIL(&sc->segments, sg, list); + + /* Neuter the new segment in case there is an old one there */ + AN(sg->p.offset); + smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Set up our allocation points */ + sc->cur_seg = sg; + sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); + sc->next_top = smp_segend(sg); + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + IASSERTALIGN(sc, sc->next_bot); + IASSERTALIGN(sc, sc->next_top); + sg->objs = (void*)(sc->base + sc->next_top); +} + +/*-------------------------------------------------------------------- + * Close a segment + */ + +static void +smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) +{ + uint64_t left, dst, len; + void *dp; + + Lck_AssertHeld(&sc->mtx); + + assert(sg == sc->cur_seg); + AN(sg->p.offset); + sc->cur_seg = NULL; + + if (sg->nalloc == 0) { + /* XXX: if segment is empty, delete instead */ + VTAILQ_REMOVE(&sc->segments, sg, list); + free(sg); + return; + } + + /* + * If there is enough space left, that we can move the smp_objects + * down without overwriting the present copy, we will do so to + * compact the segment. 
+ */ + left = smp_spaceleft(sc, sg); + len = sizeof(struct smp_object) * sg->p.lobjlist; + if (len < left) { + dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); + dp = sc->base + dst; + assert((uintptr_t)dp + len < (uintptr_t)sg->objs); + memcpy(dp, sg->objs, len); + sc->next_top = dst; + sg->objs = dp; + sg->p.length = (sc->next_top - sg->p.offset) + + len + IRNUP(sc, SMP_SIGN_SPACE); + (void)smp_spaceleft(sc, sg); /* for the asserts */ + + } + + /* Update the segment header */ + sg->p.objlist = sc->next_top; + + /* Write the (empty) OBJIDX signature */ + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + assert(sc->next_top >= sc->next_bot); + smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Write the (empty) SEGTAIL signature */ + smp_def_sign(sc, sg->ctx, + sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Save segment list */ + smp_save_segs(sc); + sc->free_offset = smp_segend(sg); +} + +/*-------------------------------------------------------------------- + * Silo worker thread + */ + +static void * +smp_thread(struct sess *sp, void *priv) +{ + struct smp_sc *sc; + struct smp_seg *sg; + + (void)sp; + CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC); + + /* First, load all the objects from all segments */ + VTAILQ_FOREACH(sg, &sc->segments, list) + if (sg->flags & SMP_SEG_MUSTLOAD) + smp_load_seg(sp, sc, sg); + + sc->flags |= SMP_SC_LOADED; + BAN_Deref(&sc->tailban); + sc->tailban = NULL; + printf("Silo completely loaded\n"); + while (1) + (void)sleep (1); + NEEDLESS_RETURN(NULL); +} + +/*-------------------------------------------------------------------- + * Open a silo in the worker process + */ + +static void +smp_open(const struct stevedore *st) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + + Lck_New(&sc->mtx, lck_smp); + Lck_Lock(&sc->mtx); + + sc->stevedore = st; + + /* We trust the 
parent to give us a valid silo, for good measure: */ + AZ(smp_valid_silo(sc)); + + AZ(mprotect(sc->base, 4096, PROT_READ)); + + sc->ident = SIGN_DATA(&sc->idn); + + /* We attempt ban1 first, and if that fails, try ban2 */ + if (smp_open_bans(sc, &sc->ban1)) + AZ(smp_open_bans(sc, &sc->ban2)); + + /* We attempt seg1 first, and if that fails, try seg2 */ + if (smp_open_segs(sc, &sc->seg1)) + AZ(smp_open_segs(sc, &sc->seg2)); + + sc->tailban = BAN_TailRef(); + AN(sc->tailban); + + /* XXX: save segments to ensure consistency between seg1 & seg2 ? */ + + /* XXX: abandon early segments to make sure we have free space ? */ + + /* Open a new segment, so we are ready to write */ + smp_new_seg(sc); + + /* Start the worker silo worker thread, it will load the objects */ + WRK_BgThread(&sc->thread, "persistence", smp_thread, sc); + + VTAILQ_INSERT_TAIL(&silos, sc, list); + Lck_Unlock(&sc->mtx); +} + +/*-------------------------------------------------------------------- + * Close a silo + */ + +static void +smp_close(const struct stevedore *st) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + Lck_Lock(&sc->mtx); + smp_close_seg(sc, sc->cur_seg); + Lck_Unlock(&sc->mtx); + + /* XXX: reap thread */ +} + +/*-------------------------------------------------------------------- + * Allocate a bite. + * + * Allocate [min_size...max_size] space from the bottom of the segment, + * as is convenient. + * + * If 'so' + 'idx' is given, also allocate a smp_object from the top + * of the segment. + * + * Return the segment in 'ssg' if given. 
+ */ + +static struct storage * +smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, + struct smp_object **so, unsigned *idx, struct smp_seg **ssg) +{ + struct smp_sc *sc; + struct storage *ss; + struct smp_seg *sg; + unsigned tries; + uint64_t left, extra; + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + assert(min_size <= max_size); + + max_size = IRNUP(sc, max_size); + min_size = IRNUP(sc, min_size); + + extra = IRNUP(sc, sizeof(*ss)); + if (so != NULL) { + extra += sizeof(**so); + AN(idx); + } + + Lck_Lock(&sc->mtx); + sg = NULL; + ss = NULL; + for (tries = 0; tries < 3; tries++) { + left = smp_spaceleft(sc, sc->cur_seg); + if (left >= extra + min_size) + break; + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } + if (left >= extra + min_size) { + if (left < extra + max_size) + max_size = IRNDN(sc, left - extra); + + sg = sc->cur_seg; + ss = (void*)(sc->base + sc->next_bot); + sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); + sg->nalloc++; + if (so != NULL) { + sc->next_top -= sizeof(**so); + *so = (void*)(sc->base + sc->next_top); + /* Render this smp_object mostly harmless */ + (*so)->ttl = 0.; + (*so)->ban = 0.; + (*so)->ptr = 0;; + sg->objs = *so; + *idx = ++sg->p.lobjlist; + } + (void)smp_spaceleft(sc, sg); /* for the assert */ + } + Lck_Unlock(&sc->mtx); + + if (ss == NULL) + return (ss); + AN(sg); + assert(max_size >= min_size); + + /* Fill the storage structure */ + memset(ss, 0, sizeof *ss); + ss->magic = STORAGE_MAGIC; + ss->ptr = PRNUP(sc, ss + 1); + ss->space = max_size; + ss->priv = sc; + ss->stevedore = st; + ss->fd = sc->fd; + if (ssg != NULL) + *ssg = sg; + return (ss); +} + +/*-------------------------------------------------------------------- + * Find the per-segment lru list for this object + */ + +static struct lru * +smp_getlru(const struct object *o) +{ + struct smp_seg *sg; + + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); + return (sg->lru); +} + 
+/*-------------------------------------------------------------------- + * Allocate an object + */ + +static struct object * +smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, + const struct stv_objsecrets *soc) +{ + struct object *o; + struct storage *st; + struct smp_sc *sc; + struct smp_seg *sg; + struct smp_object *so; + struct objcore *oc; + unsigned objidx; + + CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); + AN(sp->objcore); + AN(sp->wrk->ttl >= 0); + + ltot = IRNUP(sc, ltot); + + st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); + if (st == NULL) + return (NULL); + + assert(st->space >= ltot); + ltot = st->len = st->space; + + o = STV_MkObject(sp, st->ptr, ltot, soc); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + o->objstore = st; + + oc = o->objcore; + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + oc->flags |= OC_F_LRUDONTMOVE; + + Lck_Lock(&sc->mtx); + sg->nfixed++; + sg->nobj++; + + /* We have to do this somewhere, might as well be here... */ + assert(sizeof so->hash == DIGEST_LEN); + memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); + so->ttl = o->ttl; /* XXX: grace? */ + so->ptr = (uint8_t*)o - sc->base; + so->ban = o->ban_t; + + oc->priv = sg; + oc->priv2 = objidx; + oc->methods = &smp_oc_methods; + + Lck_Unlock(&sc->mtx); + return (o); +} + +/*-------------------------------------------------------------------- + * Allocate a bite + */ + +static struct storage * +smp_alloc(struct stevedore *st, size_t size) +{ + + return (smp_allocx(st, + size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); +} + +/*-------------------------------------------------------------------- + * Trim a bite + * XXX: We could trim the last allocation. 
+ */ + +static void +smp_trim(struct storage *ss, size_t size) +{ + + (void)ss; + (void)size; +} + +/*-------------------------------------------------------------------- + * We don't track frees of storage, we track the objects which own the + * storage and when there are no more objects in in the first segment, + * it can be reclaimed. + * XXX: We could free the last allocation, but does that happen ? + */ + +static void __match_proto__(storage_free_f) +smp_free(struct storage *st) +{ + + /* XXX */ + (void)st; +} + +/*-------------------------------------------------------------------- + * Pause until all silos have loaded. + */ + +void +SMP_Ready(void) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + do { + VTAILQ_FOREACH(sc, &silos, list) + if (!(sc->flags & SMP_SC_LOADED)) + break; + if (sc != NULL) + (void)sleep(1); + } while (sc != NULL); +} + +/*--------------------------------------------------------------------*/ + +const struct stevedore smp_stevedore = { + .magic = STEVEDORE_MAGIC, + .name = "persistent", + .init = smp_mgt_init, + .open = smp_open, + .close = smp_close, + .alloc = smp_alloc, + .allocobj = smp_allocobj, + .getlru = smp_getlru, + .free = smp_free, + .trim = smp_trim, +}; + +/*-------------------------------------------------------------------- + * Persistence is a bear to test unadultered, so we cheat by adding + * a cli command we can use to make it do tricks for us. + */ + +static void +debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) +{ + struct smp_seg *sg; + struct objcore *oc; + + cli_out(cli, "Silo: %s (%s)\n", + sc->stevedore->ident, sc->filename); + VTAILQ_FOREACH(sg, &sc->segments, list) { + cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", + (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); + if (sg == sc->cur_seg) + cli_out(cli, + " Alloc: [0x%jx ... 
0x%jx] = 0x%jx free\n", + (uintmax_t)(sc->next_bot), + (uintmax_t)(sc->next_top), + (uintmax_t)(sc->next_top - sc->next_bot)); + cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", + sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); + if (objs) { + VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) + cli_out(cli, " %s %p\n", + oc == &sg->lru->senteniel ? + "senteniel" : "OC: ", oc); + } + } +} + +static void +debug_persistent(struct cli *cli, const char * const * av, void *priv) +{ + struct smp_sc *sc; + + (void)priv; + + if (av[2] == NULL) { + VTAILQ_FOREACH(sc, &silos, list) + debug_report_silo(cli, sc, 0); + return; + } + VTAILQ_FOREACH(sc, &silos, list) + if (!strcmp(av[2], sc->stevedore->ident)) + break; + if (sc == NULL) { + cli_out(cli, "Silo <%s> not found\n", av[2]); + cli_result(cli, CLIS_PARAM); + return; + } + if (av[3] == NULL) { + debug_report_silo(cli, sc, 0); + return; + } + Lck_Lock(&sc->mtx); + if (!strcmp(av[3], "sync")) { + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } else if (!strcmp(av[3], "dump")) { + debug_report_silo(cli, sc, 1); + } else { + cli_out(cli, "Unknown operation\n"); + cli_result(cli, CLIS_PARAM); + } + Lck_Unlock(&sc->mtx); +} + +static struct cli_proto debug_cmds[] = { + { "debug.persistent", "debug.persistent", + "Persistent debugging magic:\n" + "\tdebug.persistent [stevedore [cmd]]\n" + "With no cmd arg, a summary of the silo is returned.\n" + "Possible commands:\n" + "\tsync\tClose current segment, open a new one\n" + "\tdump\tinclude objcores in silo summary\n" + "", + 0, 2, "d", debug_persistent }, + { NULL } +}; + +void +SMP_Init(void) +{ + CLI_AddFuncs(debug_cmds); +} From phk at varnish-cache.org Tue Feb 8 10:50:36 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 08 Feb 2011 11:50:36 +0100 Subject: [master] 55f1241 Move various subroutines to storage_persistent_subr.c, these are the ones that later may find usage in a separate silo-maintenance utility. 
Message-ID: commit 55f1241e4c2ba2b702d21347b855049bfe9cc77f Author: Poul-Henning Kamp Date: Tue Feb 8 10:21:42 2011 +0000 Move various subroutines to storage_persistent_subr.c, these are the ones that later may find usage in a separate silo-maintenance utility. diff --git a/bin/varnishd/Makefile.am b/bin/varnishd/Makefile.am index ab7b1d6..208be08 100644 --- a/bin/varnishd/Makefile.am +++ b/bin/varnishd/Makefile.am @@ -65,6 +65,7 @@ varnishd_SOURCES = \ storage_malloc.c \ storage_persistent.c \ storage_persistent_mgt.c \ + storage_persistent_subr.c \ storage_synth.c \ storage_umem.c \ stevedore_utils.c \ diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index eeee9ae..4a971cb 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -41,7 +41,6 @@ SVNID("$Id$") #include #include #include -#include #include #include #include @@ -67,258 +66,6 @@ SVNID("$Id$") static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); /*-------------------------------------------------------------------- - * SIGNATURE functions - * The signature is SHA256 over: - * 1. The smp_sign struct up to but not including the length field. - * 2. smp_sign->length bytes, starting after the smp_sign structure - * 3. The smp-sign->length field. - * The signature is stored after the byte-range from step 2. - */ - -/*-------------------------------------------------------------------- - * Define a signature by location and identifier. 
- */ - -void -smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, - uint64_t off, const char *id) -{ - - AZ(off & 7); /* Alignment */ - assert(strlen(id) < sizeof ctx->ss->ident); - - memset(ctx, 0, sizeof ctx); - ctx->ss = (void*)(sc->base + off); - ctx->unique = sc->unique; - ctx->id = id; -} - -/*-------------------------------------------------------------------- - * Check that a signature is good, leave state ready for append - */ -static int -smp_chk_sign(struct smp_signctx *ctx) -{ - struct SHA256Context cx; - unsigned char sign[SHA256_LEN]; - int r = 0; - - if (strncmp(ctx->id, ctx->ss->ident, sizeof ctx->ss->ident)) - r = 1; - else if (ctx->unique != ctx->ss->unique) - r = 2; - else if ((uintptr_t)ctx->ss != ctx->ss->mapped) - r = 3; - else { - SHA256_Init(&ctx->ctx); - SHA256_Update(&ctx->ctx, ctx->ss, - offsetof(struct smp_sign, length)); - SHA256_Update(&ctx->ctx, SIGN_DATA(ctx), ctx->ss->length); - cx = ctx->ctx; - SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); - SHA256_Final(sign, &cx); - if (memcmp(sign, SIGN_END(ctx), sizeof sign)) - r = 4; - } - if (r) { - fprintf(stderr, "CHK(%p %s %p %s) = %d\n", - ctx, ctx->id, ctx->ss, - r > 1 ? ctx->ss->ident : "", r); - } - return (r); -} - -/*-------------------------------------------------------------------- - * Append data to a signature - */ -static void -smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len) -{ - struct SHA256Context cx; - unsigned char sign[SHA256_LEN]; - - if (len != 0) { - SHA256_Update(&ctx->ctx, ptr, len); - ctx->ss->length += len; - } - cx = ctx->ctx; - SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length)); - SHA256_Final(sign, &cx); - memcpy(SIGN_END(ctx), sign, sizeof sign); -XXXAZ(smp_chk_sign(ctx)); -} - -/*-------------------------------------------------------------------- - * Reset a signature to empty, prepare for appending. 
- */ - -static void -smp_reset_sign(struct smp_signctx *ctx) -{ - - memset(ctx->ss, 0, sizeof *ctx->ss); - strcpy(ctx->ss->ident, ctx->id); - ctx->ss->unique = ctx->unique; - ctx->ss->mapped = (uintptr_t)ctx->ss; - SHA256_Init(&ctx->ctx); - SHA256_Update(&ctx->ctx, ctx->ss, - offsetof(struct smp_sign, length)); - smp_append_sign(ctx, NULL, 0); -} - -/*-------------------------------------------------------------------- - * Force a write of a signature block to the backing store. - */ - -static void -smp_sync_sign(const struct smp_signctx *ctx) -{ - int i; - - /* XXX: round to pages */ - i = msync((void*)ctx->ss, ctx->ss->length + SHA256_LEN, MS_SYNC); - if (i && 0) - fprintf(stderr, "SyncSign(%p %s) = %d %s\n", - ctx->ss, ctx->id, i, strerror(errno)); -} - -/*-------------------------------------------------------------------- - * Create and force a new signature to backing store - */ - -static void -smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, - uint64_t off, const char *id) -{ - smp_def_sign(sc, ctx, off, id); - smp_reset_sign(ctx); - smp_sync_sign(ctx); -} - - -/*-------------------------------------------------------------------:e - * Initialize a Silo with a valid but empty structure. - * - * XXX: more intelligent sizing of things. 
- */ - -void -smp_newsilo(struct smp_sc *sc) -{ - struct smp_ident *si; - - ASSERT_MGT(); - assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); - - /* Choose a new random number */ - sc->unique = random(); - - smp_reset_sign(&sc->idn); - si = sc->ident; - - memset(si, 0, sizeof *si); - strcpy(si->ident, SMP_IDENT_STRING); - si->byte_order = 0x12345678; - si->size = sizeof *si; - si->major_version = 2; - si->unique = sc->unique; - si->mediasize = sc->mediasize; - si->granularity = sc->granularity; - /* - * Aim for cache-line-width - */ - si->align = sizeof(void*) * 2; - sc->align = si->align; - - si->stuff[SMP_BAN1_STUFF] = sc->granularity; - si->stuff[SMP_BAN2_STUFF] = si->stuff[SMP_BAN1_STUFF] + 1024*1024; - si->stuff[SMP_SEG1_STUFF] = si->stuff[SMP_BAN2_STUFF] + 1024*1024; - si->stuff[SMP_SEG2_STUFF] = si->stuff[SMP_SEG1_STUFF] + 1024*1024; - si->stuff[SMP_SPC_STUFF] = si->stuff[SMP_SEG2_STUFF] + 1024*1024; - si->stuff[SMP_END_STUFF] = si->mediasize; - assert(si->stuff[SMP_SPC_STUFF] < si->stuff[SMP_END_STUFF]); - - smp_new_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); - smp_new_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); - smp_new_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); - smp_new_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); - - smp_append_sign(&sc->idn, si, sizeof *si); - smp_sync_sign(&sc->idn); -} - -/*-------------------------------------------------------------------- - * Check if a silo is valid. 
- */ - -int -smp_valid_silo(struct smp_sc *sc) -{ - struct smp_ident *si; - int i, j; - - assert(strlen(SMP_IDENT_STRING) < sizeof si->ident); - - if (smp_chk_sign(&sc->idn)) - return (1); - - si = sc->ident; - if (strcmp(si->ident, SMP_IDENT_STRING)) - return (2); - if (si->byte_order != 0x12345678) - return (3); - if (si->size != sizeof *si) - return (4); - if (si->major_version != 2) - return (5); - if (si->mediasize != sc->mediasize) - return (7); - if (si->granularity != sc->granularity) - return (8); - if (si->align < sizeof(void*)) - return (9); - if (!PWR2(si->align)) - return (10); - sc->align = si->align; - sc->unique = si->unique; - - /* XXX: Sanity check stuff[6] */ - - assert(si->stuff[SMP_BAN1_STUFF] > sizeof *si + SHA256_LEN); - assert(si->stuff[SMP_BAN2_STUFF] > si->stuff[SMP_BAN1_STUFF]); - assert(si->stuff[SMP_SEG1_STUFF] > si->stuff[SMP_BAN2_STUFF]); - assert(si->stuff[SMP_SEG2_STUFF] > si->stuff[SMP_SEG1_STUFF]); - assert(si->stuff[SMP_SPC_STUFF] > si->stuff[SMP_SEG2_STUFF]); - assert(si->stuff[SMP_END_STUFF] == sc->mediasize); - - assert(smp_stuff_len(sc, SMP_SEG1_STUFF) > 65536); - assert(smp_stuff_len(sc, SMP_SEG1_STUFF) == - smp_stuff_len(sc, SMP_SEG2_STUFF)); - - assert(smp_stuff_len(sc, SMP_BAN1_STUFF) > 65536); - assert(smp_stuff_len(sc, SMP_BAN1_STUFF) == - smp_stuff_len(sc, SMP_BAN2_STUFF)); - - smp_def_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1"); - smp_def_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2"); - smp_def_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1"); - smp_def_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2"); - - /* We must have one valid BAN table */ - i = smp_chk_sign(&sc->ban1); - j = smp_chk_sign(&sc->ban2); - if (i && j) - return (100 + i * 10 + j); - - /* We must have one valid SEG table */ - i = smp_chk_sign(&sc->seg1); - j = smp_chk_sign(&sc->seg2); - if (i && j) - return (200 + i * 10 + j); - return (0); -} - -/*-------------------------------------------------------------------- 
* Write the segmentlist back to the silo. * * We write the first copy, sync it synchronously, then write the diff --git a/bin/varnishd/storage_persistent.h b/bin/varnishd/storage_persistent.h index 45bf5d3..8184588 100644 --- a/bin/varnishd/storage_persistent.h +++ b/bin/varnishd/storage_persistent.h @@ -175,16 +175,21 @@ struct smp_sc { #define SIGN_END(ctx) ((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length)) /* storage_persistent.c */ -void smp_newsilo(struct smp_sc *sc); -int smp_valid_silo(struct smp_sc *sc); - -void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, - uint64_t off, const char *id); /* storage_persistent_mgt.c */ void smp_mgt_init(struct stevedore *parent, int ac, char * const *av); -/*--------------------------------------------------------------------*/ +/* storage_persistent_subr.c */ +void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id); +int smp_chk_sign(struct smp_signctx *ctx); +void smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len); +void smp_reset_sign(struct smp_signctx *ctx); +void smp_sync_sign(const struct smp_signctx *ctx); +void smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, + uint64_t off, const char *id); +void smp_newsilo(struct smp_sc *sc); +int smp_valid_silo(struct smp_sc *sc); /*-------------------------------------------------------------------- * Caculate payload of some stuff diff --git a/bin/varnishd/storage_persistent_subr.c b/bin/varnishd/storage_persistent_subr.c index eeee9ae..bc6e1fd 100644 --- a/bin/varnishd/storage_persistent_subr.c +++ b/bin/varnishd/storage_persistent_subr.c @@ -38,34 +38,17 @@ #include "svnid.h" SVNID("$Id$") -#include -#include #include -#include -#include #include -#include -#include +#include #include #include "cache.h" -#include "stevedore.h" -#include "hash_slinger.h" #include "vsha256.h" -#include "cli.h" -#include "cli_priv.h" #include "persistent.h" #include "storage_persistent.h" 
-/*--------------------------------------------------------------------*/ - -/* - * silos is unlocked, it only changes during startup when we are - * single-threaded - */ -static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); - /*-------------------------------------------------------------------- * SIGNATURE functions * The signature is SHA256 over: @@ -96,7 +79,7 @@ smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, /*-------------------------------------------------------------------- * Check that a signature is good, leave state ready for append */ -static int +int smp_chk_sign(struct smp_signctx *ctx) { struct SHA256Context cx; @@ -131,7 +114,7 @@ smp_chk_sign(struct smp_signctx *ctx) /*-------------------------------------------------------------------- * Append data to a signature */ -static void +void smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len) { struct SHA256Context cx; @@ -152,7 +135,7 @@ XXXAZ(smp_chk_sign(ctx)); * Reset a signature to empty, prepare for appending. */ -static void +void smp_reset_sign(struct smp_signctx *ctx) { @@ -170,7 +153,7 @@ smp_reset_sign(struct smp_signctx *ctx) * Force a write of a signature block to the backing store. */ -static void +void smp_sync_sign(const struct smp_signctx *ctx) { int i; @@ -186,7 +169,7 @@ smp_sync_sign(const struct smp_signctx *ctx) * Create and force a new signature to backing store */ -static void +void smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, uint64_t off, const char *id) { @@ -195,7 +178,6 @@ smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, smp_sync_sign(ctx); } - /*-------------------------------------------------------------------:e * Initialize a Silo with a valid but empty structure. * @@ -317,1115 +299,3 @@ smp_valid_silo(struct smp_sc *sc) return (200 + i * 10 + j); return (0); } - -/*-------------------------------------------------------------------- - * Write the segmentlist back to the silo. 
- * - * We write the first copy, sync it synchronously, then write the - * second copy and sync it synchronously. - * - * Provided the kernel doesn't lie, that means we will always have - * at least one valid copy on in the silo. - */ - -static void -smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx) -{ - struct smp_segptr *ss; - struct smp_seg *sg; - uint64_t length; - - Lck_AssertHeld(&sc->mtx); - smp_reset_sign(ctx); - ss = SIGN_DATA(ctx); - length = 0; - VTAILQ_FOREACH(sg, &sc->segments, list) { - assert(sg->p.offset < sc->mediasize); - assert(sg->p.offset + sg->p.length <= sc->mediasize); - *ss = sg->p; - ss++; - length += sizeof *ss; - } - smp_append_sign(ctx, SIGN_DATA(ctx), length); - smp_sync_sign(ctx); -} - -static void -smp_save_segs(struct smp_sc *sc) -{ - struct smp_seg *sg, *sg2; - - Lck_AssertHeld(&sc->mtx); - - /* - * Remove empty segments from the front of the list - * before we write the segments to disk. - */ - VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) { - if (sg->nobj > 0) - break; - if (sg == sc->cur_seg) - continue; - VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); - } - smp_save_seg(sc, &sc->seg1); - smp_save_seg(sc, &sc->seg2); -} - - -/*--------------------------------------------------------------------- - */ - -static struct smp_object * -smp_find_so(const struct smp_seg *sg, const struct objcore *oc) -{ - struct smp_object *so; - unsigned smp_idx; - - smp_idx = oc->priv2; - assert(smp_idx > 0); - assert(smp_idx <= sg->p.lobjlist); - so = &sg->objs[sg->p.lobjlist - smp_idx]; - return (so); -} - -/*--------------------------------------------------------------------- - * Check if a given storage structure is valid to use - */ - -static int -smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, - const struct storage *st) -{ - struct smp_seg *sg2; - const uint8_t *pst; - uint64_t o; - - (void)sg; /* XXX: faster: Start search from here */ - pst = (const void *)st; - - if (pst < (sc->base + 
sc->ident->stuff[SMP_SPC_STUFF])) - return (0x01); /* Before silo payload start */ - if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) - return (0x02); /* After silo end */ - - o = pst - sc->base; - - /* Find which segment contains the storage structure */ - VTAILQ_FOREACH(sg2, &sc->segments, list) - if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) - break; - if (sg2 == NULL) - return (0x04); /* No claiming segment */ - if (!(sg2->flags & SMP_SEG_LOADED)) - return (0x08); /* Claiming segment not loaded */ - - /* It is now safe to access the storage structure */ - if (st->magic != STORAGE_MAGIC) - return (0x10); /* Not enough magic */ - - if (o + st->space >= sg2->p.objlist) - return (0x20); /* Allocation not inside segment */ - - if (st->len > st->space) - return (0x40); /* Plain bad... */ - - /* - * XXX: We could patch up st->stevedore and st->priv here - * XXX: but if things go right, we will never need them. - */ - return (0); -} - -/*--------------------------------------------------------------------- - * objcore methods for persistent objects - */ - -static struct object * -smp_oc_getobj(struct worker *wrk, struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - struct storage *st; - uint64_t l; - int bad; - - /* Some calls are direct, but they should match anyway */ - assert(oc->methods->getobj == smp_oc_getobj); - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - if (wrk == NULL) - AZ(oc->flags & OC_F_NEEDFIXUP); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - o = (void*)(sg->sc->base + so->ptr); - /* - * The object may not be in this segment since we allocate it - * In a separate operation than the smp_object. We could check - * that it is in a later segment, but that would be complicated. 
- * XXX: For now, be happy if it is inside th silo - */ - ASSERT_PTR_IN_SILO(sg->sc, o); - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - - /* - * If this flag is not set, it will not be, and the lock is not - * needed to test it. - */ - if (!(oc->flags & OC_F_NEEDFIXUP)) - return (o); - - AN(wrk); - Lck_Lock(&sg->sc->mtx); - /* Check again, we might have raced. */ - if (oc->flags & OC_F_NEEDFIXUP) { - /* We trust caller to have a refcnt for us */ - o->objcore = oc; - - bad = 0; - l = 0; - VTAILQ_FOREACH(st, &o->store, list) { - bad |= smp_loaded_st(sg->sc, sg, st); - if (bad) - break; - l += st->len; - } - if (l != o->len) - bad |= 0x100; - - if(bad) { - o->ttl = 0; - o->grace = 0; - so->ttl = 0; - } - - sg->nfixed++; - wrk->stats.n_object++; - wrk->stats.n_vampireobject--; - oc->flags &= ~OC_F_NEEDFIXUP; - } - Lck_Unlock(&sg->sc->mtx); - return (o); -} - -static void -smp_oc_updatemeta(struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - double mttl; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - o = smp_oc_getobj(NULL, oc); - AN(o); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); - so = smp_find_so(sg, oc); - - if (isnan(o->grace)) - mttl = o->ttl; - else - mttl = - (o->ttl + o->grace); - - if (sg == sg->sc->cur_seg) { - /* Lock necessary, we might race close_seg */ - Lck_Lock(&sg->sc->mtx); - so->ban = o->ban_t; - so->ttl = mttl; - Lck_Unlock(&sg->sc->mtx); - } else { - so->ban = o->ban_t; - so->ttl = mttl; - } -} - -static void __match_proto__() -smp_oc_freeobj(struct objcore *oc) -{ - struct smp_seg *sg; - struct smp_object *so; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - Lck_Lock(&sg->sc->mtx); - so->ttl = 0; - so->ptr = 0; - - assert(sg->nobj > 0); - assert(sg->nfixed > 0); - sg->nobj--; - sg->nfixed--; - - Lck_Unlock(&sg->sc->mtx); -} - -static struct objcore_methods smp_oc_methods = { - .getobj = 
smp_oc_getobj, - .updatemeta = smp_oc_updatemeta, - .freeobj = smp_oc_freeobj, -}; - -/*-------------------------------------------------------------------- - * Add a new ban to all silos - */ - -static void -smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, - uint32_t flags, uint32_t len, const char *ban) -{ - uint8_t *ptr, *ptr2; - - (void)sc; - ptr = ptr2 = SIGN_END(ctx); - - memcpy(ptr, "BAN", 4); - ptr += 4; - - memcpy(ptr, &t0, sizeof t0); - ptr += sizeof t0; - - memcpy(ptr, &flags, sizeof flags); - ptr += sizeof flags; - - memcpy(ptr, &len, sizeof len); - ptr += sizeof len; - - memcpy(ptr, ban, len); - ptr += len; - - smp_append_sign(ctx, ptr2, ptr - ptr2); -} - -void -SMP_NewBan(double t0, const char *ban) -{ - struct smp_sc *sc; - uint32_t l = strlen(ban) + 1; - - VTAILQ_FOREACH(sc, &silos, list) { - smp_appendban(sc, &sc->ban1, t0, 0, l, ban); - smp_appendban(sc, &sc->ban2, t0, 0, l, ban); - } -} - -/*-------------------------------------------------------------------- - * Attempt to open and read in a ban list - */ - -static int -smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) -{ - uint8_t *ptr, *pe; - double t0; - uint32_t flags, length; - int i, retval = 0; - - ASSERT_CLI(); - (void)sc; - i = smp_chk_sign(ctx); - if (i) - return (i); - ptr = SIGN_DATA(ctx); - pe = ptr + ctx->ss->length; - - while (ptr < pe) { - if (memcmp(ptr, "BAN", 4)) { - retval = 1001; - break; - } - ptr += 4; - - memcpy(&t0, ptr, sizeof t0); - ptr += sizeof t0; - - memcpy(&flags, ptr, sizeof flags); - ptr += sizeof flags; - if (flags != 0) { - retval = 1002; - break; - } - - memcpy(&length, ptr, sizeof length); - ptr += sizeof length; - if (ptr + length > pe) { - retval = 1003; - break; - } - - if (ptr[length - 1] != '\0') { - retval = 1004; - break; - } - - BAN_Reload(t0, flags, (const char *)ptr); - - ptr += length; - } - assert(ptr <= pe); - return (retval); -} - - -/*--------------------------------------------------------------------*/ - -static 
uint64_t -smp_segend(const struct smp_seg *sg) -{ - - return (sg->p.offset + sg->p.length); -} - -static uint64_t -smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) -{ - - IASSERTALIGN(sc, sc->next_bot); - assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); - assert(sc->next_bot >= sg->p.offset); - assert(sc->next_top < sg->p.offset + sg->p.length); - return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); -} - -/*-------------------------------------------------------------------- - * Load segments - * - * The overall objective is to register the existence of an object, based - * only on the minimally sized struct smp_object, without causing the - * main object to be faulted in. - * - * XXX: We can test this by mprotecting the main body of the segment - * XXX: until the first fixup happens, or even just over this loop, - * XXX: However: the requires that the smp_objects starter further - * XXX: into the segment than a page so that they do not get hit - * XXX: by the protection. 
- */ - -static void -smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) -{ - struct smp_object *so; - struct objcore *oc; - uint32_t no; - double t_now = TIM_real(); - struct smp_signctx ctx[1]; - - ASSERT_SILO_THREAD(sc); - CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); - CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - assert(sg->flags & SMP_SEG_MUSTLOAD); - sg->flags &= ~SMP_SEG_MUSTLOAD; - AN(sg->p.offset); - if (sg->p.objlist == 0) - return; - smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); - if (smp_chk_sign(ctx)) - return; - - /* test SEGTAIL */ - /* test OBJIDX */ - so = (void*)(sc->base + sg->p.objlist); - sg->objs = so; - no = sg->p.lobjlist; - /* Clear the bogus "hold" count */ - sg->nobj = 0; - for (;no > 0; so++,no--) { - if (so->ttl > 0 && so->ttl < t_now) - continue; - if (so->ttl < 0 && -so->ttl < t_now) - continue; - HSH_Prealloc(sp); - oc = sp->wrk->nobjcore; - oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; - oc->flags &= ~OC_F_BUSY; - oc->priv = sg; - oc->priv2 = no; - oc->methods = &smp_oc_methods; - oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); - memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); - (void)HSH_Insert(sp); - AZ(sp->wrk->nobjcore); - EXP_Inject(oc, sg->lru, fabs(so->ttl)); - sg->nobj++; - } - WRK_SumStat(sp->wrk); - sg->flags |= SMP_SEG_LOADED; -} - -/*-------------------------------------------------------------------- - * Attempt to open and read in a segment list - */ - -static int -smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) -{ - uint64_t length, l; - struct smp_segptr *ss, *se; - struct smp_seg *sg, *sg1, *sg2; - int i, n = 0; - - ASSERT_CLI(); - i = smp_chk_sign(ctx); - if (i) - return (i); - - ss = SIGN_DATA(ctx); - length = ctx->ss->length; - - if (length == 0) { - /* No segments */ - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - return (0); - } - se = ss + length / sizeof *ss; - se--; - assert(ss <= se); - - /* - * Locate the free reserve, 
there are only two basic cases, - * but once we start dropping segments, things gets more complicated. - */ - - sc->free_offset = se->offset + se->length; - l = sc->mediasize - sc->free_offset; - if (se->offset > ss->offset && l >= sc->free_reserve) { - /* - * [__xxxxyyyyzzzz___] - * Plenty of space at tail, do nothing. - */ - } else if (ss->offset > se->offset) { - /* - * [zzzz____xxxxyyyy_] - * (make) space between ends - * We might nuke the entire tail end without getting - * enough space, in which case we fall through to the - * last check. - */ - while (ss < se && ss->offset > se->offset) { - l = ss->offset - (se->offset + se->length); - if (l > sc->free_reserve) - break; - ss++; - n++; - } - } - - if (l < sc->free_reserve) { - /* - * [__xxxxyyyyzzzz___] - * (make) space at front - */ - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - while (ss < se) { - l = ss->offset - sc->free_offset; - if (l > sc->free_reserve) - break; - ss++; - n++; - } - } - - assert (l >= sc->free_reserve); - - - sg1 = NULL; - sg2 = NULL; - for(; ss <= se; ss++) { - ALLOC_OBJ(sg, SMP_SEG_MAGIC); - AN(sg); - sg->lru = LRU_Alloc(); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - sg->p = *ss; - - sg->flags |= SMP_SEG_MUSTLOAD; - - /* - * HACK: prevent save_segs from nuking segment until we have - * HACK: loaded it. 
- */ - sg->nobj = 1; - if (sg1 != NULL) { - assert(sg1->p.offset != sg->p.offset); - if (sg1->p.offset < sg->p.offset) - assert(smp_segend(sg1) <= sg->p.offset); - else - assert(smp_segend(sg) <= sg1->p.offset); - } - if (sg2 != NULL) { - assert(sg2->p.offset != sg->p.offset); - if (sg2->p.offset < sg->p.offset) - assert(smp_segend(sg2) <= sg->p.offset); - else - assert(smp_segend(sg) <= sg2->p.offset); - } - - /* XXX: check that they are inside silo */ - /* XXX: check that they don't overlap */ - /* XXX: check that they are serial */ - sg->sc = sc; - VTAILQ_INSERT_TAIL(&sc->segments, sg, list); - sg2 = sg; - if (sg1 == NULL) - sg1 = sg; - } - printf("Dropped %d segments to make free_reserve\n", n); - return (0); -} - -/*-------------------------------------------------------------------- - * Create a new segment - */ - -static void -smp_new_seg(struct smp_sc *sc) -{ - struct smp_seg *sg, *sg2; - - Lck_AssertHeld(&sc->mtx); - ALLOC_OBJ(sg, SMP_SEG_MAGIC); - AN(sg); - sg->sc = sc; - sg->lru = LRU_Alloc(); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - - /* XXX: find where it goes in silo */ - - sg->p.offset = sc->free_offset; - // XXX: align */ - assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]); - assert(sg->p.offset < sc->mediasize); - - sg->p.length = sc->aim_segl; - sg->p.length &= ~7; - - if (smp_segend(sg) > sc->mediasize) { - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - sg->p.offset = sc->free_offset; - sg2 = VTAILQ_FIRST(&sc->segments); - if (smp_segend(sg) > sg2->p.offset) { - printf("Out of space in persistent silo\n"); - printf("Committing suicide, restart will make space\n"); - exit (0); - } - } - - - assert(smp_segend(sg) <= sc->mediasize); - - sg2 = VTAILQ_FIRST(&sc->segments); - if (sg2 != NULL && sg2->p.offset > sc->free_offset) { - if (smp_segend(sg) > sg2->p.offset) { - printf("Out of space in persistent silo\n"); - printf("Committing suicide, restart will make space\n"); - exit (0); - } - assert(smp_segend(sg) <= sg2->p.offset); - } - - 
sg->p.offset = IRNUP(sc, sg->p.offset); - sg->p.length = IRNDN(sc, sg->p.length); - sc->free_offset = sg->p.offset + sg->p.length; - - VTAILQ_INSERT_TAIL(&sc->segments, sg, list); - - /* Neuter the new segment in case there is an old one there */ - AN(sg->p.offset); - smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Set up our allocation points */ - sc->cur_seg = sg; - sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); - sc->next_top = smp_segend(sg); - sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - IASSERTALIGN(sc, sc->next_bot); - IASSERTALIGN(sc, sc->next_top); - sg->objs = (void*)(sc->base + sc->next_top); -} - -/*-------------------------------------------------------------------- - * Close a segment - */ - -static void -smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) -{ - uint64_t left, dst, len; - void *dp; - - Lck_AssertHeld(&sc->mtx); - - assert(sg == sc->cur_seg); - AN(sg->p.offset); - sc->cur_seg = NULL; - - if (sg->nalloc == 0) { - /* XXX: if segment is empty, delete instead */ - VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); - return; - } - - /* - * If there is enough space left, that we can move the smp_objects - * down without overwriting the present copy, we will do so to - * compact the segment. 
- */ - left = smp_spaceleft(sc, sg); - len = sizeof(struct smp_object) * sg->p.lobjlist; - if (len < left) { - dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); - dp = sc->base + dst; - assert((uintptr_t)dp + len < (uintptr_t)sg->objs); - memcpy(dp, sg->objs, len); - sc->next_top = dst; - sg->objs = dp; - sg->p.length = (sc->next_top - sg->p.offset) - + len + IRNUP(sc, SMP_SIGN_SPACE); - (void)smp_spaceleft(sc, sg); /* for the asserts */ - - } - - /* Update the segment header */ - sg->p.objlist = sc->next_top; - - /* Write the (empty) OBJIDX signature */ - sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - assert(sc->next_top >= sc->next_bot); - smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Write the (empty) SEGTAIL signature */ - smp_def_sign(sc, sg->ctx, - sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Save segment list */ - smp_save_segs(sc); - sc->free_offset = smp_segend(sg); -} - -/*-------------------------------------------------------------------- - * Silo worker thread - */ - -static void * -smp_thread(struct sess *sp, void *priv) -{ - struct smp_sc *sc; - struct smp_seg *sg; - - (void)sp; - CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC); - - /* First, load all the objects from all segments */ - VTAILQ_FOREACH(sg, &sc->segments, list) - if (sg->flags & SMP_SEG_MUSTLOAD) - smp_load_seg(sp, sc, sg); - - sc->flags |= SMP_SC_LOADED; - BAN_Deref(&sc->tailban); - sc->tailban = NULL; - printf("Silo completely loaded\n"); - while (1) - (void)sleep (1); - NEEDLESS_RETURN(NULL); -} - -/*-------------------------------------------------------------------- - * Open a silo in the worker process - */ - -static void -smp_open(const struct stevedore *st) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - - Lck_New(&sc->mtx, lck_smp); - Lck_Lock(&sc->mtx); - - sc->stevedore = st; - - /* We trust the 
parent to give us a valid silo, for good measure: */ - AZ(smp_valid_silo(sc)); - - AZ(mprotect(sc->base, 4096, PROT_READ)); - - sc->ident = SIGN_DATA(&sc->idn); - - /* We attempt ban1 first, and if that fails, try ban2 */ - if (smp_open_bans(sc, &sc->ban1)) - AZ(smp_open_bans(sc, &sc->ban2)); - - /* We attempt seg1 first, and if that fails, try seg2 */ - if (smp_open_segs(sc, &sc->seg1)) - AZ(smp_open_segs(sc, &sc->seg2)); - - sc->tailban = BAN_TailRef(); - AN(sc->tailban); - - /* XXX: save segments to ensure consistency between seg1 & seg2 ? */ - - /* XXX: abandon early segments to make sure we have free space ? */ - - /* Open a new segment, so we are ready to write */ - smp_new_seg(sc); - - /* Start the worker silo worker thread, it will load the objects */ - WRK_BgThread(&sc->thread, "persistence", smp_thread, sc); - - VTAILQ_INSERT_TAIL(&silos, sc, list); - Lck_Unlock(&sc->mtx); -} - -/*-------------------------------------------------------------------- - * Close a silo - */ - -static void -smp_close(const struct stevedore *st) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - Lck_Lock(&sc->mtx); - smp_close_seg(sc, sc->cur_seg); - Lck_Unlock(&sc->mtx); - - /* XXX: reap thread */ -} - -/*-------------------------------------------------------------------- - * Allocate a bite. - * - * Allocate [min_size...max_size] space from the bottom of the segment, - * as is convenient. - * - * If 'so' + 'idx' is given, also allocate a smp_object from the top - * of the segment. - * - * Return the segment in 'ssg' if given. 
- */ - -static struct storage * -smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, - struct smp_object **so, unsigned *idx, struct smp_seg **ssg) -{ - struct smp_sc *sc; - struct storage *ss; - struct smp_seg *sg; - unsigned tries; - uint64_t left, extra; - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - assert(min_size <= max_size); - - max_size = IRNUP(sc, max_size); - min_size = IRNUP(sc, min_size); - - extra = IRNUP(sc, sizeof(*ss)); - if (so != NULL) { - extra += sizeof(**so); - AN(idx); - } - - Lck_Lock(&sc->mtx); - sg = NULL; - ss = NULL; - for (tries = 0; tries < 3; tries++) { - left = smp_spaceleft(sc, sc->cur_seg); - if (left >= extra + min_size) - break; - smp_close_seg(sc, sc->cur_seg); - smp_new_seg(sc); - } - if (left >= extra + min_size) { - if (left < extra + max_size) - max_size = IRNDN(sc, left - extra); - - sg = sc->cur_seg; - ss = (void*)(sc->base + sc->next_bot); - sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); - sg->nalloc++; - if (so != NULL) { - sc->next_top -= sizeof(**so); - *so = (void*)(sc->base + sc->next_top); - /* Render this smp_object mostly harmless */ - (*so)->ttl = 0.; - (*so)->ban = 0.; - (*so)->ptr = 0;; - sg->objs = *so; - *idx = ++sg->p.lobjlist; - } - (void)smp_spaceleft(sc, sg); /* for the assert */ - } - Lck_Unlock(&sc->mtx); - - if (ss == NULL) - return (ss); - AN(sg); - assert(max_size >= min_size); - - /* Fill the storage structure */ - memset(ss, 0, sizeof *ss); - ss->magic = STORAGE_MAGIC; - ss->ptr = PRNUP(sc, ss + 1); - ss->space = max_size; - ss->priv = sc; - ss->stevedore = st; - ss->fd = sc->fd; - if (ssg != NULL) - *ssg = sg; - return (ss); -} - -/*-------------------------------------------------------------------- - * Find the per-segment lru list for this object - */ - -static struct lru * -smp_getlru(const struct object *o) -{ - struct smp_seg *sg; - - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); - return (sg->lru); -} - 
-/*-------------------------------------------------------------------- - * Allocate an object - */ - -static struct object * -smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, - const struct stv_objsecrets *soc) -{ - struct object *o; - struct storage *st; - struct smp_sc *sc; - struct smp_seg *sg; - struct smp_object *so; - struct objcore *oc; - unsigned objidx; - - CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); - AN(sp->objcore); - AN(sp->wrk->ttl >= 0); - - ltot = IRNUP(sc, ltot); - - st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); - if (st == NULL) - return (NULL); - - assert(st->space >= ltot); - ltot = st->len = st->space; - - o = STV_MkObject(sp, st->ptr, ltot, soc); - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - o->objstore = st; - - oc = o->objcore; - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - oc->flags |= OC_F_LRUDONTMOVE; - - Lck_Lock(&sc->mtx); - sg->nfixed++; - sg->nobj++; - - /* We have to do this somewhere, might as well be here... */ - assert(sizeof so->hash == DIGEST_LEN); - memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); - so->ttl = o->ttl; /* XXX: grace? */ - so->ptr = (uint8_t*)o - sc->base; - so->ban = o->ban_t; - - oc->priv = sg; - oc->priv2 = objidx; - oc->methods = &smp_oc_methods; - - Lck_Unlock(&sc->mtx); - return (o); -} - -/*-------------------------------------------------------------------- - * Allocate a bite - */ - -static struct storage * -smp_alloc(struct stevedore *st, size_t size) -{ - - return (smp_allocx(st, - size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); -} - -/*-------------------------------------------------------------------- - * Trim a bite - * XXX: We could trim the last allocation. 
- */ - -static void -smp_trim(struct storage *ss, size_t size) -{ - - (void)ss; - (void)size; -} - -/*-------------------------------------------------------------------- - * We don't track frees of storage, we track the objects which own the - * storage and when there are no more objects in in the first segment, - * it can be reclaimed. - * XXX: We could free the last allocation, but does that happen ? - */ - -static void __match_proto__(storage_free_f) -smp_free(struct storage *st) -{ - - /* XXX */ - (void)st; -} - -/*-------------------------------------------------------------------- - * Pause until all silos have loaded. - */ - -void -SMP_Ready(void) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - do { - VTAILQ_FOREACH(sc, &silos, list) - if (!(sc->flags & SMP_SC_LOADED)) - break; - if (sc != NULL) - (void)sleep(1); - } while (sc != NULL); -} - -/*--------------------------------------------------------------------*/ - -const struct stevedore smp_stevedore = { - .magic = STEVEDORE_MAGIC, - .name = "persistent", - .init = smp_mgt_init, - .open = smp_open, - .close = smp_close, - .alloc = smp_alloc, - .allocobj = smp_allocobj, - .getlru = smp_getlru, - .free = smp_free, - .trim = smp_trim, -}; - -/*-------------------------------------------------------------------- - * Persistence is a bear to test unadultered, so we cheat by adding - * a cli command we can use to make it do tricks for us. - */ - -static void -debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) -{ - struct smp_seg *sg; - struct objcore *oc; - - cli_out(cli, "Silo: %s (%s)\n", - sc->stevedore->ident, sc->filename); - VTAILQ_FOREACH(sg, &sc->segments, list) { - cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", - (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); - if (sg == sc->cur_seg) - cli_out(cli, - " Alloc: [0x%jx ... 
0x%jx] = 0x%jx free\n", - (uintmax_t)(sc->next_bot), - (uintmax_t)(sc->next_top), - (uintmax_t)(sc->next_top - sc->next_bot)); - cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", - sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); - if (objs) { - VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) - cli_out(cli, " %s %p\n", - oc == &sg->lru->senteniel ? - "senteniel" : "OC: ", oc); - } - } -} - -static void -debug_persistent(struct cli *cli, const char * const * av, void *priv) -{ - struct smp_sc *sc; - - (void)priv; - - if (av[2] == NULL) { - VTAILQ_FOREACH(sc, &silos, list) - debug_report_silo(cli, sc, 0); - return; - } - VTAILQ_FOREACH(sc, &silos, list) - if (!strcmp(av[2], sc->stevedore->ident)) - break; - if (sc == NULL) { - cli_out(cli, "Silo <%s> not found\n", av[2]); - cli_result(cli, CLIS_PARAM); - return; - } - if (av[3] == NULL) { - debug_report_silo(cli, sc, 0); - return; - } - Lck_Lock(&sc->mtx); - if (!strcmp(av[3], "sync")) { - smp_close_seg(sc, sc->cur_seg); - smp_new_seg(sc); - } else if (!strcmp(av[3], "dump")) { - debug_report_silo(cli, sc, 1); - } else { - cli_out(cli, "Unknown operation\n"); - cli_result(cli, CLIS_PARAM); - } - Lck_Unlock(&sc->mtx); -} - -static struct cli_proto debug_cmds[] = { - { "debug.persistent", "debug.persistent", - "Persistent debugging magic:\n" - "\tdebug.persistent [stevedore [cmd]]\n" - "With no cmd arg, a summary of the silo is returned.\n" - "Possible commands:\n" - "\tsync\tClose current segment, open a new one\n" - "\tdump\tinclude objcores in silo summary\n" - "", - 0, 2, "d", debug_persistent }, - { NULL } -}; - -void -SMP_Init(void) -{ - CLI_AddFuncs(debug_cmds); -} From phk at varnish-cache.org Tue Feb 8 10:50:38 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 08 Feb 2011 11:50:38 +0100 Subject: [master] 63105cc Clone from storage_persistent.c Message-ID: commit 63105cc286d2f45000f27e5caadb08c3ed9e82e0 Author: Poul-Henning Kamp Date: Tue Feb 8 10:22:37 2011 +0000 Clone from 
storage_persistent.c diff --git a/bin/varnishd/storage_persistent_silo.c b/bin/varnishd/storage_persistent_silo.c new file mode 100644 index 0000000..4a971cb --- /dev/null +++ b/bin/varnishd/storage_persistent_silo.c @@ -0,0 +1,1178 @@ +/*- + * Copyright (c) 2008-2010 Linpro AS + * All rights reserved. + * + * Author: Poul-Henning Kamp + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Persistent storage method + * + * XXX: Before we start the client or maybe after it stops, we should give the + * XXX: stevedores a chance to examine their storage for consistency. + * + * XXX: Do we ever free the LRU-lists ? 
+ */ + +#include "config.h" + +#include "svnid.h" +SVNID("$Id$") + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cache.h" +#include "stevedore.h" +#include "hash_slinger.h" +#include "vsha256.h" +#include "cli.h" +#include "cli_priv.h" + +#include "persistent.h" +#include "storage_persistent.h" + +/*--------------------------------------------------------------------*/ + +/* + * silos is unlocked, it only changes during startup when we are + * single-threaded + */ +static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); + +/*-------------------------------------------------------------------- + * Write the segmentlist back to the silo. + * + * We write the first copy, sync it synchronously, then write the + * second copy and sync it synchronously. + * + * Provided the kernel doesn't lie, that means we will always have + * at least one valid copy on in the silo. + */ + +static void +smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx) +{ + struct smp_segptr *ss; + struct smp_seg *sg; + uint64_t length; + + Lck_AssertHeld(&sc->mtx); + smp_reset_sign(ctx); + ss = SIGN_DATA(ctx); + length = 0; + VTAILQ_FOREACH(sg, &sc->segments, list) { + assert(sg->p.offset < sc->mediasize); + assert(sg->p.offset + sg->p.length <= sc->mediasize); + *ss = sg->p; + ss++; + length += sizeof *ss; + } + smp_append_sign(ctx, SIGN_DATA(ctx), length); + smp_sync_sign(ctx); +} + +static void +smp_save_segs(struct smp_sc *sc) +{ + struct smp_seg *sg, *sg2; + + Lck_AssertHeld(&sc->mtx); + + /* + * Remove empty segments from the front of the list + * before we write the segments to disk. 
+ */ + VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) { + if (sg->nobj > 0) + break; + if (sg == sc->cur_seg) + continue; + VTAILQ_REMOVE(&sc->segments, sg, list); + free(sg); + } + smp_save_seg(sc, &sc->seg1); + smp_save_seg(sc, &sc->seg2); +} + + +/*--------------------------------------------------------------------- + */ + +static struct smp_object * +smp_find_so(const struct smp_seg *sg, const struct objcore *oc) +{ + struct smp_object *so; + unsigned smp_idx; + + smp_idx = oc->priv2; + assert(smp_idx > 0); + assert(smp_idx <= sg->p.lobjlist); + so = &sg->objs[sg->p.lobjlist - smp_idx]; + return (so); +} + +/*--------------------------------------------------------------------- + * Check if a given storage structure is valid to use + */ + +static int +smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, + const struct storage *st) +{ + struct smp_seg *sg2; + const uint8_t *pst; + uint64_t o; + + (void)sg; /* XXX: faster: Start search from here */ + pst = (const void *)st; + + if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF])) + return (0x01); /* Before silo payload start */ + if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) + return (0x02); /* After silo end */ + + o = pst - sc->base; + + /* Find which segment contains the storage structure */ + VTAILQ_FOREACH(sg2, &sc->segments, list) + if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) + break; + if (sg2 == NULL) + return (0x04); /* No claiming segment */ + if (!(sg2->flags & SMP_SEG_LOADED)) + return (0x08); /* Claiming segment not loaded */ + + /* It is now safe to access the storage structure */ + if (st->magic != STORAGE_MAGIC) + return (0x10); /* Not enough magic */ + + if (o + st->space >= sg2->p.objlist) + return (0x20); /* Allocation not inside segment */ + + if (st->len > st->space) + return (0x40); /* Plain bad... */ + + /* + * XXX: We could patch up st->stevedore and st->priv here + * XXX: but if things go right, we will never need them. 
+ */ + return (0); +} + +/*--------------------------------------------------------------------- + * objcore methods for persistent objects + */ + +static struct object * +smp_oc_getobj(struct worker *wrk, struct objcore *oc) +{ + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + struct storage *st; + uint64_t l; + int bad; + + /* Some calls are direct, but they should match anyway */ + assert(oc->methods->getobj == smp_oc_getobj); + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + if (wrk == NULL) + AZ(oc->flags & OC_F_NEEDFIXUP); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); + + o = (void*)(sg->sc->base + so->ptr); + /* + * The object may not be in this segment since we allocate it + * In a separate operation than the smp_object. We could check + * that it is in a later segment, but that would be complicated. + * XXX: For now, be happy if it is inside th silo + */ + ASSERT_PTR_IN_SILO(sg->sc, o); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + + /* + * If this flag is not set, it will not be, and the lock is not + * needed to test it. + */ + if (!(oc->flags & OC_F_NEEDFIXUP)) + return (o); + + AN(wrk); + Lck_Lock(&sg->sc->mtx); + /* Check again, we might have raced. 
*/ + if (oc->flags & OC_F_NEEDFIXUP) { + /* We trust caller to have a refcnt for us */ + o->objcore = oc; + + bad = 0; + l = 0; + VTAILQ_FOREACH(st, &o->store, list) { + bad |= smp_loaded_st(sg->sc, sg, st); + if (bad) + break; + l += st->len; + } + if (l != o->len) + bad |= 0x100; + + if(bad) { + o->ttl = 0; + o->grace = 0; + so->ttl = 0; + } + + sg->nfixed++; + wrk->stats.n_object++; + wrk->stats.n_vampireobject--; + oc->flags &= ~OC_F_NEEDFIXUP; + } + Lck_Unlock(&sg->sc->mtx); + return (o); +} + +static void +smp_oc_updatemeta(struct objcore *oc) +{ + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + double mttl; + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + o = smp_oc_getobj(NULL, oc); + AN(o); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); + so = smp_find_so(sg, oc); + + if (isnan(o->grace)) + mttl = o->ttl; + else + mttl = - (o->ttl + o->grace); + + if (sg == sg->sc->cur_seg) { + /* Lock necessary, we might race close_seg */ + Lck_Lock(&sg->sc->mtx); + so->ban = o->ban_t; + so->ttl = mttl; + Lck_Unlock(&sg->sc->mtx); + } else { + so->ban = o->ban_t; + so->ttl = mttl; + } +} + +static void __match_proto__() +smp_oc_freeobj(struct objcore *oc) +{ + struct smp_seg *sg; + struct smp_object *so; + + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); + + Lck_Lock(&sg->sc->mtx); + so->ttl = 0; + so->ptr = 0; + + assert(sg->nobj > 0); + assert(sg->nfixed > 0); + sg->nobj--; + sg->nfixed--; + + Lck_Unlock(&sg->sc->mtx); +} + +static struct objcore_methods smp_oc_methods = { + .getobj = smp_oc_getobj, + .updatemeta = smp_oc_updatemeta, + .freeobj = smp_oc_freeobj, +}; + +/*-------------------------------------------------------------------- + * Add a new ban to all silos + */ + +static void +smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, + uint32_t flags, uint32_t len, const char *ban) +{ + uint8_t *ptr, *ptr2; + + 
(void)sc; + ptr = ptr2 = SIGN_END(ctx); + + memcpy(ptr, "BAN", 4); + ptr += 4; + + memcpy(ptr, &t0, sizeof t0); + ptr += sizeof t0; + + memcpy(ptr, &flags, sizeof flags); + ptr += sizeof flags; + + memcpy(ptr, &len, sizeof len); + ptr += sizeof len; + + memcpy(ptr, ban, len); + ptr += len; + + smp_append_sign(ctx, ptr2, ptr - ptr2); +} + +void +SMP_NewBan(double t0, const char *ban) +{ + struct smp_sc *sc; + uint32_t l = strlen(ban) + 1; + + VTAILQ_FOREACH(sc, &silos, list) { + smp_appendban(sc, &sc->ban1, t0, 0, l, ban); + smp_appendban(sc, &sc->ban2, t0, 0, l, ban); + } +} + +/*-------------------------------------------------------------------- + * Attempt to open and read in a ban list + */ + +static int +smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) +{ + uint8_t *ptr, *pe; + double t0; + uint32_t flags, length; + int i, retval = 0; + + ASSERT_CLI(); + (void)sc; + i = smp_chk_sign(ctx); + if (i) + return (i); + ptr = SIGN_DATA(ctx); + pe = ptr + ctx->ss->length; + + while (ptr < pe) { + if (memcmp(ptr, "BAN", 4)) { + retval = 1001; + break; + } + ptr += 4; + + memcpy(&t0, ptr, sizeof t0); + ptr += sizeof t0; + + memcpy(&flags, ptr, sizeof flags); + ptr += sizeof flags; + if (flags != 0) { + retval = 1002; + break; + } + + memcpy(&length, ptr, sizeof length); + ptr += sizeof length; + if (ptr + length > pe) { + retval = 1003; + break; + } + + if (ptr[length - 1] != '\0') { + retval = 1004; + break; + } + + BAN_Reload(t0, flags, (const char *)ptr); + + ptr += length; + } + assert(ptr <= pe); + return (retval); +} + + +/*--------------------------------------------------------------------*/ + +static uint64_t +smp_segend(const struct smp_seg *sg) +{ + + return (sg->p.offset + sg->p.length); +} + +static uint64_t +smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) +{ + + IASSERTALIGN(sc, sc->next_bot); + assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); + assert(sc->next_bot >= sg->p.offset); + assert(sc->next_top < 
sg->p.offset + sg->p.length); + return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); +} + +/*-------------------------------------------------------------------- + * Load segments + * + * The overall objective is to register the existence of an object, based + * only on the minimally sized struct smp_object, without causing the + * main object to be faulted in. + * + * XXX: We can test this by mprotecting the main body of the segment + * XXX: until the first fixup happens, or even just over this loop, + * XXX: However: the requires that the smp_objects starter further + * XXX: into the segment than a page so that they do not get hit + * XXX: by the protection. + */ + +static void +smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) +{ + struct smp_object *so; + struct objcore *oc; + uint32_t no; + double t_now = TIM_real(); + struct smp_signctx ctx[1]; + + ASSERT_SILO_THREAD(sc); + CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); + CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + assert(sg->flags & SMP_SEG_MUSTLOAD); + sg->flags &= ~SMP_SEG_MUSTLOAD; + AN(sg->p.offset); + if (sg->p.objlist == 0) + return; + smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); + if (smp_chk_sign(ctx)) + return; + + /* test SEGTAIL */ + /* test OBJIDX */ + so = (void*)(sc->base + sg->p.objlist); + sg->objs = so; + no = sg->p.lobjlist; + /* Clear the bogus "hold" count */ + sg->nobj = 0; + for (;no > 0; so++,no--) { + if (so->ttl > 0 && so->ttl < t_now) + continue; + if (so->ttl < 0 && -so->ttl < t_now) + continue; + HSH_Prealloc(sp); + oc = sp->wrk->nobjcore; + oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; + oc->flags &= ~OC_F_BUSY; + oc->priv = sg; + oc->priv2 = no; + oc->methods = &smp_oc_methods; + oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); + memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); + (void)HSH_Insert(sp); + AZ(sp->wrk->nobjcore); + EXP_Inject(oc, sg->lru, fabs(so->ttl)); + sg->nobj++; + } + 
WRK_SumStat(sp->wrk); + sg->flags |= SMP_SEG_LOADED; +} + +/*-------------------------------------------------------------------- + * Attempt to open and read in a segment list + */ + +static int +smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) +{ + uint64_t length, l; + struct smp_segptr *ss, *se; + struct smp_seg *sg, *sg1, *sg2; + int i, n = 0; + + ASSERT_CLI(); + i = smp_chk_sign(ctx); + if (i) + return (i); + + ss = SIGN_DATA(ctx); + length = ctx->ss->length; + + if (length == 0) { + /* No segments */ + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + return (0); + } + se = ss + length / sizeof *ss; + se--; + assert(ss <= se); + + /* + * Locate the free reserve, there are only two basic cases, + * but once we start dropping segments, things gets more complicated. + */ + + sc->free_offset = se->offset + se->length; + l = sc->mediasize - sc->free_offset; + if (se->offset > ss->offset && l >= sc->free_reserve) { + /* + * [__xxxxyyyyzzzz___] + * Plenty of space at tail, do nothing. + */ + } else if (ss->offset > se->offset) { + /* + * [zzzz____xxxxyyyy_] + * (make) space between ends + * We might nuke the entire tail end without getting + * enough space, in which case we fall through to the + * last check. 
+ */ + while (ss < se && ss->offset > se->offset) { + l = ss->offset - (se->offset + se->length); + if (l > sc->free_reserve) + break; + ss++; + n++; + } + } + + if (l < sc->free_reserve) { + /* + * [__xxxxyyyyzzzz___] + * (make) space at front + */ + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + while (ss < se) { + l = ss->offset - sc->free_offset; + if (l > sc->free_reserve) + break; + ss++; + n++; + } + } + + assert (l >= sc->free_reserve); + + + sg1 = NULL; + sg2 = NULL; + for(; ss <= se; ss++) { + ALLOC_OBJ(sg, SMP_SEG_MAGIC); + AN(sg); + sg->lru = LRU_Alloc(); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + sg->p = *ss; + + sg->flags |= SMP_SEG_MUSTLOAD; + + /* + * HACK: prevent save_segs from nuking segment until we have + * HACK: loaded it. + */ + sg->nobj = 1; + if (sg1 != NULL) { + assert(sg1->p.offset != sg->p.offset); + if (sg1->p.offset < sg->p.offset) + assert(smp_segend(sg1) <= sg->p.offset); + else + assert(smp_segend(sg) <= sg1->p.offset); + } + if (sg2 != NULL) { + assert(sg2->p.offset != sg->p.offset); + if (sg2->p.offset < sg->p.offset) + assert(smp_segend(sg2) <= sg->p.offset); + else + assert(smp_segend(sg) <= sg2->p.offset); + } + + /* XXX: check that they are inside silo */ + /* XXX: check that they don't overlap */ + /* XXX: check that they are serial */ + sg->sc = sc; + VTAILQ_INSERT_TAIL(&sc->segments, sg, list); + sg2 = sg; + if (sg1 == NULL) + sg1 = sg; + } + printf("Dropped %d segments to make free_reserve\n", n); + return (0); +} + +/*-------------------------------------------------------------------- + * Create a new segment + */ + +static void +smp_new_seg(struct smp_sc *sc) +{ + struct smp_seg *sg, *sg2; + + Lck_AssertHeld(&sc->mtx); + ALLOC_OBJ(sg, SMP_SEG_MAGIC); + AN(sg); + sg->sc = sc; + sg->lru = LRU_Alloc(); + CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); + + /* XXX: find where it goes in silo */ + + sg->p.offset = sc->free_offset; + // XXX: align */ + assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]); + assert(sg->p.offset 
< sc->mediasize); + + sg->p.length = sc->aim_segl; + sg->p.length &= ~7; + + if (smp_segend(sg) > sc->mediasize) { + sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; + sg->p.offset = sc->free_offset; + sg2 = VTAILQ_FIRST(&sc->segments); + if (smp_segend(sg) > sg2->p.offset) { + printf("Out of space in persistent silo\n"); + printf("Committing suicide, restart will make space\n"); + exit (0); + } + } + + + assert(smp_segend(sg) <= sc->mediasize); + + sg2 = VTAILQ_FIRST(&sc->segments); + if (sg2 != NULL && sg2->p.offset > sc->free_offset) { + if (smp_segend(sg) > sg2->p.offset) { + printf("Out of space in persistent silo\n"); + printf("Committing suicide, restart will make space\n"); + exit (0); + } + assert(smp_segend(sg) <= sg2->p.offset); + } + + sg->p.offset = IRNUP(sc, sg->p.offset); + sg->p.length = IRNDN(sc, sg->p.length); + sc->free_offset = sg->p.offset + sg->p.length; + + VTAILQ_INSERT_TAIL(&sc->segments, sg, list); + + /* Neuter the new segment in case there is an old one there */ + AN(sg->p.offset); + smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Set up our allocation points */ + sc->cur_seg = sg; + sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); + sc->next_top = smp_segend(sg); + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + IASSERTALIGN(sc, sc->next_bot); + IASSERTALIGN(sc, sc->next_top); + sg->objs = (void*)(sc->base + sc->next_top); +} + +/*-------------------------------------------------------------------- + * Close a segment + */ + +static void +smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) +{ + uint64_t left, dst, len; + void *dp; + + Lck_AssertHeld(&sc->mtx); + + assert(sg == sc->cur_seg); + AN(sg->p.offset); + sc->cur_seg = NULL; + + if (sg->nalloc == 0) { + /* XXX: if segment is empty, delete instead */ + VTAILQ_REMOVE(&sc->segments, sg, list); + free(sg); + return; + } + + /* + * If there is enough space left, that we can move the smp_objects + * down without 
overwriting the present copy, we will do so to + * compact the segment. + */ + left = smp_spaceleft(sc, sg); + len = sizeof(struct smp_object) * sg->p.lobjlist; + if (len < left) { + dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); + dp = sc->base + dst; + assert((uintptr_t)dp + len < (uintptr_t)sg->objs); + memcpy(dp, sg->objs, len); + sc->next_top = dst; + sg->objs = dp; + sg->p.length = (sc->next_top - sg->p.offset) + + len + IRNUP(sc, SMP_SIGN_SPACE); + (void)smp_spaceleft(sc, sg); /* for the asserts */ + + } + + /* Update the segment header */ + sg->p.objlist = sc->next_top; + + /* Write the (empty) OBJIDX signature */ + sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); + assert(sc->next_top >= sc->next_bot); + smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Write the (empty) SEGTAIL signature */ + smp_def_sign(sc, sg->ctx, + sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); + smp_reset_sign(sg->ctx); + smp_sync_sign(sg->ctx); + + /* Save segment list */ + smp_save_segs(sc); + sc->free_offset = smp_segend(sg); +} + +/*-------------------------------------------------------------------- + * Silo worker thread + */ + +static void * +smp_thread(struct sess *sp, void *priv) +{ + struct smp_sc *sc; + struct smp_seg *sg; + + (void)sp; + CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC); + + /* First, load all the objects from all segments */ + VTAILQ_FOREACH(sg, &sc->segments, list) + if (sg->flags & SMP_SEG_MUSTLOAD) + smp_load_seg(sp, sc, sg); + + sc->flags |= SMP_SC_LOADED; + BAN_Deref(&sc->tailban); + sc->tailban = NULL; + printf("Silo completely loaded\n"); + while (1) + (void)sleep (1); + NEEDLESS_RETURN(NULL); +} + +/*-------------------------------------------------------------------- + * Open a silo in the worker process + */ + +static void +smp_open(const struct stevedore *st) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + + Lck_New(&sc->mtx, 
lck_smp); + Lck_Lock(&sc->mtx); + + sc->stevedore = st; + + /* We trust the parent to give us a valid silo, for good measure: */ + AZ(smp_valid_silo(sc)); + + AZ(mprotect(sc->base, 4096, PROT_READ)); + + sc->ident = SIGN_DATA(&sc->idn); + + /* We attempt ban1 first, and if that fails, try ban2 */ + if (smp_open_bans(sc, &sc->ban1)) + AZ(smp_open_bans(sc, &sc->ban2)); + + /* We attempt seg1 first, and if that fails, try seg2 */ + if (smp_open_segs(sc, &sc->seg1)) + AZ(smp_open_segs(sc, &sc->seg2)); + + sc->tailban = BAN_TailRef(); + AN(sc->tailban); + + /* XXX: save segments to ensure consistency between seg1 & seg2 ? */ + + /* XXX: abandon early segments to make sure we have free space ? */ + + /* Open a new segment, so we are ready to write */ + smp_new_seg(sc); + + /* Start the worker silo worker thread, it will load the objects */ + WRK_BgThread(&sc->thread, "persistence", smp_thread, sc); + + VTAILQ_INSERT_TAIL(&silos, sc, list); + Lck_Unlock(&sc->mtx); +} + +/*-------------------------------------------------------------------- + * Close a silo + */ + +static void +smp_close(const struct stevedore *st) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + Lck_Lock(&sc->mtx); + smp_close_seg(sc, sc->cur_seg); + Lck_Unlock(&sc->mtx); + + /* XXX: reap thread */ +} + +/*-------------------------------------------------------------------- + * Allocate a bite. + * + * Allocate [min_size...max_size] space from the bottom of the segment, + * as is convenient. + * + * If 'so' + 'idx' is given, also allocate a smp_object from the top + * of the segment. + * + * Return the segment in 'ssg' if given. 
+ */ + +static struct storage * +smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, + struct smp_object **so, unsigned *idx, struct smp_seg **ssg) +{ + struct smp_sc *sc; + struct storage *ss; + struct smp_seg *sg; + unsigned tries; + uint64_t left, extra; + + CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); + assert(min_size <= max_size); + + max_size = IRNUP(sc, max_size); + min_size = IRNUP(sc, min_size); + + extra = IRNUP(sc, sizeof(*ss)); + if (so != NULL) { + extra += sizeof(**so); + AN(idx); + } + + Lck_Lock(&sc->mtx); + sg = NULL; + ss = NULL; + for (tries = 0; tries < 3; tries++) { + left = smp_spaceleft(sc, sc->cur_seg); + if (left >= extra + min_size) + break; + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } + if (left >= extra + min_size) { + if (left < extra + max_size) + max_size = IRNDN(sc, left - extra); + + sg = sc->cur_seg; + ss = (void*)(sc->base + sc->next_bot); + sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); + sg->nalloc++; + if (so != NULL) { + sc->next_top -= sizeof(**so); + *so = (void*)(sc->base + sc->next_top); + /* Render this smp_object mostly harmless */ + (*so)->ttl = 0.; + (*so)->ban = 0.; + (*so)->ptr = 0;; + sg->objs = *so; + *idx = ++sg->p.lobjlist; + } + (void)smp_spaceleft(sc, sg); /* for the assert */ + } + Lck_Unlock(&sc->mtx); + + if (ss == NULL) + return (ss); + AN(sg); + assert(max_size >= min_size); + + /* Fill the storage structure */ + memset(ss, 0, sizeof *ss); + ss->magic = STORAGE_MAGIC; + ss->ptr = PRNUP(sc, ss + 1); + ss->space = max_size; + ss->priv = sc; + ss->stevedore = st; + ss->fd = sc->fd; + if (ssg != NULL) + *ssg = sg; + return (ss); +} + +/*-------------------------------------------------------------------- + * Find the per-segment lru list for this object + */ + +static struct lru * +smp_getlru(const struct object *o) +{ + struct smp_seg *sg; + + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); + return (sg->lru); +} + 
+/*-------------------------------------------------------------------- + * Allocate an object + */ + +static struct object * +smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, + const struct stv_objsecrets *soc) +{ + struct object *o; + struct storage *st; + struct smp_sc *sc; + struct smp_seg *sg; + struct smp_object *so; + struct objcore *oc; + unsigned objidx; + + CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); + AN(sp->objcore); + AN(sp->wrk->ttl >= 0); + + ltot = IRNUP(sc, ltot); + + st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); + if (st == NULL) + return (NULL); + + assert(st->space >= ltot); + ltot = st->len = st->space; + + o = STV_MkObject(sp, st->ptr, ltot, soc); + CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); + o->objstore = st; + + oc = o->objcore; + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + oc->flags |= OC_F_LRUDONTMOVE; + + Lck_Lock(&sc->mtx); + sg->nfixed++; + sg->nobj++; + + /* We have to do this somewhere, might as well be here... */ + assert(sizeof so->hash == DIGEST_LEN); + memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); + so->ttl = o->ttl; /* XXX: grace? */ + so->ptr = (uint8_t*)o - sc->base; + so->ban = o->ban_t; + + oc->priv = sg; + oc->priv2 = objidx; + oc->methods = &smp_oc_methods; + + Lck_Unlock(&sc->mtx); + return (o); +} + +/*-------------------------------------------------------------------- + * Allocate a bite + */ + +static struct storage * +smp_alloc(struct stevedore *st, size_t size) +{ + + return (smp_allocx(st, + size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); +} + +/*-------------------------------------------------------------------- + * Trim a bite + * XXX: We could trim the last allocation. 
+ */ + +static void +smp_trim(struct storage *ss, size_t size) +{ + + (void)ss; + (void)size; +} + +/*-------------------------------------------------------------------- + * We don't track frees of storage, we track the objects which own the + * storage and when there are no more objects in in the first segment, + * it can be reclaimed. + * XXX: We could free the last allocation, but does that happen ? + */ + +static void __match_proto__(storage_free_f) +smp_free(struct storage *st) +{ + + /* XXX */ + (void)st; +} + +/*-------------------------------------------------------------------- + * Pause until all silos have loaded. + */ + +void +SMP_Ready(void) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + do { + VTAILQ_FOREACH(sc, &silos, list) + if (!(sc->flags & SMP_SC_LOADED)) + break; + if (sc != NULL) + (void)sleep(1); + } while (sc != NULL); +} + +/*--------------------------------------------------------------------*/ + +const struct stevedore smp_stevedore = { + .magic = STEVEDORE_MAGIC, + .name = "persistent", + .init = smp_mgt_init, + .open = smp_open, + .close = smp_close, + .alloc = smp_alloc, + .allocobj = smp_allocobj, + .getlru = smp_getlru, + .free = smp_free, + .trim = smp_trim, +}; + +/*-------------------------------------------------------------------- + * Persistence is a bear to test unadultered, so we cheat by adding + * a cli command we can use to make it do tricks for us. + */ + +static void +debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) +{ + struct smp_seg *sg; + struct objcore *oc; + + cli_out(cli, "Silo: %s (%s)\n", + sc->stevedore->ident, sc->filename); + VTAILQ_FOREACH(sg, &sc->segments, list) { + cli_out(cli, " Seg: [0x%jx ... +0x%jx]\n", + (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); + if (sg == sc->cur_seg) + cli_out(cli, + " Alloc: [0x%jx ... 
0x%jx] = 0x%jx free\n", + (uintmax_t)(sc->next_bot), + (uintmax_t)(sc->next_top), + (uintmax_t)(sc->next_top - sc->next_bot)); + cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", + sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); + if (objs) { + VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) + cli_out(cli, " %s %p\n", + oc == &sg->lru->senteniel ? + "senteniel" : "OC: ", oc); + } + } +} + +static void +debug_persistent(struct cli *cli, const char * const * av, void *priv) +{ + struct smp_sc *sc; + + (void)priv; + + if (av[2] == NULL) { + VTAILQ_FOREACH(sc, &silos, list) + debug_report_silo(cli, sc, 0); + return; + } + VTAILQ_FOREACH(sc, &silos, list) + if (!strcmp(av[2], sc->stevedore->ident)) + break; + if (sc == NULL) { + cli_out(cli, "Silo <%s> not found\n", av[2]); + cli_result(cli, CLIS_PARAM); + return; + } + if (av[3] == NULL) { + debug_report_silo(cli, sc, 0); + return; + } + Lck_Lock(&sc->mtx); + if (!strcmp(av[3], "sync")) { + smp_close_seg(sc, sc->cur_seg); + smp_new_seg(sc); + } else if (!strcmp(av[3], "dump")) { + debug_report_silo(cli, sc, 1); + } else { + cli_out(cli, "Unknown operation\n"); + cli_result(cli, CLIS_PARAM); + } + Lck_Unlock(&sc->mtx); +} + +static struct cli_proto debug_cmds[] = { + { "debug.persistent", "debug.persistent", + "Persistent debugging magic:\n" + "\tdebug.persistent [stevedore [cmd]]\n" + "With no cmd arg, a summary of the silo is returned.\n" + "Possible commands:\n" + "\tsync\tClose current segment, open a new one\n" + "\tdump\tinclude objcores in silo summary\n" + "", + 0, 2, "d", debug_persistent }, + { NULL } +}; + +void +SMP_Init(void) +{ + CLI_AddFuncs(debug_cmds); +} From phk at varnish-cache.org Tue Feb 8 10:50:41 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 08 Feb 2011 11:50:41 +0100 Subject: [master] 052473d Split the silo-centric from the policy parts of persistent storage code. 
Message-ID: commit 052473ddffd2ef94345e06abe3d864bd66ed479e Author: Poul-Henning Kamp Date: Tue Feb 8 10:49:59 2011 +0000 Split the silo-centric from the policy parts of persistent storage code. diff --git a/bin/varnishd/Makefile.am b/bin/varnishd/Makefile.am index 208be08..b8db6e0 100644 --- a/bin/varnishd/Makefile.am +++ b/bin/varnishd/Makefile.am @@ -65,6 +65,7 @@ varnishd_SOURCES = \ storage_malloc.c \ storage_persistent.c \ storage_persistent_mgt.c \ + storage_persistent_silo.c \ storage_persistent_subr.c \ storage_synth.c \ storage_umem.c \ diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 4a971cb..1ae9e80 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -66,262 +66,6 @@ SVNID("$Id$") static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); /*-------------------------------------------------------------------- - * Write the segmentlist back to the silo. - * - * We write the first copy, sync it synchronously, then write the - * second copy and sync it synchronously. - * - * Provided the kernel doesn't lie, that means we will always have - * at least one valid copy on in the silo. - */ - -static void -smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx) -{ - struct smp_segptr *ss; - struct smp_seg *sg; - uint64_t length; - - Lck_AssertHeld(&sc->mtx); - smp_reset_sign(ctx); - ss = SIGN_DATA(ctx); - length = 0; - VTAILQ_FOREACH(sg, &sc->segments, list) { - assert(sg->p.offset < sc->mediasize); - assert(sg->p.offset + sg->p.length <= sc->mediasize); - *ss = sg->p; - ss++; - length += sizeof *ss; - } - smp_append_sign(ctx, SIGN_DATA(ctx), length); - smp_sync_sign(ctx); -} - -static void -smp_save_segs(struct smp_sc *sc) -{ - struct smp_seg *sg, *sg2; - - Lck_AssertHeld(&sc->mtx); - - /* - * Remove empty segments from the front of the list - * before we write the segments to disk. 
- */ - VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) { - if (sg->nobj > 0) - break; - if (sg == sc->cur_seg) - continue; - VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); - } - smp_save_seg(sc, &sc->seg1); - smp_save_seg(sc, &sc->seg2); -} - - -/*--------------------------------------------------------------------- - */ - -static struct smp_object * -smp_find_so(const struct smp_seg *sg, const struct objcore *oc) -{ - struct smp_object *so; - unsigned smp_idx; - - smp_idx = oc->priv2; - assert(smp_idx > 0); - assert(smp_idx <= sg->p.lobjlist); - so = &sg->objs[sg->p.lobjlist - smp_idx]; - return (so); -} - -/*--------------------------------------------------------------------- - * Check if a given storage structure is valid to use - */ - -static int -smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, - const struct storage *st) -{ - struct smp_seg *sg2; - const uint8_t *pst; - uint64_t o; - - (void)sg; /* XXX: faster: Start search from here */ - pst = (const void *)st; - - if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF])) - return (0x01); /* Before silo payload start */ - if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) - return (0x02); /* After silo end */ - - o = pst - sc->base; - - /* Find which segment contains the storage structure */ - VTAILQ_FOREACH(sg2, &sc->segments, list) - if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) - break; - if (sg2 == NULL) - return (0x04); /* No claiming segment */ - if (!(sg2->flags & SMP_SEG_LOADED)) - return (0x08); /* Claiming segment not loaded */ - - /* It is now safe to access the storage structure */ - if (st->magic != STORAGE_MAGIC) - return (0x10); /* Not enough magic */ - - if (o + st->space >= sg2->p.objlist) - return (0x20); /* Allocation not inside segment */ - - if (st->len > st->space) - return (0x40); /* Plain bad... */ - - /* - * XXX: We could patch up st->stevedore and st->priv here - * XXX: but if things go right, we will never need them. 
- */ - return (0); -} - -/*--------------------------------------------------------------------- - * objcore methods for persistent objects - */ - -static struct object * -smp_oc_getobj(struct worker *wrk, struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - struct storage *st; - uint64_t l; - int bad; - - /* Some calls are direct, but they should match anyway */ - assert(oc->methods->getobj == smp_oc_getobj); - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - if (wrk == NULL) - AZ(oc->flags & OC_F_NEEDFIXUP); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - o = (void*)(sg->sc->base + so->ptr); - /* - * The object may not be in this segment since we allocate it - * In a separate operation than the smp_object. We could check - * that it is in a later segment, but that would be complicated. - * XXX: For now, be happy if it is inside th silo - */ - ASSERT_PTR_IN_SILO(sg->sc, o); - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - - /* - * If this flag is not set, it will not be, and the lock is not - * needed to test it. - */ - if (!(oc->flags & OC_F_NEEDFIXUP)) - return (o); - - AN(wrk); - Lck_Lock(&sg->sc->mtx); - /* Check again, we might have raced. 
*/ - if (oc->flags & OC_F_NEEDFIXUP) { - /* We trust caller to have a refcnt for us */ - o->objcore = oc; - - bad = 0; - l = 0; - VTAILQ_FOREACH(st, &o->store, list) { - bad |= smp_loaded_st(sg->sc, sg, st); - if (bad) - break; - l += st->len; - } - if (l != o->len) - bad |= 0x100; - - if(bad) { - o->ttl = 0; - o->grace = 0; - so->ttl = 0; - } - - sg->nfixed++; - wrk->stats.n_object++; - wrk->stats.n_vampireobject--; - oc->flags &= ~OC_F_NEEDFIXUP; - } - Lck_Unlock(&sg->sc->mtx); - return (o); -} - -static void -smp_oc_updatemeta(struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - double mttl; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - o = smp_oc_getobj(NULL, oc); - AN(o); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); - so = smp_find_so(sg, oc); - - if (isnan(o->grace)) - mttl = o->ttl; - else - mttl = - (o->ttl + o->grace); - - if (sg == sg->sc->cur_seg) { - /* Lock necessary, we might race close_seg */ - Lck_Lock(&sg->sc->mtx); - so->ban = o->ban_t; - so->ttl = mttl; - Lck_Unlock(&sg->sc->mtx); - } else { - so->ban = o->ban_t; - so->ttl = mttl; - } -} - -static void __match_proto__() -smp_oc_freeobj(struct objcore *oc) -{ - struct smp_seg *sg; - struct smp_object *so; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - Lck_Lock(&sg->sc->mtx); - so->ttl = 0; - so->ptr = 0; - - assert(sg->nobj > 0); - assert(sg->nfixed > 0); - sg->nobj--; - sg->nfixed--; - - Lck_Unlock(&sg->sc->mtx); -} - -static struct objcore_methods smp_oc_methods = { - .getobj = smp_oc_getobj, - .updatemeta = smp_oc_updatemeta, - .freeobj = smp_oc_freeobj, -}; - -/*-------------------------------------------------------------------- * Add a new ban to all silos */ @@ -422,91 +166,6 @@ smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) } -/*--------------------------------------------------------------------*/ - -static uint64_t 
-smp_segend(const struct smp_seg *sg) -{ - - return (sg->p.offset + sg->p.length); -} - -static uint64_t -smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) -{ - - IASSERTALIGN(sc, sc->next_bot); - assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); - assert(sc->next_bot >= sg->p.offset); - assert(sc->next_top < sg->p.offset + sg->p.length); - return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); -} - -/*-------------------------------------------------------------------- - * Load segments - * - * The overall objective is to register the existence of an object, based - * only on the minimally sized struct smp_object, without causing the - * main object to be faulted in. - * - * XXX: We can test this by mprotecting the main body of the segment - * XXX: until the first fixup happens, or even just over this loop, - * XXX: However: the requires that the smp_objects starter further - * XXX: into the segment than a page so that they do not get hit - * XXX: by the protection. 
- */ - -static void -smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) -{ - struct smp_object *so; - struct objcore *oc; - uint32_t no; - double t_now = TIM_real(); - struct smp_signctx ctx[1]; - - ASSERT_SILO_THREAD(sc); - CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); - CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - assert(sg->flags & SMP_SEG_MUSTLOAD); - sg->flags &= ~SMP_SEG_MUSTLOAD; - AN(sg->p.offset); - if (sg->p.objlist == 0) - return; - smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD"); - if (smp_chk_sign(ctx)) - return; - - /* test SEGTAIL */ - /* test OBJIDX */ - so = (void*)(sc->base + sg->p.objlist); - sg->objs = so; - no = sg->p.lobjlist; - /* Clear the bogus "hold" count */ - sg->nobj = 0; - for (;no > 0; so++,no--) { - if (so->ttl > 0 && so->ttl < t_now) - continue; - if (so->ttl < 0 && -so->ttl < t_now) - continue; - HSH_Prealloc(sp); - oc = sp->wrk->nobjcore; - oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; - oc->flags &= ~OC_F_BUSY; - oc->priv = sg; - oc->priv2 = no; - oc->methods = &smp_oc_methods; - oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); - memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); - (void)HSH_Insert(sp); - AZ(sp->wrk->nobjcore); - EXP_Inject(oc, sg->lru, fabs(so->ttl)); - sg->nobj++; - } - WRK_SumStat(sp->wrk); - sg->flags |= SMP_SEG_LOADED; -} /*-------------------------------------------------------------------- * Attempt to open and read in a segment list @@ -629,142 +288,6 @@ smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) } /*-------------------------------------------------------------------- - * Create a new segment - */ - -static void -smp_new_seg(struct smp_sc *sc) -{ - struct smp_seg *sg, *sg2; - - Lck_AssertHeld(&sc->mtx); - ALLOC_OBJ(sg, SMP_SEG_MAGIC); - AN(sg); - sg->sc = sc; - sg->lru = LRU_Alloc(); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - - /* XXX: find where it goes in silo */ - - sg->p.offset = sc->free_offset; - // XXX: align */ - 
assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]); - assert(sg->p.offset < sc->mediasize); - - sg->p.length = sc->aim_segl; - sg->p.length &= ~7; - - if (smp_segend(sg) > sc->mediasize) { - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - sg->p.offset = sc->free_offset; - sg2 = VTAILQ_FIRST(&sc->segments); - if (smp_segend(sg) > sg2->p.offset) { - printf("Out of space in persistent silo\n"); - printf("Committing suicide, restart will make space\n"); - exit (0); - } - } - - - assert(smp_segend(sg) <= sc->mediasize); - - sg2 = VTAILQ_FIRST(&sc->segments); - if (sg2 != NULL && sg2->p.offset > sc->free_offset) { - if (smp_segend(sg) > sg2->p.offset) { - printf("Out of space in persistent silo\n"); - printf("Committing suicide, restart will make space\n"); - exit (0); - } - assert(smp_segend(sg) <= sg2->p.offset); - } - - sg->p.offset = IRNUP(sc, sg->p.offset); - sg->p.length = IRNDN(sc, sg->p.length); - sc->free_offset = sg->p.offset + sg->p.length; - - VTAILQ_INSERT_TAIL(&sc->segments, sg, list); - - /* Neuter the new segment in case there is an old one there */ - AN(sg->p.offset); - smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Set up our allocation points */ - sc->cur_seg = sg; - sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE); - sc->next_top = smp_segend(sg); - sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - IASSERTALIGN(sc, sc->next_bot); - IASSERTALIGN(sc, sc->next_top); - sg->objs = (void*)(sc->base + sc->next_top); -} - -/*-------------------------------------------------------------------- - * Close a segment - */ - -static void -smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) -{ - uint64_t left, dst, len; - void *dp; - - Lck_AssertHeld(&sc->mtx); - - assert(sg == sc->cur_seg); - AN(sg->p.offset); - sc->cur_seg = NULL; - - if (sg->nalloc == 0) { - /* XXX: if segment is empty, delete instead */ - VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); - return; - } - - /* - * If 
there is enough space left, that we can move the smp_objects - * down without overwriting the present copy, we will do so to - * compact the segment. - */ - left = smp_spaceleft(sc, sg); - len = sizeof(struct smp_object) * sg->p.lobjlist; - if (len < left) { - dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE); - dp = sc->base + dst; - assert((uintptr_t)dp + len < (uintptr_t)sg->objs); - memcpy(dp, sg->objs, len); - sc->next_top = dst; - sg->objs = dp; - sg->p.length = (sc->next_top - sg->p.offset) - + len + IRNUP(sc, SMP_SIGN_SPACE); - (void)smp_spaceleft(sc, sg); /* for the asserts */ - - } - - /* Update the segment header */ - sg->p.objlist = sc->next_top; - - /* Write the (empty) OBJIDX signature */ - sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE); - assert(sc->next_top >= sc->next_bot); - smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Write the (empty) SEGTAIL signature */ - smp_def_sign(sc, sg->ctx, - sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL"); - smp_reset_sign(sg->ctx); - smp_sync_sign(sg->ctx); - - /* Save segment list */ - smp_save_segs(sc); - sc->free_offset = smp_segend(sg); -} - -/*-------------------------------------------------------------------- * Silo worker thread */ @@ -1006,9 +529,7 @@ smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, so->ptr = (uint8_t*)o - sc->base; so->ban = o->ban_t; - oc->priv = sg; - oc->priv2 = objidx; - oc->methods = &smp_oc_methods; + smp_init_oc(oc, sg, objidx); Lck_Unlock(&sc->mtx); return (o); diff --git a/bin/varnishd/storage_persistent.h b/bin/varnishd/storage_persistent.h index 8184588..35affb6 100644 --- a/bin/varnishd/storage_persistent.h +++ b/bin/varnishd/storage_persistent.h @@ -174,20 +174,26 @@ struct smp_sc { #define SIGN_DATA(ctx) ((void *)((ctx)->ss + 1)) #define SIGN_END(ctx) ((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length)) -/* storage_persistent.c */ - /* storage_persistent_mgt.c */ + void 
smp_mgt_init(struct stevedore *parent, int ac, char * const *av); +/* storage_persistent_silo.c */ + +void smp_load_seg(const struct sess *sp, const struct smp_sc *sc, + struct smp_seg *sg); +void smp_new_seg(struct smp_sc *sc); +void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg); +void smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx); + /* storage_persistent_subr.c */ + void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, uint64_t off, const char *id); int smp_chk_sign(struct smp_signctx *ctx); void smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len); void smp_reset_sign(struct smp_signctx *ctx); void smp_sync_sign(const struct smp_signctx *ctx); -void smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, - uint64_t off, const char *id); void smp_newsilo(struct smp_sc *sc); int smp_valid_silo(struct smp_sc *sc); @@ -206,6 +212,20 @@ smp_stuff_len(const struct smp_sc *sc, unsigned stuff) return (l); } +static inline uint64_t +smp_segend(const struct smp_seg *sg) +{ + return (sg->p.offset + sg->p.length); +} +static inline uint64_t +smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) +{ + IASSERTALIGN(sc, sc->next_bot); + assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); + assert(sc->next_bot >= sg->p.offset); + assert(sc->next_top < sg->p.offset + sg->p.length); + return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); +} diff --git a/bin/varnishd/storage_persistent_silo.c b/bin/varnishd/storage_persistent_silo.c index 4a971cb..480474e 100644 --- a/bin/varnishd/storage_persistent_silo.c +++ b/bin/varnishd/storage_persistent_silo.c @@ -38,33 +38,20 @@ #include "svnid.h" SVNID("$Id$") -#include #include #include #include #include -#include -#include #include #include "cache.h" #include "stevedore.h" #include "hash_slinger.h" #include "vsha256.h" -#include "cli.h" -#include "cli_priv.h" #include "persistent.h" #include "storage_persistent.h" 
-/*--------------------------------------------------------------------*/ - -/* - * silos is unlocked, it only changes during startup when we are - * single-threaded - */ -static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); - /*-------------------------------------------------------------------- * Write the segmentlist back to the silo. * @@ -120,328 +107,6 @@ smp_save_segs(struct smp_sc *sc) smp_save_seg(sc, &sc->seg2); } - -/*--------------------------------------------------------------------- - */ - -static struct smp_object * -smp_find_so(const struct smp_seg *sg, const struct objcore *oc) -{ - struct smp_object *so; - unsigned smp_idx; - - smp_idx = oc->priv2; - assert(smp_idx > 0); - assert(smp_idx <= sg->p.lobjlist); - so = &sg->objs[sg->p.lobjlist - smp_idx]; - return (so); -} - -/*--------------------------------------------------------------------- - * Check if a given storage structure is valid to use - */ - -static int -smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, - const struct storage *st) -{ - struct smp_seg *sg2; - const uint8_t *pst; - uint64_t o; - - (void)sg; /* XXX: faster: Start search from here */ - pst = (const void *)st; - - if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF])) - return (0x01); /* Before silo payload start */ - if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) - return (0x02); /* After silo end */ - - o = pst - sc->base; - - /* Find which segment contains the storage structure */ - VTAILQ_FOREACH(sg2, &sc->segments, list) - if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) - break; - if (sg2 == NULL) - return (0x04); /* No claiming segment */ - if (!(sg2->flags & SMP_SEG_LOADED)) - return (0x08); /* Claiming segment not loaded */ - - /* It is now safe to access the storage structure */ - if (st->magic != STORAGE_MAGIC) - return (0x10); /* Not enough magic */ - - if (o + st->space >= sg2->p.objlist) - return (0x20); /* Allocation not inside segment */ - - if (st->len > 
st->space) - return (0x40); /* Plain bad... */ - - /* - * XXX: We could patch up st->stevedore and st->priv here - * XXX: but if things go right, we will never need them. - */ - return (0); -} - -/*--------------------------------------------------------------------- - * objcore methods for persistent objects - */ - -static struct object * -smp_oc_getobj(struct worker *wrk, struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - struct storage *st; - uint64_t l; - int bad; - - /* Some calls are direct, but they should match anyway */ - assert(oc->methods->getobj == smp_oc_getobj); - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - if (wrk == NULL) - AZ(oc->flags & OC_F_NEEDFIXUP); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - o = (void*)(sg->sc->base + so->ptr); - /* - * The object may not be in this segment since we allocate it - * In a separate operation than the smp_object. We could check - * that it is in a later segment, but that would be complicated. - * XXX: For now, be happy if it is inside th silo - */ - ASSERT_PTR_IN_SILO(sg->sc, o); - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - - /* - * If this flag is not set, it will not be, and the lock is not - * needed to test it. - */ - if (!(oc->flags & OC_F_NEEDFIXUP)) - return (o); - - AN(wrk); - Lck_Lock(&sg->sc->mtx); - /* Check again, we might have raced. 
*/ - if (oc->flags & OC_F_NEEDFIXUP) { - /* We trust caller to have a refcnt for us */ - o->objcore = oc; - - bad = 0; - l = 0; - VTAILQ_FOREACH(st, &o->store, list) { - bad |= smp_loaded_st(sg->sc, sg, st); - if (bad) - break; - l += st->len; - } - if (l != o->len) - bad |= 0x100; - - if(bad) { - o->ttl = 0; - o->grace = 0; - so->ttl = 0; - } - - sg->nfixed++; - wrk->stats.n_object++; - wrk->stats.n_vampireobject--; - oc->flags &= ~OC_F_NEEDFIXUP; - } - Lck_Unlock(&sg->sc->mtx); - return (o); -} - -static void -smp_oc_updatemeta(struct objcore *oc) -{ - struct object *o; - struct smp_seg *sg; - struct smp_object *so; - double mttl; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - o = smp_oc_getobj(NULL, oc); - AN(o); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); - so = smp_find_so(sg, oc); - - if (isnan(o->grace)) - mttl = o->ttl; - else - mttl = - (o->ttl + o->grace); - - if (sg == sg->sc->cur_seg) { - /* Lock necessary, we might race close_seg */ - Lck_Lock(&sg->sc->mtx); - so->ban = o->ban_t; - so->ttl = mttl; - Lck_Unlock(&sg->sc->mtx); - } else { - so->ban = o->ban_t; - so->ttl = mttl; - } -} - -static void __match_proto__() -smp_oc_freeobj(struct objcore *oc) -{ - struct smp_seg *sg; - struct smp_object *so; - - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - - CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); - so = smp_find_so(sg, oc); - - Lck_Lock(&sg->sc->mtx); - so->ttl = 0; - so->ptr = 0; - - assert(sg->nobj > 0); - assert(sg->nfixed > 0); - sg->nobj--; - sg->nfixed--; - - Lck_Unlock(&sg->sc->mtx); -} - -static struct objcore_methods smp_oc_methods = { - .getobj = smp_oc_getobj, - .updatemeta = smp_oc_updatemeta, - .freeobj = smp_oc_freeobj, -}; - -/*-------------------------------------------------------------------- - * Add a new ban to all silos - */ - -static void -smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, - uint32_t flags, uint32_t len, const char *ban) -{ - uint8_t *ptr, *ptr2; - - 
(void)sc; - ptr = ptr2 = SIGN_END(ctx); - - memcpy(ptr, "BAN", 4); - ptr += 4; - - memcpy(ptr, &t0, sizeof t0); - ptr += sizeof t0; - - memcpy(ptr, &flags, sizeof flags); - ptr += sizeof flags; - - memcpy(ptr, &len, sizeof len); - ptr += sizeof len; - - memcpy(ptr, ban, len); - ptr += len; - - smp_append_sign(ctx, ptr2, ptr - ptr2); -} - -void -SMP_NewBan(double t0, const char *ban) -{ - struct smp_sc *sc; - uint32_t l = strlen(ban) + 1; - - VTAILQ_FOREACH(sc, &silos, list) { - smp_appendban(sc, &sc->ban1, t0, 0, l, ban); - smp_appendban(sc, &sc->ban2, t0, 0, l, ban); - } -} - -/*-------------------------------------------------------------------- - * Attempt to open and read in a ban list - */ - -static int -smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) -{ - uint8_t *ptr, *pe; - double t0; - uint32_t flags, length; - int i, retval = 0; - - ASSERT_CLI(); - (void)sc; - i = smp_chk_sign(ctx); - if (i) - return (i); - ptr = SIGN_DATA(ctx); - pe = ptr + ctx->ss->length; - - while (ptr < pe) { - if (memcmp(ptr, "BAN", 4)) { - retval = 1001; - break; - } - ptr += 4; - - memcpy(&t0, ptr, sizeof t0); - ptr += sizeof t0; - - memcpy(&flags, ptr, sizeof flags); - ptr += sizeof flags; - if (flags != 0) { - retval = 1002; - break; - } - - memcpy(&length, ptr, sizeof length); - ptr += sizeof length; - if (ptr + length > pe) { - retval = 1003; - break; - } - - if (ptr[length - 1] != '\0') { - retval = 1004; - break; - } - - BAN_Reload(t0, flags, (const char *)ptr); - - ptr += length; - } - assert(ptr <= pe); - return (retval); -} - - -/*--------------------------------------------------------------------*/ - -static uint64_t -smp_segend(const struct smp_seg *sg) -{ - - return (sg->p.offset + sg->p.length); -} - -static uint64_t -smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) -{ - - IASSERTALIGN(sc, sc->next_bot); - assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); - assert(sc->next_bot >= sg->p.offset); - assert(sc->next_top < 
sg->p.offset + sg->p.length); - return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); -} - /*-------------------------------------------------------------------- * Load segments * @@ -456,8 +121,9 @@ smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) * XXX: by the protection. */ -static void -smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) +void +smp_load_seg(const struct sess *sp, const struct smp_sc *sc, + struct smp_seg *sg) { struct smp_object *so; struct objcore *oc; @@ -494,9 +160,7 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) oc = sp->wrk->nobjcore; oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE; oc->flags &= ~OC_F_BUSY; - oc->priv = sg; - oc->priv2 = no; - oc->methods = &smp_oc_methods; + smp_init_oc(oc, sg, no); oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN); (void)HSH_Insert(sp); @@ -509,130 +173,10 @@ smp_load_seg(const struct sess *sp, const struct smp_sc *sc, struct smp_seg *sg) } /*-------------------------------------------------------------------- - * Attempt to open and read in a segment list - */ - -static int -smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx) -{ - uint64_t length, l; - struct smp_segptr *ss, *se; - struct smp_seg *sg, *sg1, *sg2; - int i, n = 0; - - ASSERT_CLI(); - i = smp_chk_sign(ctx); - if (i) - return (i); - - ss = SIGN_DATA(ctx); - length = ctx->ss->length; - - if (length == 0) { - /* No segments */ - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - return (0); - } - se = ss + length / sizeof *ss; - se--; - assert(ss <= se); - - /* - * Locate the free reserve, there are only two basic cases, - * but once we start dropping segments, things gets more complicated. 
- */ - - sc->free_offset = se->offset + se->length; - l = sc->mediasize - sc->free_offset; - if (se->offset > ss->offset && l >= sc->free_reserve) { - /* - * [__xxxxyyyyzzzz___] - * Plenty of space at tail, do nothing. - */ - } else if (ss->offset > se->offset) { - /* - * [zzzz____xxxxyyyy_] - * (make) space between ends - * We might nuke the entire tail end without getting - * enough space, in which case we fall through to the - * last check. - */ - while (ss < se && ss->offset > se->offset) { - l = ss->offset - (se->offset + se->length); - if (l > sc->free_reserve) - break; - ss++; - n++; - } - } - - if (l < sc->free_reserve) { - /* - * [__xxxxyyyyzzzz___] - * (make) space at front - */ - sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF]; - while (ss < se) { - l = ss->offset - sc->free_offset; - if (l > sc->free_reserve) - break; - ss++; - n++; - } - } - - assert (l >= sc->free_reserve); - - - sg1 = NULL; - sg2 = NULL; - for(; ss <= se; ss++) { - ALLOC_OBJ(sg, SMP_SEG_MAGIC); - AN(sg); - sg->lru = LRU_Alloc(); - CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC); - sg->p = *ss; - - sg->flags |= SMP_SEG_MUSTLOAD; - - /* - * HACK: prevent save_segs from nuking segment until we have - * HACK: loaded it. 
- */ - sg->nobj = 1; - if (sg1 != NULL) { - assert(sg1->p.offset != sg->p.offset); - if (sg1->p.offset < sg->p.offset) - assert(smp_segend(sg1) <= sg->p.offset); - else - assert(smp_segend(sg) <= sg1->p.offset); - } - if (sg2 != NULL) { - assert(sg2->p.offset != sg->p.offset); - if (sg2->p.offset < sg->p.offset) - assert(smp_segend(sg2) <= sg->p.offset); - else - assert(smp_segend(sg) <= sg2->p.offset); - } - - /* XXX: check that they are inside silo */ - /* XXX: check that they don't overlap */ - /* XXX: check that they are serial */ - sg->sc = sc; - VTAILQ_INSERT_TAIL(&sc->segments, sg, list); - sg2 = sg; - if (sg1 == NULL) - sg1 = sg; - } - printf("Dropped %d segments to make free_reserve\n", n); - return (0); -} - -/*-------------------------------------------------------------------- * Create a new segment */ -static void +void smp_new_seg(struct smp_sc *sc) { struct smp_seg *sg, *sg2; @@ -704,7 +248,7 @@ smp_new_seg(struct smp_sc *sc) * Close a segment */ -static void +void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) { uint64_t left, dst, len; @@ -764,415 +308,214 @@ smp_close_seg(struct smp_sc *sc, struct smp_seg *sg) sc->free_offset = smp_segend(sg); } -/*-------------------------------------------------------------------- - * Silo worker thread - */ - -static void * -smp_thread(struct sess *sp, void *priv) -{ - struct smp_sc *sc; - struct smp_seg *sg; - - (void)sp; - CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC); - - /* First, load all the objects from all segments */ - VTAILQ_FOREACH(sg, &sc->segments, list) - if (sg->flags & SMP_SEG_MUSTLOAD) - smp_load_seg(sp, sc, sg); - - sc->flags |= SMP_SC_LOADED; - BAN_Deref(&sc->tailban); - sc->tailban = NULL; - printf("Silo completely loaded\n"); - while (1) - (void)sleep (1); - NEEDLESS_RETURN(NULL); -} - -/*-------------------------------------------------------------------- - * Open a silo in the worker process - */ - -static void -smp_open(const struct stevedore *st) -{ - struct smp_sc *sc; - - 
ASSERT_CLI(); - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - - Lck_New(&sc->mtx, lck_smp); - Lck_Lock(&sc->mtx); - - sc->stevedore = st; - - /* We trust the parent to give us a valid silo, for good measure: */ - AZ(smp_valid_silo(sc)); - - AZ(mprotect(sc->base, 4096, PROT_READ)); - - sc->ident = SIGN_DATA(&sc->idn); - - /* We attempt ban1 first, and if that fails, try ban2 */ - if (smp_open_bans(sc, &sc->ban1)) - AZ(smp_open_bans(sc, &sc->ban2)); - - /* We attempt seg1 first, and if that fails, try seg2 */ - if (smp_open_segs(sc, &sc->seg1)) - AZ(smp_open_segs(sc, &sc->seg2)); - - sc->tailban = BAN_TailRef(); - AN(sc->tailban); - - /* XXX: save segments to ensure consistency between seg1 & seg2 ? */ - - /* XXX: abandon early segments to make sure we have free space ? */ - - /* Open a new segment, so we are ready to write */ - smp_new_seg(sc); - - /* Start the worker silo worker thread, it will load the objects */ - WRK_BgThread(&sc->thread, "persistence", smp_thread, sc); - - VTAILQ_INSERT_TAIL(&silos, sc, list); - Lck_Unlock(&sc->mtx); -} - -/*-------------------------------------------------------------------- - * Close a silo +/*--------------------------------------------------------------------- */ -static void -smp_close(const struct stevedore *st) +static struct smp_object * +smp_find_so(const struct smp_seg *sg, const struct objcore *oc) { - struct smp_sc *sc; - - ASSERT_CLI(); - - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - Lck_Lock(&sc->mtx); - smp_close_seg(sc, sc->cur_seg); - Lck_Unlock(&sc->mtx); + struct smp_object *so; + unsigned smp_idx; - /* XXX: reap thread */ + smp_idx = oc->priv2; + assert(smp_idx > 0); + assert(smp_idx <= sg->p.lobjlist); + so = &sg->objs[sg->p.lobjlist - smp_idx]; + return (so); } -/*-------------------------------------------------------------------- - * Allocate a bite. - * - * Allocate [min_size...max_size] space from the bottom of the segment, - * as is convenient. 
- * - * If 'so' + 'idx' is given, also allocate a smp_object from the top - * of the segment. - * - * Return the segment in 'ssg' if given. +/*--------------------------------------------------------------------- + * Check if a given storage structure is valid to use */ -static struct storage * -smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, - struct smp_object **so, unsigned *idx, struct smp_seg **ssg) +static int +smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg, + const struct storage *st) { - struct smp_sc *sc; - struct storage *ss; - struct smp_seg *sg; - unsigned tries; - uint64_t left, extra; + struct smp_seg *sg2; + const uint8_t *pst; + uint64_t o; - CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC); - assert(min_size <= max_size); + (void)sg; /* XXX: faster: Start search from here */ + pst = (const void *)st; - max_size = IRNUP(sc, max_size); - min_size = IRNUP(sc, min_size); + if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF])) + return (0x01); /* Before silo payload start */ + if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF])) + return (0x02); /* After silo end */ - extra = IRNUP(sc, sizeof(*ss)); - if (so != NULL) { - extra += sizeof(**so); - AN(idx); - } + o = pst - sc->base; - Lck_Lock(&sc->mtx); - sg = NULL; - ss = NULL; - for (tries = 0; tries < 3; tries++) { - left = smp_spaceleft(sc, sc->cur_seg); - if (left >= extra + min_size) + /* Find which segment contains the storage structure */ + VTAILQ_FOREACH(sg2, &sc->segments, list) + if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist) break; - smp_close_seg(sc, sc->cur_seg); - smp_new_seg(sc); - } - if (left >= extra + min_size) { - if (left < extra + max_size) - max_size = IRNDN(sc, left - extra); - - sg = sc->cur_seg; - ss = (void*)(sc->base + sc->next_bot); - sc->next_bot += max_size + IRNUP(sc, sizeof(*ss)); - sg->nalloc++; - if (so != NULL) { - sc->next_top -= sizeof(**so); - *so = (void*)(sc->base + sc->next_top); - /* Render this smp_object 
mostly harmless */ - (*so)->ttl = 0.; - (*so)->ban = 0.; - (*so)->ptr = 0;; - sg->objs = *so; - *idx = ++sg->p.lobjlist; - } - (void)smp_spaceleft(sc, sg); /* for the assert */ - } - Lck_Unlock(&sc->mtx); + if (sg2 == NULL) + return (0x04); /* No claiming segment */ + if (!(sg2->flags & SMP_SEG_LOADED)) + return (0x08); /* Claiming segment not loaded */ - if (ss == NULL) - return (ss); - AN(sg); - assert(max_size >= min_size); - - /* Fill the storage structure */ - memset(ss, 0, sizeof *ss); - ss->magic = STORAGE_MAGIC; - ss->ptr = PRNUP(sc, ss + 1); - ss->space = max_size; - ss->priv = sc; - ss->stevedore = st; - ss->fd = sc->fd; - if (ssg != NULL) - *ssg = sg; - return (ss); -} + /* It is now safe to access the storage structure */ + if (st->magic != STORAGE_MAGIC) + return (0x10); /* Not enough magic */ -/*-------------------------------------------------------------------- - * Find the per-segment lru list for this object - */ + if (o + st->space >= sg2->p.objlist) + return (0x20); /* Allocation not inside segment */ -static struct lru * -smp_getlru(const struct object *o) -{ - struct smp_seg *sg; + if (st->len > st->space) + return (0x40); /* Plain bad... */ - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); - return (sg->lru); + /* + * XXX: We could patch up st->stevedore and st->priv here + * XXX: but if things go right, we will never need them. 
+ */ + return (0); } -/*-------------------------------------------------------------------- - * Allocate an object +/*--------------------------------------------------------------------- + * objcore methods for persistent objects */ static struct object * -smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot, - const struct stv_objsecrets *soc) +smp_oc_getobj(struct worker *wrk, struct objcore *oc) { struct object *o; - struct storage *st; - struct smp_sc *sc; struct smp_seg *sg; struct smp_object *so; - struct objcore *oc; - unsigned objidx; - - CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC); - AN(sp->objcore); - AN(sp->wrk->ttl >= 0); + struct storage *st; + uint64_t l; + int bad; - ltot = IRNUP(sc, ltot); + /* Some calls are direct, but they should match anyway */ + assert(oc->methods->getobj == smp_oc_getobj); - st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg); - if (st == NULL) - return (NULL); + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + if (wrk == NULL) + AZ(oc->flags & OC_F_NEEDFIXUP); - assert(st->space >= ltot); - ltot = st->len = st->space; + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); - o = STV_MkObject(sp, st->ptr, ltot, soc); + o = (void*)(sg->sc->base + so->ptr); + /* + * The object may not be in this segment since we allocate it + * In a separate operation than the smp_object. We could check + * that it is in a later segment, but that would be complicated. + * XXX: For now, be happy if it is inside th silo + */ + ASSERT_PTR_IN_SILO(sg->sc, o); CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - o->objstore = st; - oc = o->objcore; - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - oc->flags |= OC_F_LRUDONTMOVE; + /* + * If this flag is not set, it will not be, and the lock is not + * needed to test it. + */ + if (!(oc->flags & OC_F_NEEDFIXUP)) + return (o); - Lck_Lock(&sc->mtx); - sg->nfixed++; - sg->nobj++; + AN(wrk); + Lck_Lock(&sg->sc->mtx); + /* Check again, we might have raced. 
*/ + if (oc->flags & OC_F_NEEDFIXUP) { + /* We trust caller to have a refcnt for us */ + o->objcore = oc; - /* We have to do this somewhere, might as well be here... */ - assert(sizeof so->hash == DIGEST_LEN); - memcpy(so->hash, oc->objhead->digest, DIGEST_LEN); - so->ttl = o->ttl; /* XXX: grace? */ - so->ptr = (uint8_t*)o - sc->base; - so->ban = o->ban_t; + bad = 0; + l = 0; + VTAILQ_FOREACH(st, &o->store, list) { + bad |= smp_loaded_st(sg->sc, sg, st); + if (bad) + break; + l += st->len; + } + if (l != o->len) + bad |= 0x100; - oc->priv = sg; - oc->priv2 = objidx; - oc->methods = &smp_oc_methods; + if(bad) { + o->ttl = 0; + o->grace = 0; + so->ttl = 0; + } - Lck_Unlock(&sc->mtx); + sg->nfixed++; + wrk->stats.n_object++; + wrk->stats.n_vampireobject--; + oc->flags &= ~OC_F_NEEDFIXUP; + } + Lck_Unlock(&sg->sc->mtx); return (o); } -/*-------------------------------------------------------------------- - * Allocate a bite - */ - -static struct storage * -smp_alloc(struct stevedore *st, size_t size) -{ - - return (smp_allocx(st, - size > 4096 ? 4096 : size, size, NULL, NULL, NULL)); -} - -/*-------------------------------------------------------------------- - * Trim a bite - * XXX: We could trim the last allocation. - */ - static void -smp_trim(struct storage *ss, size_t size) -{ - - (void)ss; - (void)size; -} - -/*-------------------------------------------------------------------- - * We don't track frees of storage, we track the objects which own the - * storage and when there are no more objects in in the first segment, - * it can be reclaimed. - * XXX: We could free the last allocation, but does that happen ? 
- */ - -static void __match_proto__(storage_free_f) -smp_free(struct storage *st) +smp_oc_updatemeta(struct objcore *oc) { + struct object *o; + struct smp_seg *sg; + struct smp_object *so; + double mttl; - /* XXX */ - (void)st; -} + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + o = smp_oc_getobj(NULL, oc); + AN(o); -/*-------------------------------------------------------------------- - * Pause until all silos have loaded. - */ + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC); + so = smp_find_so(sg, oc); -void -SMP_Ready(void) -{ - struct smp_sc *sc; + if (isnan(o->grace)) + mttl = o->ttl; + else + mttl = - (o->ttl + o->grace); - ASSERT_CLI(); - do { - VTAILQ_FOREACH(sc, &silos, list) - if (!(sc->flags & SMP_SC_LOADED)) - break; - if (sc != NULL) - (void)sleep(1); - } while (sc != NULL); + if (sg == sg->sc->cur_seg) { + /* Lock necessary, we might race close_seg */ + Lck_Lock(&sg->sc->mtx); + so->ban = o->ban_t; + so->ttl = mttl; + Lck_Unlock(&sg->sc->mtx); + } else { + so->ban = o->ban_t; + so->ttl = mttl; + } } -/*--------------------------------------------------------------------*/ - -const struct stevedore smp_stevedore = { - .magic = STEVEDORE_MAGIC, - .name = "persistent", - .init = smp_mgt_init, - .open = smp_open, - .close = smp_close, - .alloc = smp_alloc, - .allocobj = smp_allocobj, - .getlru = smp_getlru, - .free = smp_free, - .trim = smp_trim, -}; - -/*-------------------------------------------------------------------- - * Persistence is a bear to test unadultered, so we cheat by adding - * a cli command we can use to make it do tricks for us. - */ - -static void -debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) +static void __match_proto__() +smp_oc_freeobj(struct objcore *oc) { struct smp_seg *sg; - struct objcore *oc; + struct smp_object *so; - cli_out(cli, "Silo: %s (%s)\n", - sc->stevedore->ident, sc->filename); - VTAILQ_FOREACH(sg, &sc->segments, list) { - cli_out(cli, " Seg: [0x%jx ... 
+0x%jx]\n", - (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length); - if (sg == sc->cur_seg) - cli_out(cli, - " Alloc: [0x%jx ... 0x%jx] = 0x%jx free\n", - (uintmax_t)(sc->next_bot), - (uintmax_t)(sc->next_top), - (uintmax_t)(sc->next_top - sc->next_bot)); - cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", - sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); - if (objs) { - VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) - cli_out(cli, " %s %p\n", - oc == &sg->lru->senteniel ? - "senteniel" : "OC: ", oc); - } - } -} + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); -static void -debug_persistent(struct cli *cli, const char * const * av, void *priv) -{ - struct smp_sc *sc; + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + so = smp_find_so(sg, oc); - (void)priv; + Lck_Lock(&sg->sc->mtx); + so->ttl = 0; + so->ptr = 0; - if (av[2] == NULL) { - VTAILQ_FOREACH(sc, &silos, list) - debug_report_silo(cli, sc, 0); - return; - } - VTAILQ_FOREACH(sc, &silos, list) - if (!strcmp(av[2], sc->stevedore->ident)) - break; - if (sc == NULL) { - cli_out(cli, "Silo <%s> not found\n", av[2]); - cli_result(cli, CLIS_PARAM); - return; - } - if (av[3] == NULL) { - debug_report_silo(cli, sc, 0); - return; - } - Lck_Lock(&sc->mtx); - if (!strcmp(av[3], "sync")) { - smp_close_seg(sc, sc->cur_seg); - smp_new_seg(sc); - } else if (!strcmp(av[3], "dump")) { - debug_report_silo(cli, sc, 1); - } else { - cli_out(cli, "Unknown operation\n"); - cli_result(cli, CLIS_PARAM); - } - Lck_Unlock(&sc->mtx); + assert(sg->nobj > 0); + assert(sg->nfixed > 0); + sg->nobj--; + sg->nfixed--; + + Lck_Unlock(&sg->sc->mtx); } -static struct cli_proto debug_cmds[] = { - { "debug.persistent", "debug.persistent", - "Persistent debugging magic:\n" - "\tdebug.persistent [stevedore [cmd]]\n" - "With no cmd arg, a summary of the silo is returned.\n" - "Possible commands:\n" - "\tsync\tClose current segment, open a new one\n" - "\tdump\tinclude objcores in silo summary\n" - "", - 0, 2, "d", debug_persistent }, - { NULL } 
+static struct objcore_methods smp_oc_methods = { + .getobj = smp_oc_getobj, + .updatemeta = smp_oc_updatemeta, + .freeobj = smp_oc_freeobj, }; +/*--------------------------------------------------------------------*/ + void -SMP_Init(void) +smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx) { - CLI_AddFuncs(debug_cmds); + + oc->priv = sg; + oc->priv2 = objidx; + oc->methods = &smp_oc_methods; } diff --git a/bin/varnishd/storage_persistent_subr.c b/bin/varnishd/storage_persistent_subr.c index bc6e1fd..62ca3bf 100644 --- a/bin/varnishd/storage_persistent_subr.c +++ b/bin/varnishd/storage_persistent_subr.c @@ -169,7 +169,7 @@ smp_sync_sign(const struct smp_signctx *ctx) * Create and force a new signature to backing store */ -void +static void smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, uint64_t off, const char *id) { From kristian at varnish-cache.org Tue Feb 8 12:59:12 2011 From: kristian at varnish-cache.org (=?UTF-8?Q?Kristian_Lyngst=C3=B8l?=) Date: Tue, 08 Feb 2011 13:59:12 +0100 Subject: [master] 89556f3 Doc: client.identity came with 2.1.4, not 2.1.3 Message-ID: commit 89556f3d3f0b0521ea97ff2856a2feca2dd55f3f Author: Kristian Lyngstol Date: Tue Feb 8 13:58:55 2011 +0100 Doc: client.identity came with 2.1.4, not 2.1.3 diff --git a/doc/sphinx/reference/vcl.rst b/doc/sphinx/reference/vcl.rst index 2614d4b..d986cd5 100644 --- a/doc/sphinx/reference/vcl.rst +++ b/doc/sphinx/reference/vcl.rst @@ -174,9 +174,9 @@ The client director picks a backend based on the clients *identity*. You can set the VCL variable *client.identity* to identify the client by picking up the value of a session cookie or similar. -Note: from 2.1.0 to 2.1.2 *client.identity* isn't available and the +Note: from 2.1.0 to 2.1.3 *client.identity* isn't available and the director will use automatically set the idenity based on client.ip In -2.1.3 and onwards you can set client.identity to any string available. 
+2.1.4 and onwards you can set client.identity to any string available. The client director takes one option - *retries* which set the number of retries the director should take in order to find a healthy From phk at varnish-cache.org Wed Feb 9 09:17:18 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 10:17:18 +0100 Subject: [master] 9b8035d When my old math teacher made this kind of mistake and we caught it, he would always say "Just checking if you were paying attention". Message-ID: commit 9b8035d687bd2ed9ada1d951e230748e965dc6c2 Author: Poul-Henning Kamp Date: Wed Feb 9 09:15:44 2011 +0000 When my old math teacher made this kind of mistake and we caught it, he would always say "Just checking if you were paying attention". I guess Erik Inge Bols? was :-) diff --git a/bin/varnishd/cache_center.c b/bin/varnishd/cache_center.c index 25d57cb..96e585e 100644 --- a/bin/varnishd/cache_center.c +++ b/bin/varnishd/cache_center.c @@ -654,7 +654,7 @@ cnt_fetch(struct sess *sp) * Space for producing a Content-Length: header including padding * A billion gigabytes is enough for anybody. */ - l += strlen("Content-Encoding: XxxXxxXxxXxxXxxXxx" + sizeof(void *)); + l += strlen("Content-Length: XxxXxxXxxXxxXxxXxx" + sizeof(void *)); if (sp->wrk->ttl < sp->t_req + params->shortlived || sp->objcore == NULL) From phk at varnish-cache.org Wed Feb 9 09:58:29 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 10:58:29 +0100 Subject: [master] ef2face The LRU is a property of the objcore which is on it, not of whatever storage we have put the object into. Message-ID: commit ef2face7d1d546d64c1d64da8ebba7d03899ed94 Author: Poul-Henning Kamp Date: Wed Feb 9 09:58:02 2011 +0000 The LRU is a property of the objcore which is on it, not of whatever storage we have put the object into. 
diff --git a/bin/varnishd/cache.h b/bin/varnishd/cache.h index 32c6e45..da5c80e 100644 --- a/bin/varnishd/cache.h +++ b/bin/varnishd/cache.h @@ -354,11 +354,13 @@ struct storage { typedef struct object *getobj_f(struct worker *wrk, struct objcore *oc); typedef void updatemeta_f(struct objcore *oc); typedef void freeobj_f(struct objcore *oc); +typedef struct lru *getlru_f(const struct objcore *oc); struct objcore_methods { getobj_f *getobj; updatemeta_f *updatemeta; freeobj_f *freeobj; + getlru_f *getlru; }; extern struct objcore_methods default_oc_methods; @@ -407,6 +409,13 @@ oc_freeobj(struct objcore *oc) oc->methods->freeobj(oc); } +static inline struct lru * +oc_getlru(const struct objcore *oc) +{ + + return (oc->methods->getlru(oc)); +} + /*--------------------------------------------------------------------*/ struct lru { diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c index bc791f1..23c9cc2 100644 --- a/bin/varnishd/cache_expire.c +++ b/bin/varnishd/cache_expire.c @@ -143,7 +143,7 @@ EXP_Insert(struct object *o) assert(o->entered != 0 && !isnan(o->entered)); o->last_lru = o->entered; - lru = STV_lru(o); + lru = oc_getlru(oc); CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); Lck_Lock(&exp_mtx); (void)update_object_when(o); @@ -180,7 +180,7 @@ EXP_Touch(struct object *o, double tnow) if (oc->flags & OC_F_LRUDONTMOVE) return; - lru = STV_lru(o); + lru = oc_getlru(oc); CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); if (Lck_Trylock(&exp_mtx)) diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c index 8e0ee36..ae0e634 100644 --- a/bin/varnishd/stevedore.c +++ b/bin/varnishd/stevedore.c @@ -283,19 +283,6 @@ STV_NewObject(struct sess *sp, const char *hint, unsigned wsl, double ttl, /*-------------------------------------------------------------------*/ -static struct lru * -stv_default_getlru(const struct object *o) -{ - - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - CHECK_OBJ_NOTNULL(o->objstore, STORAGE_MAGIC); - CHECK_OBJ_NOTNULL(o->objstore->stevedore, 
STEVEDORE_MAGIC); - CHECK_OBJ_NOTNULL(o->objstore->stevedore->lru, LRU_MAGIC); - return (o->objstore->stevedore->lru); -} - -/*-------------------------------------------------------------------*/ - void STV_Freestore(struct object *o) { @@ -341,9 +328,19 @@ default_oc_freeobj(struct objcore *oc) STV_free(o->objstore); } +static struct lru * +default_oc_getlru(const struct objcore *oc) +{ + struct object *o; + + CAST_OBJ_NOTNULL(o, oc->priv, OBJECT_MAGIC); + return (o->objstore->stevedore->lru); +} + struct objcore_methods default_oc_methods = { .getobj = default_oc_getobj, .freeobj = default_oc_freeobj, + .getlru = default_oc_getlru, }; /*-------------------------------------------------------------------*/ @@ -402,17 +399,6 @@ STV_close(void) stv->close(stv); } -struct lru * -STV_lru(const struct object *o) -{ - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - CHECK_OBJ_NOTNULL(o->objstore, STORAGE_MAGIC); - CHECK_OBJ_NOTNULL(o->objstore->stevedore, STEVEDORE_MAGIC); - AN(o->objstore->stevedore->getlru); - - return (o->objstore->stevedore->getlru(o)); -} - /*-------------------------------------------------------------------- * Parse a stevedore argument on the form: * [ name '=' ] strategy [ ',' arg ] * @@ -475,8 +461,6 @@ STV_Config(const char *spec) AN(stv->alloc); if (stv->allocobj == NULL) stv->allocobj = stv_default_allocobj; - if (stv->getlru == NULL) - stv->getlru = stv_default_getlru; if (p == NULL) bprintf(stv->ident, "s%u", seq++); diff --git a/bin/varnishd/stevedore.h b/bin/varnishd/stevedore.h index 3951050..86da7ee 100644 --- a/bin/varnishd/stevedore.h +++ b/bin/varnishd/stevedore.h @@ -43,7 +43,6 @@ typedef void storage_trim_f(struct storage *, size_t size); typedef void storage_free_f(struct storage *); typedef struct object *storage_allocobj_f(struct stevedore *, struct sess *sp, unsigned ltot, const struct stv_objsecrets *); -typedef struct lru *storage_getlru_f(const struct object *); typedef void storage_close_f(const struct stevedore *); /* Prototypes 
for VCL variable responders */ @@ -59,7 +58,6 @@ struct stevedore { storage_init_f *init; /* called by mgt process */ storage_open_f *open; /* called by cache process */ storage_alloc_f *alloc; /* --//-- */ - storage_getlru_f *getlru; /* --//-- */ storage_trim_f *trim; /* --//-- */ storage_free_f *free; /* --//-- */ storage_close_f *close; /* --//-- */ @@ -88,7 +86,6 @@ void STV_trim(struct storage *st, size_t size); void STV_free(struct storage *st); void STV_open(void); void STV_close(void); -struct lru *STV_lru(const struct object *o); void STV_Config(const char *spec); void STV_Config_Transient(void); void STV_Freestore(struct object *o); diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 1ae9e80..1c25e85 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -66,7 +66,7 @@ SVNID("$Id$") static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos); /*-------------------------------------------------------------------- - * Add a new ban to all silos + * Add bans to silos */ static void @@ -96,6 +96,8 @@ smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx, double t0, smp_append_sign(ctx, ptr2, ptr - ptr2); } +/* Trust that cache_ban.c takes care of locking */ + void SMP_NewBan(double t0, const char *ban) { @@ -165,8 +167,6 @@ smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx) return (retval); } - - /*-------------------------------------------------------------------- * Attempt to open and read in a segment list */ @@ -468,20 +468,6 @@ smp_allocx(struct stevedore *st, size_t min_size, size_t max_size, } /*-------------------------------------------------------------------- - * Find the per-segment lru list for this object - */ - -static struct lru * -smp_getlru(const struct object *o) -{ - struct smp_seg *sg; - - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC); - return (sg->lru); -} - 
-/*-------------------------------------------------------------------- * Allocate an object */ @@ -575,24 +561,6 @@ smp_free(struct storage *st) (void)st; } -/*-------------------------------------------------------------------- - * Pause until all silos have loaded. - */ - -void -SMP_Ready(void) -{ - struct smp_sc *sc; - - ASSERT_CLI(); - do { - VTAILQ_FOREACH(sc, &silos, list) - if (!(sc->flags & SMP_SC_LOADED)) - break; - if (sc != NULL) - (void)sleep(1); - } while (sc != NULL); -} /*--------------------------------------------------------------------*/ @@ -604,7 +572,6 @@ const struct stevedore smp_stevedore = { .close = smp_close, .alloc = smp_alloc, .allocobj = smp_allocobj, - .getlru = smp_getlru, .free = smp_free, .trim = smp_trim, }; @@ -692,8 +659,29 @@ static struct cli_proto debug_cmds[] = { { NULL } }; +/*--------------------------------------------------------------------*/ + void SMP_Init(void) { CLI_AddFuncs(debug_cmds); } + +/*-------------------------------------------------------------------- + * Pause until all silos have loaded. 
+ */ + +void +SMP_Ready(void) +{ + struct smp_sc *sc; + + ASSERT_CLI(); + do { + VTAILQ_FOREACH(sc, &silos, list) + if (!(sc->flags & SMP_SC_LOADED)) + break; + if (sc != NULL) + (void)sleep(1); + } while (sc != NULL); +} diff --git a/bin/varnishd/storage_persistent_silo.c b/bin/varnishd/storage_persistent_silo.c index 480474e..defb3d0 100644 --- a/bin/varnishd/storage_persistent_silo.c +++ b/bin/varnishd/storage_persistent_silo.c @@ -503,10 +503,24 @@ smp_oc_freeobj(struct objcore *oc) Lck_Unlock(&sg->sc->mtx); } +/*-------------------------------------------------------------------- + * Find the per-segment lru list for this object + */ + +static struct lru * +smp_oc_getlru(const struct objcore *oc) +{ + struct smp_seg *sg; + + CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC); + return (sg->lru); +} + static struct objcore_methods smp_oc_methods = { .getobj = smp_oc_getobj, .updatemeta = smp_oc_updatemeta, .freeobj = smp_oc_freeobj, + .getlru = smp_oc_getlru, }; /*--------------------------------------------------------------------*/ From phk at varnish-cache.org Wed Feb 9 10:13:18 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 11:13:18 +0100 Subject: [master] d0db0a5 Get rid of the LRU list sentenniel. Message-ID: commit d0db0a59a90d54417ea00ccb4279d3f65c4c2ba4 Author: Poul-Henning Kamp Date: Wed Feb 9 10:12:44 2011 +0000 Get rid of the LRU list sentenniel. 
diff --git a/bin/varnishd/cache.h b/bin/varnishd/cache.h index da5c80e..08d10ec 100644 --- a/bin/varnishd/cache.h +++ b/bin/varnishd/cache.h @@ -382,7 +382,7 @@ struct objcore { #define OC_F_PRIV (1<<5) /* Stevedore private flag */ unsigned timer_idx; VTAILQ_ENTRY(objcore) list; - VLIST_ENTRY(objcore) lru_list; + VTAILQ_ENTRY(objcore) lru_list; VTAILQ_ENTRY(objcore) ban_list; struct ban *ban; }; @@ -416,15 +416,6 @@ oc_getlru(const struct objcore *oc) return (oc->methods->getlru(oc)); } -/*--------------------------------------------------------------------*/ - -struct lru { - unsigned magic; -#define LRU_MAGIC 0x3fec7bb0 - VLIST_HEAD(,objcore) lru_head; - struct objcore senteniel; -}; - /* Object structure --------------------------------------------------*/ VTAILQ_HEAD(storagehead, storage); @@ -634,7 +625,7 @@ void EXP_Inject(struct objcore *oc, struct lru *lru, double when); void EXP_Init(void); void EXP_Rearm(const struct object *o); void EXP_Touch(struct object *o, double tnow); -int EXP_NukeOne(const struct sess *sp, const struct lru *lru); +int EXP_NukeOne(const struct sess *sp, struct lru *lru); /* cache_fetch.c */ struct storage *FetchStorage(const struct sess *sp, ssize_t sz); diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c index 23c9cc2..0d3b1a2 100644 --- a/bin/varnishd/cache_expire.c +++ b/bin/varnishd/cache_expire.c @@ -98,7 +98,7 @@ exp_insert(struct objcore *oc, struct lru *lru) assert(oc->timer_idx == BINHEAP_NOIDX); binheap_insert(exp_heap, oc); assert(oc->timer_idx != BINHEAP_NOIDX); - VLIST_INSERT_BEFORE(&lru->senteniel, oc, lru_list); + VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list); oc->flags |= OC_F_ONLRU; } @@ -187,8 +187,8 @@ EXP_Touch(struct object *o, double tnow) return; if (oc->flags & OC_F_ONLRU) { /* XXX ?? 
*/ - VLIST_REMOVE(oc, lru_list); - VLIST_INSERT_BEFORE(&lru->senteniel, oc, lru_list); + VTAILQ_REMOVE(&lru->lru_head, oc, lru_list); + VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list); VSC_main->n_lru_moved++; o->last_lru = tnow; } @@ -239,6 +239,7 @@ static void * __match_proto__(void *start_routine(void *)) exp_timer(struct sess *sp, void *priv) { struct objcore *oc; + struct lru *lru; double t; (void)priv; @@ -272,7 +273,8 @@ exp_timer(struct sess *sp, void *priv) /* And from LRU */ if (oc->flags & OC_F_ONLRU) { - VLIST_REMOVE(oc, lru_list); + lru = oc_getlru(oc); + VTAILQ_REMOVE(&lru->lru_head, oc, lru_list); oc->flags &= ~OC_F_ONLRU; } @@ -293,19 +295,14 @@ exp_timer(struct sess *sp, void *priv) */ int -EXP_NukeOne(const struct sess *sp, const struct lru *lru) +EXP_NukeOne(const struct sess *sp, struct lru *lru) { struct objcore *oc; struct object *o; /* Find the first currently unused object on the LRU. */ Lck_Lock(&exp_mtx); - VLIST_FOREACH(oc, &lru->lru_head, lru_list) { - if (oc == &lru->senteniel) { - AZ(VLIST_NEXT(oc, lru_list)); - oc = NULL; - break; - } + VTAILQ_FOREACH(oc, &lru->lru_head, lru_list) { CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); if (oc->timer_idx == BINHEAP_NOIDX) /* exp_timer has it */ continue; @@ -313,7 +310,7 @@ EXP_NukeOne(const struct sess *sp, const struct lru *lru) break; } if (oc != NULL) { - VLIST_REMOVE(oc, lru_list); + VTAILQ_REMOVE(&lru->lru_head, oc, lru_list); oc->flags &= ~OC_F_ONLRU; binheap_delete(exp_heap, oc->timer_idx); assert(oc->timer_idx == BINHEAP_NOIDX); diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c index ae0e634..4865eaf 100644 --- a/bin/varnishd/stevedore.c +++ b/bin/varnishd/stevedore.c @@ -70,8 +70,7 @@ LRU_Alloc(void) ALLOC_OBJ(l, LRU_MAGIC); AN(l); - VLIST_INIT(&l->lru_head); - VLIST_INSERT_HEAD(&l->lru_head, &l->senteniel, lru_list); + VTAILQ_INIT(&l->lru_head); return (l); } diff --git a/bin/varnishd/stevedore.h b/bin/varnishd/stevedore.h index 86da7ee..94a0e06 100644 --- 
a/bin/varnishd/stevedore.h +++ b/bin/varnishd/stevedore.h @@ -50,6 +50,16 @@ typedef void storage_close_f(const struct stevedore *); #include "vrt_stv_var.h" #undef VRTSTVTYPE +/*--------------------------------------------------------------------*/ + +struct lru { + unsigned magic; +#define LRU_MAGIC 0x3fec7bb0 + VTAILQ_HEAD(,objcore) lru_head; +}; + +/*--------------------------------------------------------------------*/ + struct stevedore { unsigned magic; #define STEVEDORE_MAGIC 0x4baf43db diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index 1c25e85..fa32472 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -601,10 +601,8 @@ debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs) cli_out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n", sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed); if (objs) { - VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list) - cli_out(cli, " %s %p\n", - oc == &sg->lru->senteniel ? - "senteniel" : "OC: ", oc); + VTAILQ_FOREACH(oc, &sg->lru->lru_head, lru_list) + cli_out(cli, " OC %p\n", oc); } } } From phk at varnish-cache.org Wed Feb 9 10:40:28 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 11:40:28 +0100 Subject: [master] f9848a6 Allocate the LRU list in the child process instead of the master process. Message-ID: commit f9848a68d071478d3a0c5db5760ca4723e8d848f Author: Poul-Henning Kamp Date: Wed Feb 9 10:40:10 2011 +0000 Allocate the LRU list in the child process instead of the master process. 
diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c index 4865eaf..5b165e1 100644 --- a/bin/varnishd/stevedore.c +++ b/bin/varnishd/stevedore.c @@ -377,12 +377,15 @@ STV_open(void) struct stevedore *stv; VTAILQ_FOREACH(stv, &stevedores, list) { + stv->lru = LRU_Alloc(); if (stv->open != NULL) stv->open(stv); } stv = stv_transient; - if (stv->open != NULL) + if (stv->open != NULL) { + stv->lru = LRU_Alloc(); stv->open(stv); + } } void @@ -477,8 +480,6 @@ STV_Config(const char *spec) stv->ident, stv->name); } - stv->lru = LRU_Alloc(); - if (stv->init != NULL) stv->init(stv, ac, av); else if (ac != 0) From phk at varnish-cache.org Wed Feb 9 12:06:52 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 13:06:52 +0100 Subject: [master] 195fa50 Split the expire lock into expire and per-lru list locks. Message-ID: commit 195fa50ceba17392dd708f92c6203ea12ddcb72e Author: Poul-Henning Kamp Date: Wed Feb 9 12:04:25 2011 +0000 Split the expire lock into expire and per-lru list locks. The lock order is lru->exp lock, so that the timer_index can be safely examined while holding only the lru lock, eliminating the need for the separate OC_F_ONLRU flag. The critical trick is that in EXP_Touch() the lru_lock is sufficient: We just move the object around on the lru list, we don't add or delete it. This hopefully reduces lock contention on these locks. 
diff --git a/bin/varnishd/cache.h b/bin/varnishd/cache.h index 08d10ec..be209a7 100644 --- a/bin/varnishd/cache.h +++ b/bin/varnishd/cache.h @@ -100,8 +100,6 @@ struct vsc_lck; struct waitinglist; struct vef_priv; -struct lock { void *priv; }; // Opaque - #define DIGEST_LEN 32 /* Name of transient storage */ @@ -375,7 +373,6 @@ struct objcore { struct objhead *objhead; double timer_when; unsigned flags; -#define OC_F_ONLRU (1<<0) #define OC_F_BUSY (1<<1) #define OC_F_PASS (1<<2) #define OC_F_LRUDONTMOVE (1<<4) @@ -391,6 +388,9 @@ static inline struct object * oc_getobj(struct worker *wrk, struct objcore *oc) { + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AN(oc->methods); + AN(oc->methods->getobj); return (oc->methods->getobj(wrk, oc)); } @@ -398,6 +398,8 @@ static inline void oc_updatemeta(struct objcore *oc) { + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AN(oc->methods); if (oc->methods->updatemeta != NULL) oc->methods->updatemeta(oc); } @@ -406,6 +408,9 @@ static inline void oc_freeobj(struct objcore *oc) { + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AN(oc->methods); + AN(oc->methods->freeobj); oc->methods->freeobj(oc); } @@ -413,6 +418,9 @@ static inline struct lru * oc_getlru(const struct objcore *oc) { + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AN(oc->methods); + AN(oc->methods->getlru); return (oc->methods->getlru(oc)); } diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c index 0d3b1a2..e7eca76 100644 --- a/bin/varnishd/cache_expire.c +++ b/bin/varnishd/cache_expire.c @@ -62,7 +62,7 @@ static struct lock exp_mtx; * so that other users of the object will not stumble trying to change the * ttl or lru position. */ -#define BINHEAP_NOIDX 0 +#define BINHEAP_NOIDX 0 /* XXX: should be in binary_heap.h */ /*-------------------------------------------------------------------- * When & why does the timer fire for this object ? 
@@ -99,7 +99,6 @@ exp_insert(struct objcore *oc, struct lru *lru) binheap_insert(exp_heap, oc); assert(oc->timer_idx != BINHEAP_NOIDX); VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list); - oc->flags |= OC_F_ONLRU; } /*-------------------------------------------------------------------- @@ -115,10 +114,12 @@ EXP_Inject(struct objcore *oc, struct lru *lru, double when) CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); + Lck_Lock(&lru->mtx); Lck_Lock(&exp_mtx); oc->timer_when = when; exp_insert(oc, lru); Lck_Unlock(&exp_mtx); + Lck_Unlock(&lru->mtx); } /*-------------------------------------------------------------------- @@ -145,10 +146,12 @@ EXP_Insert(struct object *o) lru = oc_getlru(oc); CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); + Lck_Lock(&lru->mtx); Lck_Lock(&exp_mtx); (void)update_object_when(o); exp_insert(oc, lru); Lck_Unlock(&exp_mtx); + Lck_Unlock(&lru->mtx); oc_updatemeta(oc); } @@ -183,17 +186,23 @@ EXP_Touch(struct object *o, double tnow) lru = oc_getlru(oc); CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); - if (Lck_Trylock(&exp_mtx)) + /* + * We only need the LRU lock here. The locking order is LRU->EXP + * so we can trust the content of the oc->timer_idx without the + * EXP lock. Since each lru list has its own lock, this should + * reduce contention a fair bit + */ + if (Lck_Trylock(&lru->mtx)) return; - if (oc->flags & OC_F_ONLRU) { /* XXX ?? 
*/ + if (oc->timer_idx != BINHEAP_NOIDX) { VTAILQ_REMOVE(&lru->lru_head, oc, lru_list); VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list); VSC_main->n_lru_moved++; o->last_lru = tnow; } - Lck_Unlock(&exp_mtx); + Lck_Unlock(&lru->mtx); } /*-------------------------------------------------------------------- @@ -209,12 +218,15 @@ void EXP_Rearm(const struct object *o) { struct objcore *oc; + struct lru *lru; CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); oc = o->objcore; if (oc == NULL) return; CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + lru = oc_getlru(oc); + Lck_Lock(&lru->mtx); Lck_Lock(&exp_mtx); /* * The hang-man might have this object of the binheap while @@ -226,10 +238,10 @@ EXP_Rearm(const struct object *o) assert(oc->timer_idx != BINHEAP_NOIDX); } Lck_Unlock(&exp_mtx); + Lck_Unlock(&lru->mtx); oc_updatemeta(oc); } - /*-------------------------------------------------------------------- * This thread monitors the root of the binary heap and whenever an * object expires, accounting also for graceability, it is killed. @@ -244,27 +256,50 @@ exp_timer(struct sess *sp, void *priv) (void)priv; t = TIM_real(); + oc = NULL; while (1) { + if (oc == NULL) { + WSL_Flush(sp->wrk, 0); + WRK_SumStat(sp->wrk); + TIM_sleep(params->expiry_sleep); + t = TIM_real(); + } + Lck_Lock(&exp_mtx); oc = binheap_root(exp_heap); - CHECK_OBJ_ORNULL(oc, OBJCORE_MAGIC); + if (oc == NULL) { + Lck_Unlock(&exp_mtx); + continue; + } + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + /* * We may have expired so many objects that our timestamp * got out of date, refresh it and check again. */ - if (oc != NULL && oc->timer_when > t) + if (oc->timer_when > t) t = TIM_real(); - if (oc == NULL || oc->timer_when > t) { /* XXX: > or >= ? */ + if (oc->timer_when > t) { Lck_Unlock(&exp_mtx); - WSL_Flush(sp->wrk, 0); - WRK_SumStat(sp->wrk); - TIM_sleep(params->expiry_sleep); - t = TIM_real(); + oc = NULL; continue; } - /* It's time... */ - CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC); + /* + * It's time... 
+ * Technically we should drop the exp_mtx, get the lru->mtx + * get the exp_mtx again and then check that the oc is still + * on the binheap. We take the shorter route and try to + * get the lru->mtx and punt if we fail. + */ + + lru = oc_getlru(oc); + CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); + if (Lck_Trylock(&lru->mtx)) { + Lck_Unlock(&exp_mtx); + oc = NULL; + continue; + } /* Remove from binheap */ assert(oc->timer_idx != BINHEAP_NOIDX); @@ -272,13 +307,11 @@ exp_timer(struct sess *sp, void *priv) assert(oc->timer_idx == BINHEAP_NOIDX); /* And from LRU */ - if (oc->flags & OC_F_ONLRU) { - lru = oc_getlru(oc); - VTAILQ_REMOVE(&lru->lru_head, oc, lru_list); - oc->flags &= ~OC_F_ONLRU; - } + lru = oc_getlru(oc); + VTAILQ_REMOVE(&lru->lru_head, oc, lru_list); Lck_Unlock(&exp_mtx); + Lck_Unlock(&lru->mtx); VSC_main->n_expired++; @@ -301,26 +334,32 @@ EXP_NukeOne(const struct sess *sp, struct lru *lru) struct object *o; /* Find the first currently unused object on the LRU. */ + Lck_Lock(&lru->mtx); Lck_Lock(&exp_mtx); VTAILQ_FOREACH(oc, &lru->lru_head, lru_list) { CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - if (oc->timer_idx == BINHEAP_NOIDX) /* exp_timer has it */ - continue; + assert (oc->timer_idx != BINHEAP_NOIDX); + /* + * It wont release any space if we cannot release the last + * reference, besides, if somebody else has a reference, + * it's a bad idea to nuke this object anyway. 
+ */ if (oc->refcnt == 1) break; } if (oc != NULL) { VTAILQ_REMOVE(&lru->lru_head, oc, lru_list); - oc->flags &= ~OC_F_ONLRU; binheap_delete(exp_heap, oc->timer_idx); assert(oc->timer_idx == BINHEAP_NOIDX); VSC_main->n_lru_nuked++; } Lck_Unlock(&exp_mtx); + Lck_Unlock(&lru->mtx); if (oc == NULL) return (-1); + /* XXX: bad idea for -spersistent */ o = oc_getobj(sp->wrk, oc); WSL(sp->wrk, SLT_ExpKill, 0, "%u LRU", o->xid); (void)HSH_Deref(sp->wrk, NULL, &o); diff --git a/bin/varnishd/cache_lck.c b/bin/varnishd/cache_lck.c index db0ee2b..831ef7b 100644 --- a/bin/varnishd/cache_lck.c +++ b/bin/varnishd/cache_lck.c @@ -162,6 +162,7 @@ Lck__New(struct lock *lck, struct vsc_lck *st, const char *w) { struct ilck *ilck; + AN(st); AZ(lck->priv); ALLOC_OBJ(ilck, ILCK_MAGIC); AN(ilck); diff --git a/bin/varnishd/common.h b/bin/varnishd/common.h index 1c29696..6cbbfd7 100644 --- a/bin/varnishd/common.h +++ b/bin/varnishd/common.h @@ -90,3 +90,6 @@ void vsm_iter_n(struct vsm_chunk **pp); #define VSM_CLASS_PARAM "Params" #define VSM_CLASS_MARK "MgrCld" #define VSM_COOL_TIME 5 + +/* cache_lck.c */ +struct lock { void *priv; }; // Opaque diff --git a/bin/varnishd/locks.h b/bin/varnishd/locks.h index 5f8ff63..ec56832 100644 --- a/bin/varnishd/locks.h +++ b/bin/varnishd/locks.h @@ -44,6 +44,7 @@ LOCK(herder) LOCK(wq) LOCK(objhdr) LOCK(exp) +LOCK(lru) LOCK(cli) LOCK(ban) LOCK(vbp) diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c index 5b165e1..5dd9982 100644 --- a/bin/varnishd/stevedore.c +++ b/bin/varnishd/stevedore.c @@ -71,6 +71,7 @@ LRU_Alloc(void) ALLOC_OBJ(l, LRU_MAGIC); AN(l); VTAILQ_INIT(&l->lru_head); + Lck_New(&l->mtx, lck_lru); return (l); } diff --git a/bin/varnishd/stevedore.h b/bin/varnishd/stevedore.h index 94a0e06..c0c3c31 100644 --- a/bin/varnishd/stevedore.h +++ b/bin/varnishd/stevedore.h @@ -56,6 +56,7 @@ struct lru { unsigned magic; #define LRU_MAGIC 0x3fec7bb0 VTAILQ_HEAD(,objcore) lru_head; + struct lock mtx; }; 
/*--------------------------------------------------------------------*/ From phk at varnish-cache.org Wed Feb 9 13:00:34 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 14:00:34 +0100 Subject: [master] b6633c0 BINHEAP_NOIDX should come from the binheap implementation. Message-ID: commit b6633c0b17e0bea9f0913b440e9ae2a5d320770e Author: Poul-Henning Kamp Date: Wed Feb 9 13:00:16 2011 +0000 BINHEAP_NOIDX should come from the binheap implementation. Now it does. diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c index e7eca76..32de4a3 100644 --- a/bin/varnishd/cache_expire.c +++ b/bin/varnishd/cache_expire.c @@ -57,13 +57,6 @@ static pthread_t exp_thread; static struct binheap *exp_heap; static struct lock exp_mtx; -/* - * This is a magic marker for the objects currently on the SIOP [look it up] - * so that other users of the object will not stumble trying to change the - * ttl or lru position. - */ -#define BINHEAP_NOIDX 0 /* XXX: should be in binary_heap.h */ - /*-------------------------------------------------------------------- * When & why does the timer fire for this object ? 
*/ diff --git a/include/binary_heap.h b/include/binary_heap.h index 869ebf9..d8a4475 100644 --- a/include/binary_heap.h +++ b/include/binary_heap.h @@ -79,3 +79,4 @@ void *binheap_root(const struct binheap *); * Return the root item */ +#define BINHEAP_NOIDX 0 diff --git a/lib/libvarnish/binary_heap.c b/lib/libvarnish/binary_heap.c index 53ff738..125f7a2 100644 --- a/lib/libvarnish/binary_heap.c +++ b/lib/libvarnish/binary_heap.c @@ -351,7 +351,7 @@ binheap_delete(struct binheap *bh, unsigned idx) assert(idx < bh->next); assert(idx > 0); assert(A(bh, idx) != NULL); - bh->update(bh->priv, A(bh, idx), 0); + bh->update(bh->priv, A(bh, idx), BINHEAP_NOIDX); if (idx == --bh->next) { A(bh, bh->next) = NULL; return; From phk at varnish-cache.org Wed Feb 9 13:50:27 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 14:50:27 +0100 Subject: [master] abf653c Argument polishing Message-ID: commit abf653cdd5f6f5ac057f876fef2904c3a2db245f Author: Poul-Henning Kamp Date: Wed Feb 9 13:07:49 2011 +0000 Argument polishing diff --git a/bin/varnishd/cache.h b/bin/varnishd/cache.h index be209a7..0ab2dc1 100644 --- a/bin/varnishd/cache.h +++ b/bin/varnishd/cache.h @@ -632,7 +632,7 @@ void EXP_Insert(struct object *o); void EXP_Inject(struct objcore *oc, struct lru *lru, double when); void EXP_Init(void); void EXP_Rearm(const struct object *o); -void EXP_Touch(struct object *o, double tnow); +int EXP_Touch(struct objcore *oc); int EXP_NukeOne(const struct sess *sp, struct lru *lru); /* cache_fetch.c */ diff --git a/bin/varnishd/cache_center.c b/bin/varnishd/cache_center.c index 96e585e..48f9c15 100644 --- a/bin/varnishd/cache_center.c +++ b/bin/varnishd/cache_center.c @@ -208,8 +208,9 @@ cnt_deliver(struct sess *sp) sp->t_resp = TIM_real(); if (sp->obj->objcore != NULL) { - if ((sp->t_resp - sp->obj->last_lru) > params->lru_timeout) - EXP_Touch(sp->obj, sp->t_resp); + if ((sp->t_resp - sp->obj->last_lru) > params->lru_timeout && + EXP_Touch(sp->obj->objcore)) 
+ sp->obj->last_lru = sp->t_resp; sp->obj->last_use = sp->t_resp; /* XXX: locking ? */ } sp->wrk->resp = sp->wrk->http[2]; diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c index 32de4a3..1da3532 100644 --- a/bin/varnishd/cache_expire.c +++ b/bin/varnishd/cache_expire.c @@ -156,14 +156,11 @@ EXP_Insert(struct object *o) * This optimization obviously leaves the LRU list imperfectly sorted. */ -void -EXP_Touch(struct object *o, double tnow) +int +EXP_Touch(struct objcore *oc) { - struct objcore *oc; struct lru *lru; - CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC); - oc = o->objcore; CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); /* @@ -174,7 +171,7 @@ EXP_Touch(struct object *o, double tnow) * the cleaner from doing its job. */ if (oc->flags & OC_F_LRUDONTMOVE) - return; + return (0); lru = oc_getlru(oc); CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); @@ -186,16 +183,15 @@ EXP_Touch(struct object *o, double tnow) * reduce contention a fair bit */ if (Lck_Trylock(&lru->mtx)) - return; + return (0); if (oc->timer_idx != BINHEAP_NOIDX) { VTAILQ_REMOVE(&lru->lru_head, oc, lru_list); VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list); VSC_main->n_lru_moved++; - o->last_lru = tnow; } - Lck_Unlock(&lru->mtx); + return (1); } /*-------------------------------------------------------------------- From phk at varnish-cache.org Wed Feb 9 13:50:27 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 14:50:27 +0100 Subject: [master] 57fed77 Add a LRU_Free() function Message-ID: commit 57fed777acfb201560f85f7f6c40df6588f52368 Author: Poul-Henning Kamp Date: Wed Feb 9 13:50:16 2011 +0000 Add a LRU_Free() function diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c index 5dd9982..912fd68 100644 --- a/bin/varnishd/stevedore.c +++ b/bin/varnishd/stevedore.c @@ -55,12 +55,6 @@ static const struct stevedore * volatile stv_next; static struct stevedore *stv_transient; /*-------------------------------------------------------------------- - * NB! 
Dirty trick alert: - * - * We use a captive objcore as tail senteniel for LRU lists, but to - * make sure it does not get into play by accident, we do _not_ - * initialize its magic with OBJCORE_MAGIC. - * */ struct lru * @@ -75,6 +69,14 @@ LRU_Alloc(void) return (l); } +void +LRU_Free(struct lru *lru) +{ + CHECK_OBJ_NOTNULL(lru, LRU_MAGIC); + Lck_Delete(&lru->mtx); + FREE_OBJ(lru); +} + /*-------------------------------------------------------------------- * XXX: trust pointer writes to be atomic */ diff --git a/bin/varnishd/stevedore.h b/bin/varnishd/stevedore.h index c0c3c31..e97d70f 100644 --- a/bin/varnishd/stevedore.h +++ b/bin/varnishd/stevedore.h @@ -102,6 +102,7 @@ void STV_Config_Transient(void); void STV_Freestore(struct object *o); struct lru *LRU_Alloc(void); +void LRU_Free(struct lru *lru); int STV_GetFile(const char *fn, int *fdp, const char **fnp, const char *ctx); uintmax_t STV_FileSize(int fd, const char *size, unsigned *granularity, From phk at varnish-cache.org Wed Feb 9 14:03:52 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 15:03:52 +0100 Subject: [master] 2b2f723 Now we do. Message-ID: commit 2b2f723ec647f88a02a050508973be0380a73990 Author: Poul-Henning Kamp Date: Wed Feb 9 14:03:39 2011 +0000 Now we do. diff --git a/bin/varnishd/storage_persistent_silo.c b/bin/varnishd/storage_persistent_silo.c index defb3d0..d312ec9 100644 --- a/bin/varnishd/storage_persistent_silo.c +++ b/bin/varnishd/storage_persistent_silo.c @@ -30,7 +30,6 @@ * XXX: Before we start the client or maybe after it stops, we should give the * XXX: stevedores a chance to examine their storage for consistency. * - * XXX: Do we ever free the LRU-lists ? 
*/ #include "config.h" @@ -101,7 +100,8 @@ smp_save_segs(struct smp_sc *sc) if (sg == sc->cur_seg) continue; VTAILQ_REMOVE(&sc->segments, sg, list); - free(sg); + LRU_Free(sg->lru); + FREE_OBJ(sg); } smp_save_seg(sc, &sc->seg1); smp_save_seg(sc, &sc->seg2); From phk at varnish-cache.org Wed Feb 9 15:07:15 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 09 Feb 2011 16:07:15 +0100 Subject: [master] 1a30dde Give the persistent watcher-thread something to do Message-ID: commit 1a30dde3bb72c9dc4b35766fe7a61197954e539b Author: Poul-Henning Kamp Date: Wed Feb 9 15:06:27 2011 +0000 Give the persistent watcher-thread something to do diff --git a/bin/varnishd/storage_persistent.c b/bin/varnishd/storage_persistent.c index fa32472..0fd2ee4 100644 --- a/bin/varnishd/storage_persistent.c +++ b/bin/varnishd/storage_persistent.c @@ -309,8 +309,16 @@ smp_thread(struct sess *sp, void *priv) BAN_Deref(&sc->tailban); sc->tailban = NULL; printf("Silo completely loaded\n"); - while (1) + while (1) { (void)sleep (1); + sg = VTAILQ_FIRST(&sc->segments); + if (sg != NULL && sg -> sc->cur_seg && + sg->nobj == 0) { + Lck_Lock(&sc->mtx); + smp_save_segs(sc); + Lck_Unlock(&sc->mtx); + } + } NEEDLESS_RETURN(NULL); } diff --git a/bin/varnishd/storage_persistent.h b/bin/varnishd/storage_persistent.h index 35affb6..bf4e2b5 100644 --- a/bin/varnishd/storage_persistent.h +++ b/bin/varnishd/storage_persistent.h @@ -185,6 +185,7 @@ void smp_load_seg(const struct sess *sp, const struct smp_sc *sc, void smp_new_seg(struct smp_sc *sc); void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg); void smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx); +void smp_save_segs(struct smp_sc *sc); /* storage_persistent_subr.c */ diff --git a/bin/varnishd/storage_persistent_silo.c b/bin/varnishd/storage_persistent_silo.c index d312ec9..7a8bac2 100644 --- a/bin/varnishd/storage_persistent_silo.c +++ b/bin/varnishd/storage_persistent_silo.c @@ -83,7 +83,7 @@ smp_save_seg(const 
struct smp_sc *sc, struct smp_signctx *ctx) smp_sync_sign(ctx); } -static void +void smp_save_segs(struct smp_sc *sc) { struct smp_seg *sg, *sg2; From ingvar at varnish-cache.org Thu Feb 10 12:20:11 2011 From: ingvar at varnish-cache.org (Ingvar) Date: Thu, 10 Feb 2011 13:20:11 +0100 Subject: [master] 7819754 rhel4 build fix, plus changed som notes in the comments Message-ID: commit 78197542404b0ca6611defb83fd2a5a4fb4aadc1 Author: Ingvar Hagelund Date: Thu Feb 10 13:20:02 2011 +0100 rhel4 build fix, plus changed som notes in the comments diff --git a/redhat/varnish.spec b/redhat/varnish.spec index 07a3926..8d03fa8 100644 --- a/redhat/varnish.spec +++ b/redhat/varnish.spec @@ -1,16 +1,16 @@ Summary: High-performance HTTP accelerator Name: varnish Version: 3.0.0 -Release: 0.git20110203%{?dist} +Release: 0.git20110210%{?dist} License: BSD Group: System Environment/Daemons URL: http://www.varnish-cache.org/ #Source0: http://repo.varnish-cache.org/source/%{name}-%{version}.tar.gz Source0: %{name}-trunk.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -# The svn sources needs autoconf, automake and libtool to generate a suitable -# configure script. 
Release tarballs would not need this -BuildRequires: automake autoconf libtool python-docutils +# To build from git, start with a make dist, see redhat/README.redhat +# You will need at least automake autoconf libtool python-docutils +#BuildRequires: automake autoconf libtool python-docutils BuildRequires: ncurses-devel libxslt groff pcre-devel pkgconfig Requires: varnish-libs = %{version}-%{release} Requires: logrotate @@ -54,8 +54,6 @@ Varnish Cache is a high-performance HTTP accelerator %package docs Summary: Documentation files for %name Group: System Environment/Libraries -%if 0%{?rhel} > 4 && 0%{?fedora} > 10 -BuildRequires: python-sphinx %endif %description docs @@ -75,18 +73,14 @@ Documentation files for %name #%setup -q %setup -q -n varnish-trunk -# The svn sources needs to generate a suitable configure script -# Release tarballs would not need this -#./autogen.sh - mkdir examples cp bin/varnishd/default.vcl etc/zope-plone.vcl examples %build # No pkgconfig/libpcre.pc in rhel4 %if 0%{?rhel} == 4 - export PCRE_CFLAGS=`pcre-config --cflags` - export PCRE_LIBS=`pcre-config --libs` + export PCRE_CFLAGS="`pcre-config --cflags`" + export PCRE_LIBS="`pcre-config --libs`" %endif # Remove "--disable static" if you want to build static libraries From ingvar at varnish-cache.org Thu Feb 10 12:25:25 2011 From: ingvar at varnish-cache.org (Ingvar) Date: Thu, 10 Feb 2011 13:25:25 +0100 Subject: [master] 7a77305 typofix Message-ID: commit 7a773058d6cf8075565fe15e161a9dc74718b7f9 Author: Ingvar Hagelund Date: Thu Feb 10 13:25:20 2011 +0100 typofix diff --git a/redhat/varnish.spec b/redhat/varnish.spec index 8d03fa8..bb6dda9 100644 --- a/redhat/varnish.spec +++ b/redhat/varnish.spec @@ -54,7 +54,6 @@ Varnish Cache is a high-performance HTTP accelerator %package docs Summary: Documentation files for %name Group: System Environment/Libraries -%endif %description docs Documentation files for %name From phk at varnish-cache.org Fri Feb 11 08:28:17 2011 From: phk at 
varnish-cache.org (Poul-Henning Kamp) Date: Fri, 11 Feb 2011 09:28:17 +0100 Subject: [master] 3a8158a Ohh man, I should be too old to make this kind of mistake. Message-ID: commit 3a8158a89f7ec63882bf1c08af61ffb5d236513d Author: Poul-Henning Kamp Date: Fri Feb 11 08:22:53 2011 +0000 Ohh man, I should be too old to make this kind of mistake. Fortunately there is a backstop, so worst case a request would just fail. Spotted by: Erik Missed by: phk, twice diff --git a/bin/varnishd/cache_center.c b/bin/varnishd/cache_center.c index 48f9c15..9480db1 100644 --- a/bin/varnishd/cache_center.c +++ b/bin/varnishd/cache_center.c @@ -655,7 +655,7 @@ cnt_fetch(struct sess *sp) * Space for producing a Content-Length: header including padding * A billion gigabytes is enough for anybody. */ - l += strlen("Content-Length: XxxXxxXxxXxxXxxXxx" + sizeof(void *)); + l += strlen("Content-Length: XxxXxxXxxXxxXxxXxx") + sizeof(void *); if (sp->wrk->ttl < sp->t_req + params->shortlived || sp->objcore == NULL) From phk at varnish-cache.org Fri Feb 11 08:28:17 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Fri, 11 Feb 2011 09:28:17 +0100 Subject: [master] 3718b46 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache Message-ID: commit 3718b46b864d9e2f4771f8642b3028c823bbd240 Merge: 3a8158a 7a77305 Author: Poul-Henning Kamp Date: Fri Feb 11 08:28:12 2011 +0000 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache From phk at varnish-cache.org Fri Feb 11 11:08:55 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Fri, 11 Feb 2011 12:08:55 +0100 Subject: [master] 36a303d Use uintptr_t to avoid a downgrade warning on 32 bit systems, which a smarter compiler than GCC could have seen through. Message-ID: commit 36a303daaa74e7a07cff2b4dccbd6d18dc367f58 Author: Poul-Henning Kamp Date: Fri Feb 11 11:08:10 2011 +0000 Use uintptr_t to avoid a downgrade warning on 32 bit systems, which a smarter compiler than GCC could have seen through. 
diff --git a/bin/varnishd/storage_persistent.h b/bin/varnishd/storage_persistent.h index bf4e2b5..916272c 100644 --- a/bin/varnishd/storage_persistent.h +++ b/bin/varnishd/storage_persistent.h @@ -104,7 +104,7 @@ struct smp_sc { int fd; const char *filename; off_t mediasize; - uint64_t align; /* 64b to avoid casts */ + uintptr_t align; uint32_t granularity; uint32_t unique; From phk at varnish-cache.org Fri Feb 11 12:03:20 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Fri, 11 Feb 2011 13:03:20 +0100 Subject: [master] 4a2d55b Poll on HUP rather than OUT to detect worker process failure, this works better with Solaris Message-ID: commit 4a2d55b3e3387c6571f7c900f470a1d7ba3d9dc4 Author: Poul-Henning Kamp Date: Fri Feb 11 12:02:36 2011 +0000 Poll on HUP rather than OUT to detect worker process failure, this works better with Solaris diff --git a/bin/varnishtest/vtc_varnish.c b/bin/varnishtest/vtc_varnish.c index 7e59311..30ab828 100644 --- a/bin/varnishtest/vtc_varnish.c +++ b/bin/varnishtest/vtc_varnish.c @@ -304,7 +304,7 @@ varnish_launch(struct varnish *v) fd[0].fd = v->cli_fd; fd[0].events = POLLIN; fd[1].fd = v->fds[0]; - fd[1].events = POLLOUT; + fd[1].events = POLLHUP; i = poll(fd, 2, 10000); vtc_log(v->vl, 4, "CLIPOLL %d 0x%x 0x%x", i, fd[0].revents, fd[1].revents); From phk at varnish-cache.org Fri Feb 11 12:06:16 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Fri, 11 Feb 2011 13:06:16 +0100 Subject: [master] 81ce41b On 32 bit, set the worker thread stacksize to the lower of 64k and the minimum demanded by the system. Message-ID: commit 81ce41b57b0a0bb8697e238e51efff14fb70a567 Author: Poul-Henning Kamp Date: Fri Feb 11 12:05:34 2011 +0000 On 32 bit, set the worker thread stacksize to the lower of 64k and the minimum demanded by the system. 
diff --git a/bin/varnishd/mgt_pool.c b/bin/varnishd/mgt_pool.c index 86468ca..a612541 100644 --- a/bin/varnishd/mgt_pool.c +++ b/bin/varnishd/mgt_pool.c @@ -82,7 +82,7 @@ tweak_stack_size(struct cli *cli, const struct parspec *par, low = sysconf(_SC_THREAD_STACK_MIN); - if (arg != NULL && !strcmp(arg, "32")) { + if (arg != NULL && !strcmp(arg, "32bit")) { u = 65536; if (u < low) u = low; diff --git a/bin/varnishd/varnishd.c b/bin/varnishd/varnishd.c index 78765d2..0e16da7 100644 --- a/bin/varnishd/varnishd.c +++ b/bin/varnishd/varnishd.c @@ -418,7 +418,7 @@ main(int argc, char * const *argv) MCF_ParamSet(cli, "sess_workspace", "16384"); cli_check(cli); - MCF_ParamSet(cli, "thread_pool_stack", "65536"); + MCF_ParamSet(cli, "thread_pool_stack", "32bit"); cli_check(cli); MCF_ParamSet(cli, "gzip_stack_buffer", "4096"); From phk at varnish-cache.org Tue Feb 15 10:29:45 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 15 Feb 2011 11:29:45 +0100 Subject: [master] 4981c41 Push the output gzip buffer before we process an included object, to maintain proper ordering. Message-ID: commit 4981c41f27e681a21e77538ecd27185a4ac8d87d Author: Poul-Henning Kamp Date: Tue Feb 15 10:27:10 2011 +0000 Push the output gzip buffer before we process an included object, to maintain proper ordering. 
diff --git a/bin/varnishd/cache_esi_deliver.c b/bin/varnishd/cache_esi_deliver.c index a8050de..62c1bf4 100644 --- a/bin/varnishd/cache_esi_deliver.c +++ b/bin/varnishd/cache_esi_deliver.c @@ -364,6 +364,11 @@ ESI_Deliver(struct sess *sp) q++; r = (void*)strchr((const char*)q, '\0'); AN(r); + if (obufl > 0) { + ved_sendchunk(sp, NULL, 0, + obuf, obufl); + obufl = 0; + } Debug("INCL [%s][%s] BEGIN\n", q, p); ved_include(sp, (const char*)q, (const char*)p); Debug("INCL [%s][%s] END\n", q, p); From phk at varnish-cache.org Tue Feb 15 10:29:46 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 15 Feb 2011 11:29:46 +0100 Subject: [master] 499c3de Use malloc() for the vgz structures. Allocating it from the workspace runs into trouble when we reset the workspace as part of ESI:include processing. Message-ID: commit 499c3de911ec8042f7418a60742428c4ae04384d Author: Poul-Henning Kamp Date: Tue Feb 15 10:28:19 2011 +0000 Use malloc() for the vgz structures. Allocating it from the workspace runs into trouble when we reset the workspace as part of ESI:include processing. 
Fixes: #861 diff --git a/bin/varnishd/cache_gzip.c b/bin/varnishd/cache_gzip.c index 4522afd..3a4fda2 100644 --- a/bin/varnishd/cache_gzip.c +++ b/bin/varnishd/cache_gzip.c @@ -122,7 +122,9 @@ vgz_alloc_vgz(struct sess *sp, const char *id) struct ws *ws = sp->wrk->ws; WS_Assert(ws); - vg = (void*)WS_Alloc(ws, sizeof *vg); + // XXX: we restore workspace in esi:include + // vg = (void*)WS_Alloc(ws, sizeof *vg); + ALLOC_OBJ(vg, VGZ_MAGIC); AN(vg); memset(vg, 0, sizeof *vg); vg->magic = VGZ_MAGIC; @@ -377,6 +379,7 @@ VGZ_Destroy(struct vgz **vgp) (intmax_t)vg->vz.stop_bit); if (vg->tmp != NULL) WS_Reset(vg->tmp, vg->tmp_snapshot); + FREE_OBJ(vg); } /*-------------------------------------------------------------------- From phk at varnish-cache.org Tue Feb 15 10:29:47 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 15 Feb 2011 11:29:47 +0100 Subject: [master] 64bb593 Regression test for #861 Message-ID: commit 64bb593d57e88c0a01a96944c8bac45217efa462 Author: Poul-Henning Kamp Date: Tue Feb 15 10:29:20 2011 +0000 Regression test for #861 diff --git a/bin/varnishtest/tests/r00861.vtc b/bin/varnishtest/tests/r00861.vtc new file mode 100644 index 0000000..b9329a9 --- /dev/null +++ b/bin/varnishtest/tests/r00861.vtc @@ -0,0 +1,29 @@ +# $Id$ + +test "Regression test for ESI/Gzip issues in #861" + +server s1 { + rxreq + expect req.url == "/1" + txresp -body {

} + rxreq + expect req.url == "/foo" + txresp -body + rxreq + expect req.url == "/bar" + txresp -body +} -start + +varnish v1 \ + -vcl+backend { + sub vcl_fetch { + set beresp.do_esi = true; + set beresp.do_gzip = true; + } + } -start + +client c1 { + txreq -url "/1" + rxresp +} -run + From phk at varnish-cache.org Tue Feb 15 12:05:39 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 15 Feb 2011 13:05:39 +0100 Subject: [master] 09e0e8e Continuation of #861 fix: The vef structure cannot be on the workspace either. Message-ID: commit 09e0e8ec13433cb15274107060816bac138689dd Author: Poul-Henning Kamp Date: Tue Feb 15 12:04:54 2011 +0000 Continuation of #861 fix: The vef structure cannot be on the workspace either. diff --git a/bin/varnishd/cache_esi_fetch.c b/bin/varnishd/cache_esi_fetch.c index 34d98a4..7723a89 100644 --- a/bin/varnishd/cache_esi_fetch.c +++ b/bin/varnishd/cache_esi_fetch.c @@ -308,25 +308,28 @@ vfp_esi_begin(struct sess *sp, size_t estimate) sp->wrk->vgz_rx = VGZ_NewUngzip(sp, "U F E"); VEP_Init(sp, NULL); } else if (sp->wrk->is_gunzip && sp->wrk->do_gzip) { - vef = (void*)WS_Alloc(sp->ws, sizeof *vef); - AN(vef); - memset(vef, 0, sizeof *vef); - vef->magic = VEF_MAGIC; + ALLOC_OBJ(vef, VEF_MAGIC); + //vef = (void*)WS_Alloc(sp->ws, sizeof *vef); + //AN(vef); + //memset(vef, 0, sizeof *vef); + //vef->magic = VEF_MAGIC; vef->vgz = VGZ_NewGzip(sp, "G F E"); AZ(sp->wrk->vef_priv); sp->wrk->vef_priv = vef; VEP_Init(sp, vfp_vep_callback); } else if (sp->wrk->is_gzip) { sp->wrk->vgz_rx = VGZ_NewUngzip(sp, "U F E"); - vef = (void*)WS_Alloc(sp->ws, sizeof *vef); - AN(vef); - memset(vef, 0, sizeof *vef); - vef->magic = VEF_MAGIC; + ALLOC_OBJ(vef, VEF_MAGIC); + //vef = (void*)WS_Alloc(sp->ws, sizeof *vef); + //AN(vef); + //memset(vef, 0, sizeof *vef); + //vef->magic = VEF_MAGIC; vef->vgz = VGZ_NewGzip(sp, "G F E"); AZ(sp->wrk->vef_priv); sp->wrk->vef_priv = vef; VEP_Init(sp, vfp_vep_callback); } else { + AZ(sp->wrk->vef_priv); VEP_Init(sp, 
NULL); } @@ -384,6 +387,7 @@ vfp_esi_end(struct sess *sp) VGZ_Destroy(&vef->vgz); XXXAZ(vef->error); sp->obj->gziped = 1; + FREE_OBJ(vef); } else { sp->obj->gziped = 0; } diff --git a/bin/varnishd/cache_gzip.c b/bin/varnishd/cache_gzip.c index 3a4fda2..ea3684d 100644 --- a/bin/varnishd/cache_gzip.c +++ b/bin/varnishd/cache_gzip.c @@ -354,6 +354,7 @@ VGZ_UpdateObj(const struct vgz *vg, struct object *obj) { CHECK_OBJ_NOTNULL(vg, VGZ_MAGIC); + CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC); obj->gzip_start = vg->vz.start_bit; obj->gzip_last = vg->vz.last_bit; obj->gzip_stop = vg->vz.stop_bit; From tfheen at varnish-cache.org Tue Feb 15 12:52:41 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Tue, 15 Feb 2011 13:52:41 +0100 Subject: [master] 9764cc4 Add libedit support to varnishadm Message-ID: commit 9764cc4ece8c06051d4d9788474afa94d7c55a95 Author: Tollef Fog Heen Date: Tue Feb 15 13:49:26 2011 +0100 Add libedit support to varnishadm Optional libedit support to varnishadm. No saving of history or completion yet. 
diff --git a/bin/varnishadm/Makefile.am b/bin/varnishadm/Makefile.am index f5e12e9..2717acb 100644 --- a/bin/varnishadm/Makefile.am +++ b/bin/varnishadm/Makefile.am @@ -9,11 +9,13 @@ dist_man_MANS = varnishadm.1 varnishadm_SOURCES = \ varnishadm.c +varnishadm_CFLAGS = @LIBEDIT_CFLAGS@ + varnishadm_LDADD = \ $(top_builddir)/lib/libvarnish/libvarnish.la \ $(top_builddir)/lib/libvarnishapi/libvarnishapi.la \ $(top_builddir)/lib/libvarnishcompat/libvarnishcompat.la \ - ${PTHREAD_LIBS} ${NET_LIBS} + ${PTHREAD_LIBS} ${NET_LIBS} @LIBEDIT_LIBS@ varnishadm.1: $(top_srcdir)/doc/sphinx/reference/varnishadm.rst if HAVE_RST2MAN diff --git a/bin/varnishadm/varnishadm.c b/bin/varnishadm/varnishadm.c index 965d8cd..147438e 100644 --- a/bin/varnishadm/varnishadm.c +++ b/bin/varnishadm/varnishadm.c @@ -38,9 +38,12 @@ SVNID("$Id$") #include #include #include - #include +#ifdef HAVE_LIBEDIT +#include +#endif + #include "cli.h" #include "cli_common.h" #include "libvarnish.h" @@ -154,6 +157,19 @@ do_args(int sock, int argc, char * const *argv) exit(1); } +#ifdef HAVE_LIBEDIT +/* Callback for readline, doesn't take a private pointer, so we need + * to have a global variable. + */ +static int _line_sock; +void send_line(char *l) +{ + cli_write(_line_sock, l); + cli_write(_line_sock, "\n"); + add_history(l); +} +#endif + /* * No arguments given, simply pass bytes on stdin/stdout and CLI socket * Send a "banner" to varnish, to provoke a welcome message. 
@@ -165,6 +181,16 @@ pass(int sock) char buf[1024]; int i, n, m; +#ifdef HAVE_LIBEDIT + _line_sock = sock; + rl_already_prompted = 1; + if (isatty(0)) { + rl_callback_handler_install("varnish> ", send_line); + } else { + rl_callback_handler_install("", send_line); + } +#endif + cli_write(sock, "banner\n"); fds[0].fd = sock; fds[0].events = POLLIN; @@ -182,13 +208,21 @@ pass(int sock) exit (0); } assert(n > 0); + /* Get rid of the prompt, kinda hackish */ + write(1, "\r \r", 13); m = write(1, buf, n); if (n != m) { perror("Write error writing stdout"); exit (1); } +#ifdef HAVE_LIBEDIT + rl_forced_update_display(); +#endif } if (fds[1].revents & POLLIN) { +#ifdef HAVE_LIBEDIT + rl_callback_read_char(); +#else n = read(fds[1].fd, buf, sizeof buf); if (n == 0) { AZ(shutdown(sock, SHUT_WR)); @@ -196,12 +230,10 @@ pass(int sock) } else if (n < 0) { exit(0); } else { - m = write(sock, buf, n); - if (n != m) { - perror("Write error writing CLI socket"); - exit (1); - } + buf[n] = '\0'; + cli_write(sock, buf); } +#endif } } } diff --git a/configure.ac b/configure.ac index 7b17ba6..f574764 100644 --- a/configure.ac +++ b/configure.ac @@ -121,6 +121,9 @@ fi AC_SUBST(PCRE_CFLAGS) AC_SUBST(PCRE_LIBS) +PKG_CHECK_MODULES([LIBEDIT], [libedit], + [AC_DEFINE([HAVE_LIBEDIT], [1], [Define we have libedit])], + [AC_MSG_WARN([libedit not found, disabling libedit support])]) # Checks for header files. AC_HEADER_STDC From phk at varnish-cache.org Tue Feb 15 13:03:27 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 15 Feb 2011 14:03:27 +0100 Subject: [master] d4441ab This is getting embarrasing: don't double free... Message-ID: commit d4441ab2acd499c400baae277b62faeadb39022b Author: Poul-Henning Kamp Date: Tue Feb 15 12:59:10 2011 +0000 This is getting embarrasing: don't double free... 
diff --git a/bin/varnishd/cache_esi_fetch.c b/bin/varnishd/cache_esi_fetch.c index 7723a89..7343285 100644 --- a/bin/varnishd/cache_esi_fetch.c +++ b/bin/varnishd/cache_esi_fetch.c @@ -361,8 +361,6 @@ vfp_esi_end(struct sess *sp) ssize_t l; CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); - if (sp->wrk->is_gzip && sp->wrk->do_gunzip) - VGZ_Destroy(&sp->wrk->vgz_rx); vsb = VEP_Finish(sp); From phk at varnish-cache.org Tue Feb 15 13:03:27 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 15 Feb 2011 14:03:27 +0100 Subject: [master] e974d51 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache Message-ID: commit e974d51f26bd73f302de69a1ca75a606b14e802b Merge: d4441ab 9764cc4 Author: Poul-Henning Kamp Date: Tue Feb 15 13:01:21 2011 +0000 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache From phk at varnish-cache.org Tue Feb 15 13:34:39 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Tue, 15 Feb 2011 14:34:39 +0100 Subject: [master] 9aa8317 Zero the wrk->vgz_rx pointer once we have freed it. Message-ID: commit 9aa8317d68eb57049b7e5666e57f0ca9cedd323c Author: Poul-Henning Kamp Date: Tue Feb 15 13:32:09 2011 +0000 Zero the wrk->vgz_rx pointer once we have freed it. diff --git a/bin/varnishd/cache_esi_fetch.c b/bin/varnishd/cache_esi_fetch.c index 7343285..b650c2a 100644 --- a/bin/varnishd/cache_esi_fetch.c +++ b/bin/varnishd/cache_esi_fetch.c @@ -304,6 +304,7 @@ vfp_esi_begin(struct sess *sp, size_t estimate) CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); /* XXX: snapshot WS's ? We'll need the space */ + AZ(sp->wrk->vgz_rx); if (sp->wrk->is_gzip && sp->wrk->do_gunzip) { sp->wrk->vgz_rx = VGZ_NewUngzip(sp, "U F E"); VEP_Init(sp, NULL); diff --git a/bin/varnishd/cache_fetch.c b/bin/varnishd/cache_fetch.c index 05e013f..72549c8 100644 --- a/bin/varnishd/cache_fetch.c +++ b/bin/varnishd/cache_fetch.c @@ -504,6 +504,7 @@ FetchBody(struct sess *sp, const struct http *hp) * XXX: Missing: RFC2616 sec. 
4.4 in re 1xx, 204 & 304 responses */ + AZ(sp->wrk->vgz_rx); AZ(VTAILQ_FIRST(&sp->obj->store)); switch (sp->wrk->body_status) { case BS_NONE: @@ -537,6 +538,7 @@ FetchBody(struct sess *sp, const struct http *hp) INCOMPL(); } XXXAZ(sp->wrk->vfp->end(sp)); + AZ(sp->wrk->vgz_rx); /* * It is OK for ->end to just leave the last storage segment * sitting on sp->wrk->storage, we will always call vfp_nop_end() diff --git a/bin/varnishd/cache_gzip.c b/bin/varnishd/cache_gzip.c index ea3684d..655398a 100644 --- a/bin/varnishd/cache_gzip.c +++ b/bin/varnishd/cache_gzip.c @@ -393,6 +393,7 @@ static void __match_proto__() vfp_gunzip_begin(struct sess *sp, size_t estimate) { (void)estimate; + AZ(sp->wrk->vgz_rx); sp->wrk->vgz_rx = VGZ_NewUngzip(sp, "U F -"); } @@ -438,6 +439,7 @@ vfp_gunzip_end(struct sess *sp) struct vgz *vg; vg = sp->wrk->vgz_rx; + sp->wrk->vgz_rx = NULL; CHECK_OBJ_NOTNULL(vg, VGZ_MAGIC); VGZ_Destroy(&vg); sp->obj->gziped = 0; @@ -462,6 +464,7 @@ vfp_gzip_begin(struct sess *sp, size_t estimate) { (void)estimate; + AZ(sp->wrk->vgz_rx); sp->wrk->vgz_rx = VGZ_NewGzip(sp, "G F -"); } @@ -507,6 +510,7 @@ vfp_gzip_end(struct sess *sp) int i; vg = sp->wrk->vgz_rx; + sp->wrk->vgz_rx = NULL; CHECK_OBJ_NOTNULL(vg, VGZ_MAGIC); do { VGZ_Ibuf(vg, "", 0); @@ -587,6 +591,7 @@ vfp_testgzip_end(struct sess *sp) struct vgz *vg; vg = sp->wrk->vgz_rx; + sp->wrk->vgz_rx = NULL; CHECK_OBJ_NOTNULL(vg, VGZ_MAGIC); VGZ_UpdateObj(vg, sp->obj); VGZ_Destroy(&vg); From phk at varnish-cache.org Wed Feb 16 06:28:00 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 16 Feb 2011 07:28:00 +0100 Subject: [master] ea15f75 If an esi-processed job starts with commit ea15f7599f53404e4e6d0a774451d29f4fe05692 Author: Poul-Henning Kamp Date: Tue Feb 15 21:35:48 2011 +0000 If an esi-processed job starts with wrk->gzip_resp = 0; if (isgzip && !(sp->wrk->res_mode & RES_GUNZIP)) { + assert(sizeof gzip_hdr == 10); + /* Send out the gzip header */ + ved_sendchunk(sp, "a\r\n", 3, gzip_hdr, 
10); + sp->wrk->l_crc = 0; sp->wrk->gzip_resp = 1; sp->wrk->crc = crc32(0L, Z_NULL, 0); - } else - sp->wrk->gzip_resp = 0; + } } if (isgzip && !sp->wrk->gzip_resp) { vgz = VGZ_NewUngzip(sp, "U D E"); + + /* Feed a gzip header to gunzip to make it happy */ + VGZ_Ibuf(vgz, gzip_hdr, sizeof gzip_hdr); + VGZ_Obuf(vgz, obuf, sizeof obuf); + i = VGZ_Gunzip(vgz, &dp, &dl); + assert(i == Z_OK || i == Z_STREAM_END); + assert(VGZ_IbufEmpty(vgz)); + assert(dl == 0); + obufl = 0; } @@ -305,18 +325,7 @@ ESI_Deliver(struct sess *sp) sp->wrk->crc = crc32_combine( sp->wrk->crc, icrc, l_icrc); sp->wrk->l_crc += l_icrc; - if (sp->esi_level > 0 && off == 0) { - /* - * Skip the GZ header, we know it is - * 10 bytes: we made it ourself. - */ - assert(l > 10); - ved_sendchunk(sp, NULL, 0, - st->ptr + 10, l - 10); - } else { - ved_sendchunk(sp, r, q - r, - st->ptr + off, l); - } + ved_sendchunk(sp, r, q - r, st->ptr + off, l); } else if (sp->wrk->gzip_resp) { /* * A gzip'ed ESI response, but the VEC was diff --git a/bin/varnishd/cache_esi_fetch.c b/bin/varnishd/cache_esi_fetch.c index b650c2a..5d61b62 100644 --- a/bin/varnishd/cache_esi_fetch.c +++ b/bin/varnishd/cache_esi_fetch.c @@ -143,7 +143,7 @@ struct vef_priv { }; /*--------------------------------------------------------------------- - * We receive a ungzip'ed object, and want to store it gzip'ed. + * We receive a [un]gzip'ed object, and want to store it gzip'ed. */ static ssize_t From phk at varnish-cache.org Wed Feb 16 06:28:00 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 16 Feb 2011 07:28:00 +0100 Subject: [master] ca6934f Also print out the text description of the gunzip error. Message-ID: commit ca6934fda07845ac02d649d6f809473b86cb21e9 Author: Poul-Henning Kamp Date: Tue Feb 15 21:24:52 2011 +0000 Also print out the text description of the gunzip error. 
diff --git a/bin/varnishd/cache_gzip.c b/bin/varnishd/cache_gzip.c index 655398a..7af71c8 100644 --- a/bin/varnishd/cache_gzip.c +++ b/bin/varnishd/cache_gzip.c @@ -302,7 +302,7 @@ VGZ_Gunzip(struct vgz *vg, const void **pptr, size_t *plen) return (1); if (i == Z_BUF_ERROR) return (2); -printf("INFLATE=%d\n", i); +printf("INFLATE=%d (%s)\n", i, vg->vz.msg); return (-1); } From phk at varnish-cache.org Wed Feb 16 06:28:01 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 16 Feb 2011 07:28:01 +0100 Subject: [master] 6cc6035 (missed bit of this commit:) If an esi-processed job starts with commit 6cc603552b46aae02250563739387042f37a3b41 Author: Poul-Henning Kamp Date: Tue Feb 15 21:38:40 2011 +0000 (missed bit of this commit:) If an esi-processed job starts with 0); + assert(l > 0); assert(mark == SKIP || mark == VERBATIM); if (mark == SKIP) vep_emit_skip(vep, l); @@ -331,10 +332,11 @@ vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark) * assembled before the pending bytes. */ - if (vep->last_mark != mark && vep->o_wait > 0) { + if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) { lcb = vep->cb(vep->sp, 0, mark == VERBATIM ? 
VGZ_RESET : VGZ_ALIGN); - vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); + if (lcb - vep->o_last > 0) + vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); vep->o_last = lcb; vep->o_wait = 0; } @@ -1009,6 +1011,17 @@ VEP_Init(const struct sess *sp, vep_callback_t *cb) vep->state = VEP_START; vep->crc = crc32(0L, Z_NULL, 0); vep->crcp = crc32(0L, Z_NULL, 0); + + /* + * We must force the GZIP header out as a SKIP string, otherwise + * an object starting with startup = 1; + vep->ver_p = ""; + vep->last_mark = SKIP; + vep_mark_common(vep, vep->ver_p, VERBATIM); + vep->startup = 0; } /*--------------------------------------------------------------------- From phk at varnish-cache.org Wed Feb 16 06:28:00 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 16 Feb 2011 07:28:00 +0100 Subject: [master] dd54061 Test corner-cases with an object containing only an with/without leading and training verbatim data. Message-ID: commit dd54061d7cab2ae7bc925a47efc9a87c4f7718cb Author: Poul-Henning Kamp Date: Tue Feb 15 21:37:04 2011 +0000 Test corner-cases with an object containing only an with/without leading and training verbatim data. diff --git a/bin/varnishtest/tests/e00026.vtc b/bin/varnishtest/tests/e00026.vtc new file mode 100644 index 0000000..dd58f78 --- /dev/null +++ b/bin/varnishtest/tests/e00026.vtc @@ -0,0 +1,82 @@ +# $Id$ + +test "Test esi-include + gzip edgecase with respect to gzip hdr" + +server s1 { + rxreq + expect req.url == "/foo" + txresp -gzipbody {

/////

} + + rxreq + expect req.url == "/1" + expect req.http.accept-encoding == gzip + txresp -gzipbody {BA} + + rxreq + expect req.url == "/2" + expect req.http.accept-encoding == gzip + txresp -gzipbody {B} + + rxreq + expect req.url == "/3" + expect req.http.accept-encoding == gzip + txresp -gzipbody {A} + + rxreq + expect req.url == "/4" + expect req.http.accept-encoding == gzip + txresp -gzipbody {} + +} -start + +varnish v1 -vcl+backend { + sub vcl_fetch { + if (req.url != "/foo") { + set beresp.do_esi = true; + } + } +} -start + +varnish v1 -cliok "param.set esi_syntax 0x21" + +varnish v1 -cliok "param.set diag_bitmap 0x10000" + +client c1 { + txreq -url /foo -hdr "Accept-Encoding: gzip" + rxresp + gunzip + expect resp.status == 200 + expect resp.bodylen == 13 + + txreq -url /1 -hdr "Accept-Encoding: gzip" + rxresp + expect resp.http.content-encoding == gzip + gunzip + expect resp.status == 200 + expect resp.bodylen == 15 + + txreq -url /2 -hdr "Accept-Encoding: gzip" + rxresp + expect resp.http.content-encoding == gzip + gunzip + expect resp.status == 200 + expect resp.bodylen == 14 + + txreq -url /3 -hdr "Accept-Encoding: gzip" + rxresp + expect resp.http.content-encoding == gzip + gunzip + expect resp.status == 200 + expect resp.bodylen == 14 + + txreq -url /4 -hdr "Accept-Encoding: gzip" + rxresp + expect resp.http.content-encoding == gzip + gunzip + expect resp.status == 200 + expect resp.bodylen == 13 + +} + +client c1 -run +varnish v1 -expect esi_errors == 0 From phk at varnish-cache.org Wed Feb 16 06:28:01 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 16 Feb 2011 07:28:01 +0100 Subject: [master] 6fa90ee Take another stab at fixing #861 by clearing the correct bit position of the "last" bit in the included ESI object. 
Message-ID: commit 6fa90ee463d32187361bd19082242bd36957b8fd Author: Poul-Henning Kamp Date: Tue Feb 15 21:56:01 2011 +0000 Take another stab at fixing #861 by clearing the correct bit position of the "last" bit in the included ESI object. diff --git a/bin/varnishd/cache_esi_deliver.c b/bin/varnishd/cache_esi_deliver.c index 9734063..29543a8 100644 --- a/bin/varnishd/cache_esi_deliver.c +++ b/bin/varnishd/cache_esi_deliver.c @@ -497,7 +497,7 @@ ESI_DeliverChild(const struct sess *sp) */ cc = ved_deliver_byterange(sp, start/8, last/8); //printf("CC_LAST %x\n", cc); - cc &= ~(1U << (start & 7)); + cc &= ~(1U << (last & 7)); ved_sendchunk(sp, NULL, 0, &cc, 1); cc = ved_deliver_byterange(sp, 1 + last/8, stop/8); //printf("CC_STOP %x (%d)\n", cc, (int)(stop & 7)); diff --git a/bin/varnishtest/tests/r00861.vtc b/bin/varnishtest/tests/r00861.vtc index b9329a9..38db46b 100644 --- a/bin/varnishtest/tests/r00861.vtc +++ b/bin/varnishtest/tests/r00861.vtc @@ -12,18 +12,38 @@ server s1 { rxreq expect req.url == "/bar" txresp -body + + rxreq + expect req.url == "/barf" + txresp -body {[{"program":true,"id":972389,"vendorId":"15451701","starttime":1297777500000,"endtime":1297783500000,"title":"Swimming Pool","oTitle":"true","genre":"0x10x0","timeshiftEnabled":true},{"program":true,"id":972391,"vendorId":"15451702","starttime":1297783500000,"endtime":1297785000000,"title":"Fashion -Trends","oTitle":null,"genre":"0x30x0","timeshiftEnabled":true},{"program":true,"id":972384,"vendorId":"15451703","starttime":1297785000000,"endtime":1297786500000,"title":"Fashion - m?nd","oTitle":null,"genre":"0x30x0","timeshiftEnabled":true},{"program":true,"id":972388,"vendorId":"15451704","starttime":1297786500000,"endtime":1297789800000,"title":"The Day Before","oTitle":"true","genre":"0x30x0","timeshiftEnabled":true},{"program":true,"id":972393,"vendorId":"15451705","starttime":1297789800000,"endtime":1297793100000,"title":"Kessels ?je","oTitle":null,"genre":"0x20x3","timeshiftEnabled":true}]} + 
+ rxreq + expect req.url == "/2" + txresp -body { } + } -start varnish v1 \ -vcl+backend { sub vcl_fetch { - set beresp.do_esi = true; - set beresp.do_gzip = true; + if (req.url == "/1" || req.url == "/2") { + set beresp.do_esi = true; + set beresp.do_gzip = true; + } } } -start client c1 { txreq -url "/1" rxresp -} -run + txreq -url "/barf" -hdr "Accept-Encoding: gzip" + rxresp + expect resp.http.Content-Encoding == resp.http.Content-Encoding + expect resp.bodylen == 909 + + txreq -url "/2" -hdr "Accept-Encoding: gzip" + rxresp + gunzip + expect resp.bodylen == 910 +} -run From phk at varnish-cache.org Wed Feb 16 06:28:01 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 16 Feb 2011 07:28:01 +0100 Subject: [master] 1a8f105 Log useful gunzip state before we bail out Message-ID: commit 1a8f105625afeff39dc7d5d009f66390cdba39f5 Author: Poul-Henning Kamp Date: Tue Feb 15 21:59:34 2011 +0000 Log useful gunzip state before we bail out diff --git a/bin/varnishtest/vtc_http.c b/bin/varnishtest/vtc_http.c index ec50a33..7b9eb96 100644 --- a/bin/varnishtest/vtc_http.c +++ b/bin/varnishtest/vtc_http.c @@ -554,12 +554,10 @@ cmd_http_gunzip_body(CMD_ARGS) assert(Z_OK == inflateInit2(&vz, 31)); i = inflate(&vz, Z_FINISH); - if (i != Z_STREAM_END) - vtc_log(hp->vl, 0, "Gunzip error = %d (%s) in:%jd out:%jd", - i, vz.msg, (intmax_t)vz.total_in, (intmax_t)vz.total_out); hp->bodyl = vz.total_out; memcpy(hp->body, p, hp->bodyl); free(p); + vtc_log(hp->vl, 3, "new bodylen %u", hp->bodyl); vtc_dump(hp->vl, 4, "body", hp->body, hp->bodyl); bprintf(hp->bodylen, "%u", hp->bodyl); vtc_log(hp->vl, 4, "startbit = %ju %ju/%ju", @@ -568,6 +566,9 @@ cmd_http_gunzip_body(CMD_ARGS) vz.last_bit, vz.last_bit >> 3, vz.last_bit & 7); vtc_log(hp->vl, 4, "stopbit = %ju %ju/%ju", vz.stop_bit, vz.stop_bit >> 3, vz.stop_bit & 7); + if (i != Z_STREAM_END) + vtc_log(hp->vl, 0, "Gunzip error = %d (%s) in:%jd out:%jd", + i, vz.msg, (intmax_t)vz.total_in, (intmax_t)vz.total_out); assert(Z_OK 
== inflateEnd(&vz)); } From phk at varnish-cache.org Wed Feb 16 06:28:02 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 16 Feb 2011 07:28:02 +0100 Subject: [master] 2fface7 A short rant about why I will not add SSL support to Varnish. Message-ID: commit 2fface76e96096d5e869fdb90b06a4d59dbb53bc Author: Poul-Henning Kamp Date: Tue Feb 15 23:05:22 2011 +0000 A short rant about why I will not add SSL support to Varnish. diff --git a/doc/sphinx/phk/index.rst b/doc/sphinx/phk/index.rst index e07e50e..ee5a2c9 100644 --- a/doc/sphinx/phk/index.rst +++ b/doc/sphinx/phk/index.rst @@ -8,6 +8,7 @@ You may or may not want to know what Poul-Henning think. .. toctree:: + ssl.rst gzip.rst vcl_expr.rst ipv6suckage.rst diff --git a/doc/sphinx/phk/ssl.rst b/doc/sphinx/phk/ssl.rst new file mode 100644 index 0000000..bf47970 --- /dev/null +++ b/doc/sphinx/phk/ssl.rst @@ -0,0 +1,75 @@ +.. _phk_ssl: + +============ +Why no SSL ? +============ + +This is turning into a bit of a FAQ, but the answer is too big to fit +in the margin we use for those. + +There are a number of reasons why there are no plans in sight that will +grow SSL support in Varnish. + +First, I have yet to see a SSL library where the source code is not +a nightmare. + +As I am writing this, the varnish source-code tree contains 82.595 +lines of .c and .h files, including JEmalloc (12.236 lines) and +Zlib (12.344 lines). + +OpenSSL, as imported into FreeBSD, is 340.722 lines of code, nine +times larger than the Varnish source code, 27 times larger than +each of Zlib or JEmalloc. + +This should give you some indication of how insanely complex +the canonical implementation of SSL is. + +Second, it is not exactly the best source-code in the world. Even +if I have no idea what it does, there are many aspect of it that +scares me. + +Take this example in a comment, randomly found in s3-srvr.c:: + + /* Throw away what we have done so far in the current handshake, + * which will now be aborted. 
(A full SSL_clear would be too much.) + * I hope that tmp.dh is the only thing that may need to be cleared + * when a handshake is not completed ... */ + +I hope they know what they are doing, but this comment doesn't exactly +carry that point home, does it ? + +But let us assume that a good SSL library can be found, what would +Varnish do with it ? + +We would terminate SSL sessions, and we would burn CPU cycles doing +that. You can kiss the highly optimized delivery path in Varnish +goodby for SSL, we cannot simply tell the kernel to put the bytes +on the socket, rather, we have to corkscrew the data through +the SSL library and then write it to the socket. + +Will that be significantly different, performance wise, from running +a SSL proxy in separate process ? + +No, it will not, because the way varnish would have to do it would +be to ... start a separate process to do the SSL handling. + +There is no other way we can guarantee that secret krypto-bits do +not leak anywhere they should not, than by fencing in the code that +deals with them in a child process, so the bulk of varnish never +gets anywhere near the certificates, not even during a core-dump. + +Would I be able to write a better stand-alone SSL proxy process +than the many which already exists ? + +Probably not, unless I also write my own SSL implementation library, +including support for hardware crypto engines and the works. + +That is not one of the things I dreamt about doing as a kid and +if I dream about it now I call it a nightmare. + +So the balance sheet, as far as I can see it, lists "It would be +a bit easier to configure" on the plus side, and everything else +piles up on the minus side, making it a huge waste of time +and effort to even think about it.. 
+ +Poul-Henning, 2011-02-15 From phk at varnish-cache.org Thu Feb 17 10:18:24 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 11:18:24 +0100 Subject: [master] 4c00fe8 Rename "pass" in vcl_fetch to "hit_for_pass" and respect a zerp TTL from VCL. Message-ID: commit 4c00fe8ca656903492dbabf144ab6a27d678e80e Author: Poul-Henning Kamp Date: Thu Feb 17 10:15:48 2011 +0000 Rename "pass" in vcl_fetch to "hit_for_pass" and respect a zerp TTL from VCL. Make the default VCL explictly set the TTL to two minutes, decoupling it from the default_ttl parameter. This makes it clear that hit-for-pass happens, and makes it possible to avoid the hit-for-pass object, (ie: get a plain pass) by setting its ttl to zero in vcl_fetch. diff --git a/bin/varnishd/cache_center.c b/bin/varnishd/cache_center.c index 9480db1..4a8b6ac 100644 --- a/bin/varnishd/cache_center.c +++ b/bin/varnishd/cache_center.c @@ -566,14 +566,11 @@ cnt_fetch(struct sess *sp) pass = 1; /* VCL may have fiddled this, but that doesn't help */ sp->wrk->ttl = sp->t_req - 1.; - } else if (sp->handling == VCL_RET_PASS) { + } else if (sp->handling == VCL_RET_HIT_FOR_PASS) { /* pass from vcl_fetch{} -> hit-for-pass */ /* XXX: the bereq was not filtered pass... 
*/ pass = 1; sp->objcore->flags |= OC_F_PASS; - /* Enforce a minimum TTL of 1 sec (if set from VCL) */ - if (sp->wrk->ttl <= sp->t_req) - sp->wrk->ttl = sp->wrk->entered + params->default_ttl; } else { /* regular object */ pass = 0; @@ -728,7 +725,7 @@ cnt_fetch(struct sess *sp) sp->restarts++; sp->step = STP_RECV; return (0); - case VCL_RET_PASS: + case VCL_RET_HIT_FOR_PASS: case VCL_RET_DELIVER: break; case VCL_RET_ERROR: diff --git a/bin/varnishd/default.vcl b/bin/varnishd/default.vcl index 6093780..c25f560 100644 --- a/bin/varnishd/default.vcl +++ b/bin/varnishd/default.vcl @@ -102,14 +102,14 @@ sub vcl_miss { } sub vcl_fetch { - if (beresp.ttl <= 0s) { - return (pass); - } - if (beresp.http.Set-Cookie) { - return (pass); - } - if (beresp.http.Vary == "*") { - return (pass); + if (beresp.ttl <= 0s || + beresp.http.Set-Cookie || + beresp.http.Vary == "*") { + /* + * Mark as "Hit-For-Pass" for the next 2 minutes + */ + set beresp.ttl = 120 s; + return (hit_for_pass); } return (deliver); } diff --git a/bin/varnishtest/tests/c00011.vtc b/bin/varnishtest/tests/c00011.vtc index f0f348d..ab03d6a 100644 --- a/bin/varnishtest/tests/c00011.vtc +++ b/bin/varnishtest/tests/c00011.vtc @@ -13,7 +13,7 @@ server s1 { varnish v1 -vcl+backend { sub vcl_fetch { - return(pass); + return(hit_for_pass); } } -start diff --git a/bin/varnishtest/tests/c00014.vtc b/bin/varnishtest/tests/c00014.vtc index 3fc83e6..b06d3df 100644 --- a/bin/varnishtest/tests/c00014.vtc +++ b/bin/varnishtest/tests/c00014.vtc @@ -17,7 +17,7 @@ server s1 { varnish v1 -vcl+backend { sub vcl_fetch { - return(pass); + return(hit_for_pass); } } -start diff --git a/bin/varnishtest/tests/e00011.vtc b/bin/varnishtest/tests/e00011.vtc index 9075639..5609ee1 100644 --- a/bin/varnishtest/tests/e00011.vtc +++ b/bin/varnishtest/tests/e00011.vtc @@ -20,7 +20,7 @@ server s1 { varnish v1 -vcl+backend { sub vcl_fetch { set beresp.do_esi = true; - return(pass); + return(hit_for_pass); } } -start diff --git 
a/bin/varnishtest/tests/r00318.vtc b/bin/varnishtest/tests/r00318.vtc index b081336..9cab6af 100644 --- a/bin/varnishtest/tests/r00318.vtc +++ b/bin/varnishtest/tests/r00318.vtc @@ -10,7 +10,7 @@ server s1 { varnish v1 -vcl+backend { sub vcl_fetch { set beresp.do_esi = true; - return(pass); + return(hit_for_pass); } } -start diff --git a/lib/libvcl/generate.py b/lib/libvcl/generate.py index d6f75e4..6e27946 100755 --- a/lib/libvcl/generate.py +++ b/lib/libvcl/generate.py @@ -91,7 +91,7 @@ returns =( ('hash', ('hash',)), ('miss', ('error', 'restart', 'pass', 'fetch',)), ('hit', ('error', 'restart', 'pass', 'deliver',)), - ('fetch', ('error', 'restart', 'pass', 'deliver',)), + ('fetch', ('error', 'restart', 'hit_for_pass', 'deliver',)), ('deliver', ('restart', 'deliver',)), ('error', ('restart', 'deliver',)), ) From phk at varnish-cache.org Thu Feb 17 19:16:35 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 20:16:35 +0100 Subject: [master] 84b7dda I had forgotten that arg spec strings are NUL terminated centrally so I added one NUL too many to enum spec strings, preventing any subsequent args from being handled correctly. Message-ID: commit 84b7dda89730901784afdaf48ac77e4a8d3d86e2 Author: Poul-Henning Kamp Date: Thu Feb 17 19:15:41 2011 +0000 I had forgotten that arg spec strings are NUL terminated centrally so I added one NUL too many to enum spec strings, preventing any subsequent args from being handled correctly. 
Spotted by: Nils Goroll diff --git a/lib/libvmod_std/vmod.py b/lib/libvmod_std/vmod.py index 7d751cb..a799dd8 100755 --- a/lib/libvmod_std/vmod.py +++ b/lib/libvmod_std/vmod.py @@ -141,7 +141,6 @@ def parse_enum(tq): raise Exception("Duplicate Enum value '%s'" % i) b[i] = True s = s + i.strip() + '\\0' - s = s + '\\0' return s ####################################################################### From phk at varnish-cache.org Thu Feb 17 20:14:22 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 21:14:22 +0100 Subject: [master] bb2765d Don't NUL terminate the enum spec in the enum spec writing function, the function-emitting function will do that. Message-ID: commit bb2765dcfe1127eda090154157a0e147754338a8 Author: Charlie Date: Thu Feb 17 19:13:21 2011 +0000 Don't NUL terminate the enum spec in the enum spec writing function, the function-emitting function will do that. Spotted by: Nils Goroll diff --git a/lib/libvmod_std/vmod.py b/lib/libvmod_std/vmod.py index 7d751cb..a799dd8 100755 --- a/lib/libvmod_std/vmod.py +++ b/lib/libvmod_std/vmod.py @@ -141,7 +141,6 @@ def parse_enum(tq): raise Exception("Duplicate Enum value '%s'" % i) b[i] = True s = s + i.strip() + '\\0' - s = s + '\\0' return s ####################################################################### From phk at varnish-cache.org Thu Feb 17 20:14:22 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 21:14:22 +0100 Subject: [master] b07804b Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache Message-ID: commit b07804bf8ac986c6af4317b36ec0221e8a5dadbc Merge: bb2765d 84b7dda Author: Poul-Henning Kamp Date: Thu Feb 17 19:16:44 2011 +0000 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache From phk at varnish-cache.org Thu Feb 17 20:14:22 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 21:14:22 +0100 Subject: [master] 10a2ab5 Rename max_esi_includes param to max_esi_depth and enforce 
it again. Message-ID: commit 10a2ab5fb00eb4ebf3acf2c58b6be0c4ac35048b Author: Poul-Henning Kamp Date: Thu Feb 17 20:14:04 2011 +0000 Rename max_esi_includes param to max_esi_depth and enforce it again. diff --git a/bin/varnishd/cache_esi_deliver.c b/bin/varnishd/cache_esi_deliver.c index 29543a8..48a9948 100644 --- a/bin/varnishd/cache_esi_deliver.c +++ b/bin/varnishd/cache_esi_deliver.c @@ -55,6 +55,10 @@ ved_include(struct sess *sp, const char *src, const char *host) w = sp->wrk; + if (sp->esi_level >= params->max_esi_depth) + return; + sp->esi_level++; + if (WRW_Flush(w)) { vca_close_session(sp, "remote closed"); return; @@ -62,7 +66,6 @@ ved_include(struct sess *sp, const char *src, const char *host) AZ(WRW_FlushRelease(w)); - sp->esi_level++; obj = sp->obj; sp->obj = NULL; res_mode = sp->wrk->res_mode; diff --git a/bin/varnishd/heritage.h b/bin/varnishd/heritage.h index 4b902a5..d028b5a 100644 --- a/bin/varnishd/heritage.h +++ b/bin/varnishd/heritage.h @@ -136,7 +136,7 @@ struct params { unsigned max_restarts; /* Maximum esi:include depth allowed */ - unsigned max_esi_includes; + unsigned max_esi_depth; /* ESI parser hints */ unsigned esi_syntax; diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index c8ad67c..1c49879 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -647,11 +647,11 @@ static const struct parspec input_parspec[] = { "Use 0x notation and do the bitor in your head :-)\n", 0, "0", "bitmap" }, - { "max_esi_includes", - tweak_uint, &master.max_esi_includes, 0, UINT_MAX, + { "max_esi_depth", + tweak_uint, &master.max_esi_depth, 0, UINT_MAX, "Maximum depth of esi:include processing.\n", 0, - "5", "includes" }, + "5", "levels" }, { "cache_vbcs", tweak_bool, &master.cache_vbcs, 0, 0, "Cache vbc's or rely on malloc, that's the question.", EXPERIMENTAL, From phk at varnish-cache.org Thu Feb 17 20:22:04 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 21:22:04 +0100 Subject: [master] 
7c14e6a Enable range support by default Message-ID: commit 7c14e6a42f214615187caeac3cd6b5ac1b5c56dc Author: Poul-Henning Kamp Date: Thu Feb 17 20:21:53 2011 +0000 Enable range support by default diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index 1c49879..ac1e92b 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -812,7 +812,7 @@ static const struct parspec input_parspec[] = { { "http_range_support", tweak_bool, &master.http_range_support, 0, 0, "Enable support for HTTP Range headers.\n", EXPERIMENTAL, - "off", "bool" }, + "on", "bool" }, { "http_gzip_support", tweak_bool, &master.http_gzip_support, 0, 0, "Enable gzip support. When enabled Varnish will compress " "uncompressed objects before they are stored in the cache. " diff --git a/bin/varnishtest/tests/c00034.vtc b/bin/varnishtest/tests/c00034.vtc index 0636639..3ec70df 100644 --- a/bin/varnishtest/tests/c00034.vtc +++ b/bin/varnishtest/tests/c00034.vtc @@ -9,6 +9,7 @@ server s1 { varnish v1 -vcl+backend { } -start +varnish v1 -cliok "param.set http_range_support off" client c1 { txreq -hdr "Range: bytes=0-9" From phk at varnish-cache.org Thu Feb 17 20:25:34 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 21:25:34 +0100 Subject: [master] a1f4fe2 Enable logging of hash string components by default. Message-ID: commit a1f4fe2edc9e0ff8d77331a3924e03249bf33d63 Author: Poul-Henning Kamp Date: Thu Feb 17 20:25:17 2011 +0000 Enable logging of hash string components by default. 
diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index ac1e92b..306e69e 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -748,9 +748,9 @@ static const struct parspec input_parspec[] = { 0, "8192", "bytes" }, { "log_hashstring", tweak_bool, &master.log_hash, 0, 0, - "Log the hash string to shared memory log.\n", + "Log the hash string components to shared memory log.\n", 0, - "off", "bool" }, + "on", "bool" }, { "log_local_address", tweak_bool, &master.log_local_addr, 0, 0, "Log the local address on the TCP connection in the " "SessionOpen shared memory record.\n", From phk at varnish-cache.org Thu Feb 17 20:37:47 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 21:37:47 +0100 Subject: [master] 38f99fa Introduce a new category of boiler-plate notice for parameters: things you should not muck about with unless I tell you to. Message-ID: commit 38f99fa695ed631d79f2e296f9936d8dcf0e12c5 Author: Poul-Henning Kamp Date: Thu Feb 17 20:37:14 2011 +0000 Introduce a new category of boiler-plate notice for parameters: things you should not muck about with unless I tell you to. diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index 306e69e..f15a5a2 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -483,6 +483,10 @@ tweak_diag_bitmap(struct cli *cli, const struct parspec *par, const char *arg) "this parameter, or if the default value is even sensible. " \ "Caution is advised, and feedback is most welcome." +#define WIZARD_TEXT \ + "\nNB: Do not change this parameter, unless a developer tell " \ + "you to do so." + /* * Remember to update varnishd.1 whenever you add / remove a parameter or * change its default value. 
@@ -862,7 +866,7 @@ static const struct parspec input_parspec[] = { &master.critbit_cooloff, 60, 254, "How long time the critbit hasher keeps deleted objheads " "on the cooloff list.\n", - EXPERIMENTAL, + WIZARD, "180.0", "s" }, { "vcl_dir", tweak_string, &mgt_vcl_dir, 0, 0, "Directory from which relative VCL filenames (vcl.load and " @@ -958,6 +962,8 @@ mcf_param_show(struct cli *cli, const char * const *av, void *priv) mcf_wrap(cli, MUST_RELOAD_TEXT); if (pp->flags & MUST_RESTART) mcf_wrap(cli, MUST_RESTART_TEXT); + if (pp->flags & WIZARD) + mcf_wrap(cli, WIZARD_TEXT); if (!lfmt) return; else diff --git a/bin/varnishd/vparam.h b/bin/varnishd/vparam.h index b506267..47eae29 100644 --- a/bin/varnishd/vparam.h +++ b/bin/varnishd/vparam.h @@ -41,10 +41,11 @@ struct parspec { double max; const char *descr; int flags; -#define DELAYED_EFFECT 1 -#define EXPERIMENTAL 2 -#define MUST_RESTART 4 -#define MUST_RELOAD 8 +#define DELAYED_EFFECT (1<<0) +#define EXPERIMENTAL (1<<1) +#define MUST_RESTART (1<<2) +#define MUST_RELOAD (1<<3) +#define WIZARD (1<<4) const char *def; const char *units; }; From phk at varnish-cache.org Thu Feb 17 20:42:21 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 21:42:21 +0100 Subject: [master] 81a0fec At VUG3 this parameter was discussed and the consensus was that 0.4 seconds was a bit too aggresive for a default value, and after some talk forth and back 0.7 seconds was deemed more like it. Message-ID: commit 81a0fec26d32a25a04f00cfa7a2d1d759eb8e407 Author: Poul-Henning Kamp Date: Thu Feb 17 20:40:58 2011 +0000 At VUG3 this parameter was discussed and the consensus was that 0.4 seconds was a bit too aggresive for a default value, and after some talk forth and back 0.7 seconds was deemed more like it. 
diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index f15a5a2..7eb3234 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -668,7 +668,7 @@ static const struct parspec input_parspec[] = { "VCL can override this default value for each backend and " "backend request.", 0, - "0.4", "s" }, + "0.7", "s" }, { "first_byte_timeout", tweak_timeout_double, &master.first_byte_timeout,0, UINT_MAX, "Default timeout for receiving first byte from backend. " From phk at varnish-cache.org Thu Feb 17 20:52:35 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 21:52:35 +0100 Subject: [master] 3602936 Remove the backend connection memory structure cache, it predates our import of JEmalloc, has been turned off by default and does not improve performance when turned on. Message-ID: commit 36029366a7b234e227208740d7903fc87c216490 Author: Poul-Henning Kamp Date: Thu Feb 17 20:51:34 2011 +0000 Remove the backend connection memory structure cache, it predates our import of JEmalloc, has been turned off by default and does not improve performance when turned on. 
diff --git a/bin/varnishd/cache_backend.c b/bin/varnishd/cache_backend.c index 9330ec9..a1886e1 100644 --- a/bin/varnishd/cache_backend.c +++ b/bin/varnishd/cache_backend.c @@ -62,12 +62,6 @@ struct vdi_simple { const struct vrt_backend *vrt; }; - -/* - * List of cached vbcs, used if enabled in params/heritage - */ -static VTAILQ_HEAD(,vbc) vbcs = VTAILQ_HEAD_INITIALIZER(vbcs); - /*-------------------------------------------------------------------- * Create default Host: header for backend request */ @@ -97,17 +91,10 @@ VBE_ReleaseConn(struct vbc *vc) vc->addr = NULL; vc->addrlen = 0; vc->recycled = 0; - if (params->cache_vbcs) { - Lck_Lock(&VBE_mtx); - VTAILQ_INSERT_HEAD(&vbcs, vc, list); - VSC_main->backend_unused++; - Lck_Unlock(&VBE_mtx); - } else { - Lck_Lock(&VBE_mtx); - VSC_main->n_vbc--; - Lck_Unlock(&VBE_mtx); - free(vc); - } + Lck_Lock(&VBE_mtx); + VSC_main->n_vbc--; + Lck_Unlock(&VBE_mtx); + FREE_OBJ(vc); } #define FIND_TMO(tmx, dst, sp, be) \ @@ -238,21 +225,8 @@ vbe_NewConn(void) { struct vbc *vc; - vc = VTAILQ_FIRST(&vbcs); - if (vc != NULL) { - Lck_Lock(&VBE_mtx); - vc = VTAILQ_FIRST(&vbcs); - if (vc != NULL) { - VSC_main->backend_unused--; - VTAILQ_REMOVE(&vbcs, vc, list); - } - Lck_Unlock(&VBE_mtx); - } - if (vc != NULL) - return (vc); - vc = calloc(sizeof *vc, 1); + ALLOC_OBJ(vc, VBC_MAGIC); XXXAN(vc); - vc->magic = VBC_MAGIC; vc->fd = -1; Lck_Lock(&VBE_mtx); VSC_main->n_vbc++; diff --git a/bin/varnishd/heritage.h b/bin/varnishd/heritage.h index d028b5a..7a8b90b 100644 --- a/bin/varnishd/heritage.h +++ b/bin/varnishd/heritage.h @@ -144,9 +144,6 @@ struct params { /* Rush exponent */ unsigned rush_exponent; - /* Cache vbcs */ - unsigned cache_vbcs; - /* Default connection_timeout */ double connect_timeout; diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index 7eb3234..aa0e693 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -656,10 +656,6 @@ static const struct parspec input_parspec[] = { "Maximum depth of 
esi:include processing.\n", 0, "5", "levels" }, - { "cache_vbcs", tweak_bool, &master.cache_vbcs, 0, 0, - "Cache vbc's or rely on malloc, that's the question.", - EXPERIMENTAL, - "off", "bool" }, { "connect_timeout", tweak_timeout_double, &master.connect_timeout,0, UINT_MAX, "Default connection timeout for backend connections. " From phk at varnish-cache.org Thu Feb 17 21:00:12 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 22:00:12 +0100 Subject: [master] 549c706 Add back a couple of AN()'s that I was a bit too fast in commenting out. Message-ID: commit 549c7061eae6918668f1094dae721a8c54365d6f Author: Poul-Henning Kamp Date: Thu Feb 17 20:59:54 2011 +0000 Add back a couple of AN()'s that I was a bit too fast in commenting out. diff --git a/bin/varnishd/cache_esi_fetch.c b/bin/varnishd/cache_esi_fetch.c index 5d61b62..385ee1c 100644 --- a/bin/varnishd/cache_esi_fetch.c +++ b/bin/varnishd/cache_esi_fetch.c @@ -310,8 +310,8 @@ vfp_esi_begin(struct sess *sp, size_t estimate) VEP_Init(sp, NULL); } else if (sp->wrk->is_gunzip && sp->wrk->do_gzip) { ALLOC_OBJ(vef, VEF_MAGIC); + AN(vef); //vef = (void*)WS_Alloc(sp->ws, sizeof *vef); - //AN(vef); //memset(vef, 0, sizeof *vef); //vef->magic = VEF_MAGIC; vef->vgz = VGZ_NewGzip(sp, "G F E"); @@ -321,8 +321,8 @@ vfp_esi_begin(struct sess *sp, size_t estimate) } else if (sp->wrk->is_gzip) { sp->wrk->vgz_rx = VGZ_NewUngzip(sp, "U F E"); ALLOC_OBJ(vef, VEF_MAGIC); + AN(vef); //vef = (void*)WS_Alloc(sp->ws, sizeof *vef); - //AN(vef); //memset(vef, 0, sizeof *vef); //vef->magic = VEF_MAGIC; vef->vgz = VGZ_NewGzip(sp, "G F E"); From phk at varnish-cache.org Thu Feb 17 21:07:13 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 22:07:13 +0100 Subject: [master] 089a335 Assert that dlclose()'ing the vmod succeeds. 
Message-ID: commit 089a3352731d14be2cd902f451ceb99a3e33fe1d Author: Poul-Henning Kamp Date: Thu Feb 17 21:05:59 2011 +0000 Assert that dlclose()'ing the vmod succeeds. diff --git a/bin/varnishd/cache_vrt_vmod.c b/bin/varnishd/cache_vrt_vmod.c index 54cf59a..8b1ed3e 100644 --- a/bin/varnishd/cache_vrt_vmod.c +++ b/bin/varnishd/cache_vrt_vmod.c @@ -123,7 +123,7 @@ VRT_Vmod_Fini(void **hdl) if (--v->ref != 0) return; #ifndef DONT_DLCLOSE_VMODS - dlclose(v->hdl); + AZ(dlclose(v->hdl)); #endif free(v->nm); free(v->path); From perbu at varnish-cache.org Thu Feb 17 21:13:01 2011 From: perbu at varnish-cache.org (Per Andreas Buer) Date: Thu, 17 Feb 2011 22:13:01 +0100 Subject: [master] 4600acf Add a bunch of (?i) to get the message that the Host: header is case insensitive Message-ID: commit 4600acfb2f2a19adb14d27ad5ca948f2e9ed27fa Author: Per Buer Date: Thu Feb 17 22:12:51 2011 +0100 Add a bunch of (?i) to get the message that the Host: header is case insensitive diff --git a/doc/sphinx/reference/varnishd.rst b/doc/sphinx/reference/varnishd.rst index aee66cb..c929baf 100644 --- a/doc/sphinx/reference/varnishd.rst +++ b/doc/sphinx/reference/varnishd.rst @@ -839,30 +839,36 @@ waiter Purge expressions ----------------- -A purge expression consists of one or more conditions. A condition consists of a field, an operator, and an -argument. Conditions can be ANDed together with "&&". +A purge expression consists of one or more conditions. A condition +consists of a field, an operator, and an argument. Conditions can be +ANDed together with "&&". -A field can be any of the variables from VCL, for instance req.url, req.http.host or obj.set-cookie. +A field can be any of the variables from VCL, for instance req.url, +req.http.host or obj.set-cookie. -Operators are "==" for direct comparision, "~" for a regular expression match, and ">" or "<" for size compar? -isons. Prepending an operator with "!" negates the expression. 
+Operators are "==" for direct comparision, "~" for a regular +expression match, and ">" or "<" for size comparisons. Prepending +an operator with "!" negates the expression. -The argument could be a quoted string, a regexp, or an integer. Integers can have "KB", "MB", "GB" or "TB" -appended for size related fields. +The argument could be a quoted string, a regexp, or an integer. +Integers can have "KB", "MB", "GB" or "TB" appended for size related +fields. -Simple example: All requests where req.url exactly matches the string /news are purged from the cache::: +Simple example: All requests where req.url exactly matches the string +/news are purged from the cache::: req.url == "/news" -Example: Purge all documents where the name does not end with ".ogg", and where the size of the object is greater -than 10 megabytes::: +Example: Purge all documents where the name does not end with ".ogg", +and where the size of the object is greater than 10 megabytes::: req.url !~ "\.ogg$" && obj.size > 10MB -Example: Purge all documents where the serving host is "example.com" or "www.example.com", and where the Set- -Cookie header received from the backend contains "USERID=1663"::: +Example: Purge all documents where the serving host is "example.com" +or "www.example.com", and where the Set- Cookie header received from +the backend contains "USERID=1663"::: - req.http.host ~ "^(www\.)example.com$" && obj.set-cookie ~ "USERID=1663" + req.http.host ~ "^(?i)(www\.)example.com$" && obj.set-cookie ~ "USERID=1663" SEE ALSO ======== diff --git a/doc/sphinx/reference/vcl.rst b/doc/sphinx/reference/vcl.rst index d986cd5..6355913 100644 --- a/doc/sphinx/reference/vcl.rst +++ b/doc/sphinx/reference/vcl.rst @@ -92,7 +92,7 @@ A backend declaration creates and initializes a named backend object::: The backend object can later be used to select a backend at request time::: - if (req.http.host ~ "^(www.)?example.com$") { + if (req.http.host ~ "(?i)^(www.)?example.com$") { set req.backend = www; } 
@@ -528,9 +528,9 @@ Example::: # in file "backends.vcl" sub vcl_recv { - if (req.http.host ~ "example.com") { + if (req.http.host ~ "(?i)example.com") { set req.backend = foo; - } elsif (req.http.host ~ "example.org") { + } elsif (req.http.host ~ "(?i)example.org") { set req.backend = bar; } } @@ -731,7 +731,7 @@ Values may be assigned to variables using the set keyword::: sub vcl_recv { # Normalize the Host: header - if (req.http.host ~ "^(www.)?example.com$") { + if (req.http.host ~ "(?i)^(www.)?example.com$") { set req.http.host = "www.example.com"; } } @@ -803,10 +803,10 @@ based on the request URL::: } sub vcl_recv { - if (req.http.host ~ "^(www.)?example.com$") { + if (req.http.host ~ "(?i)^(www.)?example.com$") { set req.http.host = "www.example.com"; set req.backend = www; - } elsif (req.http.host ~ "^images.example.com$") { + } elsif (req.http.host ~ "(?i)^images.example.com$") { set req.backend = images; } else { error 404 "Unknown virtual host"; diff --git a/doc/sphinx/tutorial/increasing_your_hitrate.rst b/doc/sphinx/tutorial/increasing_your_hitrate.rst index b8fe6c2..f10358f 100644 --- a/doc/sphinx/tutorial/increasing_your_hitrate.rst +++ b/doc/sphinx/tutorial/increasing_your_hitrate.rst @@ -177,7 +177,7 @@ Varnish will cache different versions of every page for every hostname. 
You can mitigate this in your web server configuration by setting up redirects or by using the following VCL:: - if (req.http.host ~ "^(www.)?varnish-?software.com") { + if (req.http.host ~ "(?i)^(www.)?varnish-?software.com") { set req.http.host = "varnish-software.com"; } From perbu at varnish-cache.org Thu Feb 17 21:13:03 2011 From: perbu at varnish-cache.org (Per Andreas Buer) Date: Thu, 17 Feb 2011 22:13:03 +0100 Subject: [master] b6ec427 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache Message-ID: commit b6ec427f05bd9fb4eb3958ebc158ea404501d622 Merge: 4600acf 089a335 Author: Per Buer Date: Thu Feb 17 22:13:02 2011 +0100 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache From phk at varnish-cache.org Thu Feb 17 21:25:22 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 22:25:22 +0100 Subject: [master] 90ff618 Increase the default http RX timeout for server/client to 5 seconds. Message-ID: commit 90ff618f2f1e163eea58a5d0a7dbf78d6f676f5e Author: Poul-Henning Kamp Date: Thu Feb 17 21:24:39 2011 +0000 Increase the default http RX timeout for server/client to 5 seconds. 
diff --git a/bin/varnishtest/vtc_http.c b/bin/varnishtest/vtc_http.c index 7b9eb96..5f8cd1b 100644 --- a/bin/varnishtest/vtc_http.c +++ b/bin/varnishtest/vtc_http.c @@ -1065,7 +1065,7 @@ http_process(struct vtclog *vl, const char *spec, int sock, int sfd) ALLOC_OBJ(hp, HTTP_MAGIC); AN(hp); hp->fd = sock; - hp->timeout = 3000; + hp->timeout = 5000; hp->nrxbuf = 640*1024; hp->vsb = vsb_newauto(); hp->rxbuf = malloc(hp->nrxbuf); /* XXX */ From phk at varnish-cache.org Thu Feb 17 21:25:23 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 22:25:23 +0100 Subject: [master] faf767b Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache Message-ID: commit faf767b2c5799158318c3ea4fd5ee1a13fd5d52c Merge: 90ff618 b6ec427 Author: Poul-Henning Kamp Date: Thu Feb 17 21:25:18 2011 +0000 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache From phk at varnish-cache.org Thu Feb 17 21:54:40 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 17 Feb 2011 22:54:40 +0100 Subject: [master] 29f5d0f Increase the maximum test duration to a full minute since people seem to run into the 30 second timeout Message-ID: commit 29f5d0fbc422c0d454ee6063a5d0e832a8004270 Author: Poul-Henning Kamp Date: Thu Feb 17 21:53:59 2011 +0000 Increase the maximum test duration to a full minute since people seem to run into the 30 second timeout diff --git a/bin/varnishtest/vtc_main.c b/bin/varnishtest/vtc_main.c index 3afc448..cd67225 100644 --- a/bin/varnishtest/vtc_main.c +++ b/bin/varnishtest/vtc_main.c @@ -84,7 +84,7 @@ static VTAILQ_HEAD(, vtc_tst) tst_head = VTAILQ_HEAD_INITIALIZER(tst_head); static struct vev_base *vb; static int njob = 0; static int npar = 1; /* Number of parallel tests */ -static unsigned vtc_maxdur = 30; /* Max duration of any test */ +static unsigned vtc_maxdur = 60; /* Max duration of any test */ static int vtc_continue; /* Continue on error */ static int vtc_verbosity = 1; /* Verbosity Level */ static 
int vtc_good; From tfheen at varnish-cache.org Tue Feb 22 08:16:50 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Tue, 22 Feb 2011 09:16:50 +0100 Subject: [master] 830f983 Fix up spelling Message-ID: commit 830f9837cf22941333dd8a8a530f1b0138d67b30 Author: Tollef Fog Heen Date: Tue Feb 22 09:07:18 2011 +0100 Fix up spelling diff --git a/doc/sphinx/installation/install.rst b/doc/sphinx/installation/install.rst index 69c30d2..06010d6 100644 --- a/doc/sphinx/installation/install.rst +++ b/doc/sphinx/installation/install.rst @@ -7,7 +7,7 @@ With open source software, you can choose to install binary packages or compile stuff from source-code. To install a package or compile from source is a matter of personal taste. If you don't know which method too choose read the whole document and choose the method you -are most confortable with. +are most comfortable with. Source or packages? @@ -28,11 +28,11 @@ Binary package: CentOS/RedHat ~~~~~~~~~~~~~ -We try to keep the lastest version available as prebuildt RPMs (el4 & +We try to keep the latest version available as prebuilt RPMs (el4 & el5) on `SourceForge `_. Varnish is included in the `EPEL -`_ repository. Unfortunatly we +`_ repository. Unfortunately we had a syntax change in Varnish 2.0.6->2.1.X. This means that we can not update Varnish in `EPEL `_ so the latest version there is Varnish 2.0.6. @@ -64,8 +64,8 @@ If there are no binary packages available for your system, or if you want to compile Varnish from source for other reasons, follow these steps: -First get a copy of the sourcecode using the ``svn`` command. If -you do not have this command, you need to install SubVersion_ on +First get a copy of the source code using the ``svn`` command. If +you do not have this command, you need to install Subversion_ on your system. There is usually a binary package, try substituting "subversion" for "varnish" in the examples above, it might just work. @@ -93,10 +93,10 @@ installed. 
On a Debian or Ubuntu system these are: * libpcre3-dev * pkg-config -Build dependencies on Red Hat / Centos +Build dependencies on Red Hat / CentOS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To build Varnish on a Red Hat or Centos system you need the following +To build Varnish on a Red Hat or CentOS system you need the following packages installed: * automake @@ -121,7 +121,7 @@ above satisfied. Once that is taken care of::: The ``configure`` script takes some arguments, but more likely than not, you can forget about that for now, almost everything in Varnish -are runtime parameters. +are run time parameters. Before you install, you may want to run the regression tests, make a cup of tea while it runs, it takes some minutes:: @@ -147,4 +147,4 @@ Varnish will now be installed in /usr/local. The varnishd binary is in You can now proceed to the :ref:`tutorial-index`. -.. _SubVersion: http://subversion.tigris.org/ +.. _Subversion: http://subversion.tigris.org/ From tfheen at varnish-cache.org Tue Feb 22 08:16:51 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Tue, 22 Feb 2011 09:16:51 +0100 Subject: [master] ef058a0 Fix up links and update for git. Message-ID: commit ef058a0c84cfcba3e6d47b2bb61ceecf251b6770 Author: Tollef Fog Heen Date: Tue Feb 22 09:14:52 2011 +0100 Fix up links and update for git. diff --git a/doc/sphinx/installation/install.rst b/doc/sphinx/installation/install.rst index 06010d6..d725f81 100644 --- a/doc/sphinx/installation/install.rst +++ b/doc/sphinx/installation/install.rst @@ -28,13 +28,15 @@ Binary package: CentOS/RedHat ~~~~~~~~~~~~~ -We try to keep the latest version available as prebuilt RPMs (el4 & -el5) on `SourceForge `_. +We try to keep the latest version available as prebuilt RPMs (el5) on +`repo.varnish-cache.org `. See the +`RedHat installation instructions +` for more information. Varnish is included in the `EPEL `_ repository. Unfortunately we had a syntax change in Varnish 2.0.6->2.1.X. 
This means that we can -not update Varnish in `EPEL `_ so +not update Varnish in `EPEL 5 `_ so the latest version there is Varnish 2.0.6. EPEL6 should have Varnish 2.1 available once it releases. @@ -44,7 +46,11 @@ Debian/Ubuntu Varnish is distributed with both Debian and Ubuntu. In order to get Varnish up and running type `sudo apt-get install varnish`. Please -note that this might not be the latest version of Varnish. +note that this might not be the latest version of Varnish. If you +need a later version of Varnish, please follow the installation +instructions for `Debian +` or `Ubuntu +`. Other systems ~~~~~~~~~~~~~ @@ -64,18 +70,16 @@ If there are no binary packages available for your system, or if you want to compile Varnish from source for other reasons, follow these steps: -First get a copy of the source code using the ``svn`` command. If -you do not have this command, you need to install Subversion_ on -your system. There is usually a binary package, try substituting -"subversion" for "varnish" in the examples above, it might just work. +We recommend downloading a release tarball, which you can find on +`repo.varnish-cache.org `. -To fetch the current (2.1) production branch::: +Alternatively, if you want to hack on Varnish, you should clone our +git repository by doing. - svn co http://varnish-cache.org/svn/branches/2.1 + git clone git://git.varnish-cache.org/varnish-cache -To get the development source code::: - - svn co http://varnish-cache.org/svn/trunk +Please note that a git checkout will need some more build-dependencies +than listed below, in particular the Python Docutils and Sphinx. Build dependencies on Debian / Ubuntu ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -126,7 +130,7 @@ are run time parameters. 
Before you install, you may want to run the regression tests, make a cup of tea while it runs, it takes some minutes:: - (cd bin/varnishtest && ./varnishtest tests/*.vtc) + make check Don't worry of a single or two tests fail, some of the tests are a bit too timing sensitive (Please tell us which so we can fix it) but @@ -146,5 +150,3 @@ Varnish will now be installed in /usr/local. The varnishd binary is in /usr/local/etc/varnish/default.vcl. You can now proceed to the :ref:`tutorial-index`. - -.. _Subversion: http://subversion.tigris.org/ From tfheen at varnish-cache.org Tue Feb 22 10:46:59 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Tue, 22 Feb 2011 11:46:59 +0100 Subject: [master] cf78444 Rename svn_version.c to vcs_version.c, simplify generation Message-ID: commit cf7844436784aa61763a702f3dfef8a6bf934f75 Author: Tollef Fog Heen Date: Tue Feb 22 11:44:14 2011 +0100 Rename svn_version.c to vcs_version.c, simplify generation Fix up the generation of vcs_version.c with the assumption that if you have a .git directory at the top, you have git installed. Drop logic for handling svn checkouts. diff --git a/include/libvarnish.h b/include/libvarnish.h index f21b1db..0493a2e 100644 --- a/include/libvarnish.h +++ b/include/libvarnish.h @@ -100,7 +100,7 @@ void varnish_version(const char *); int vtmpfile(char *); char *vreadfile(const char *pfx, const char *fn); -const char* svn_version(void); +const char* vcs_version(void); /* Safe printf into a fixed-size buffer */ #define bprintf(buf, fmt, ...) 
\ diff --git a/lib/libvarnish/Makefile.am b/lib/libvarnish/Makefile.am index 9d1056c..3a01d76 100644 --- a/lib/libvarnish/Makefile.am +++ b/lib/libvarnish/Makefile.am @@ -1,5 +1,3 @@ -# $Id$ - INCLUDES = -I$(top_srcdir)/include @PCRE_CFLAGS@ lib_LTLIBRARIES = libvarnish.la @@ -16,7 +14,7 @@ libvarnish_la_SOURCES = \ cli_serve.c \ flopen.c \ num.c \ - svn_version.c \ + vcs_version.c \ time.c \ tcp.c \ vct.c \ @@ -35,12 +33,12 @@ libvarnish_la_SOURCES = \ libvarnish_la_CFLAGS = -DVARNISH_STATE_DIR='"${VARNISH_STATE_DIR}"' libvarnish_la_LIBADD = ${RT_LIBS} ${NET_LIBS} ${LIBM} @PCRE_LIBS@ -DISTCLEANFILES = svn_version.c -svn_version.c: FORCE - V="$$(sh -c "git log -n 1 --pretty=format:%h" 2>/dev/null || LANG=C svnversion -n $(top_srcdir))" \ - H="$$(head -n 1 svn_version.c 2>/dev/null || true)"; \ - [ "$$V" = "exported" ] && [ -e svn_version.c ] && exit 0 ; \ - if [ "/* $$V */" != "$$H" ]; then \ +DISTCLEANFILES = vcs_version.c +vcs_version.c: FORCE + if [ -d "$(top_srcdir)/.git" ]; then \ + V="$$(git describe --always)" \ + H="$$(head -n 1 vcs_version.c 2>/dev/null || true)"; \ + if [ "/* $$V */" != "$$H" ]; then \ ( \ echo "/* $$V */" ;\ echo '/*' ;\ @@ -51,12 +49,13 @@ svn_version.c: FORCE echo ' */' ;\ echo '' ;\ echo "#include " ;\ - echo "const char* svn_version(void)" ;\ + echo "const char* vcs_version(void)" ;\ echo "{" ;\ - echo " const char* SVN_Version = \"$$V\";" ;\ - echo " return SVN_Version;" ;\ + echo " const char* VCS_Version = \"$$V\";" ;\ + echo " return VCS_Version;" ;\ echo "}" ;\ - ) > svn_version.c ; \ + ) > vcs_version.c ; \ + fi \ fi FORCE: diff --git a/lib/libvarnish/version.c b/lib/libvarnish/version.c index 5df094a..e5472a1 100644 --- a/lib/libvarnish/version.c +++ b/lib/libvarnish/version.c @@ -41,8 +41,8 @@ SVNID("$Id$") void varnish_version(const char *progname) { - fprintf(stderr, "%s (%s-%s SVN %s)\n", progname, - PACKAGE_TARNAME, PACKAGE_VERSION, svn_version()); + fprintf(stderr, "%s (%s-%s revision %s)\n", progname, + PACKAGE_TARNAME, 
PACKAGE_VERSION, vcs_version()); fprintf(stderr, "Copyright (c) 2006-2009 Linpro AS / Verdens Gang AS\n"); } diff --git a/lib/libvarnishapi/Makefile.am b/lib/libvarnishapi/Makefile.am index 9aa49ce..5879f74 100644 --- a/lib/libvarnishapi/Makefile.am +++ b/lib/libvarnishapi/Makefile.am @@ -12,7 +12,7 @@ libvarnishapi_la_SOURCES = \ \ ../libvarnish/assert.c \ ../libvarnish/argv.c \ - ../libvarnish/svn_version.c \ + ../libvarnish/vcs_version.c \ ../libvarnish/version.c \ ../libvarnish/vin.c \ ../libvarnish/vmb.c \ From phk at varnish-cache.org Wed Feb 23 11:26:50 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 23 Feb 2011 12:26:50 +0100 Subject: [master] 6edf9c3 Overhaul HTTP request/response size limits: Message-ID: commit 6edf9c379ed0ff20171c87c056730cf9084949b4 Author: Poul-Henning Kamp Date: Wed Feb 23 11:24:28 2011 +0000 Overhaul HTTP request/response size limits: http_headers is now called http_max_hdr and covers both requests and responses. http_{req|resp}_size sets an upper limit on requests and responses, and we (still) summarily close the connection if it is exceeded. (default: 32k) http_{req|resp}_hdr_len sets the max size for any HTTP header line except the first line (URL/response). 
(default 512) diff --git a/bin/varnishd/cache.h b/bin/varnishd/cache.h index 0ab2dc1..9ca8034 100644 --- a/bin/varnishd/cache.h +++ b/bin/varnishd/cache.h @@ -183,6 +183,8 @@ struct http_conn { #define HTTP_CONN_MAGIC 0x3e19edd1 int fd; + unsigned maxbytes; + unsigned maxhdr; struct ws *ws; txt rxbuf; txt pipeline; @@ -702,7 +704,8 @@ void http_Unset(struct http *hp, const char *hdr); void http_CollectHdr(struct http *hp, const char *hdr); /* cache_httpconn.c */ -void HTC_Init(struct http_conn *htc, struct ws *ws, int fd); +void HTC_Init(struct http_conn *htc, struct ws *ws, int fd, unsigned maxbytes, + unsigned maxhdr); int HTC_Reinit(struct http_conn *htc); int HTC_Rx(struct http_conn *htc); ssize_t HTC_Read(struct http_conn *htc, void *d, size_t len); diff --git a/bin/varnishd/cache_center.c b/bin/varnishd/cache_center.c index 4a8b6ac..30c9f32 100644 --- a/bin/varnishd/cache_center.c +++ b/bin/varnishd/cache_center.c @@ -382,7 +382,7 @@ cnt_error(struct sess *sp) HSH_Prealloc(sp); /* XXX: 1024 is a pure guess */ sp->obj = STV_NewObject(sp, NULL, 1024, 0, - params->http_headers); + params->http_max_hdr); sp->obj->xid = sp->xid; sp->obj->entered = sp->t_req; } else { @@ -769,7 +769,8 @@ cnt_first(struct sess *sp) sp->ws_ses = WS_Snapshot(sp->ws); /* Receive a HTTP protocol request */ - HTC_Init(sp->htc, sp->ws, sp->fd); + HTC_Init(sp->htc, sp->ws, sp->fd, params->http_req_size, + params->http_req_hdr_len); sp->wrk->lastused = sp->t_open; sp->acct_tmp.sess++; diff --git a/bin/varnishd/cache_fetch.c b/bin/varnishd/cache_fetch.c index 72549c8..911d576 100644 --- a/bin/varnishd/cache_fetch.c +++ b/bin/varnishd/cache_fetch.c @@ -438,7 +438,8 @@ FetchHdr(struct sess *sp) /* Receive response */ - HTC_Init(sp->wrk->htc, sp->wrk->ws, vc->fd); + HTC_Init(sp->wrk->htc, sp->wrk->ws, vc->fd, params->http_resp_size, + params->http_resp_hdr_len); TCP_set_read_timeout(vc->fd, vc->first_byte_timeout); diff --git a/bin/varnishd/cache_http.c b/bin/varnishd/cache_http.c index 
323cead..4ebf878 100644 --- a/bin/varnishd/cache_http.c +++ b/bin/varnishd/cache_http.c @@ -491,9 +491,11 @@ http_GetReq(const struct http *hp) */ static int -http_dissect_hdrs(struct worker *w, struct http *hp, int fd, char *p, txt t) +http_dissect_hdrs(struct worker *w, struct http *hp, int fd, char *p, + const struct http_conn *htc) { char *q, *r; + txt t = htc->rxbuf; if (*p == '\r') p++; @@ -524,6 +526,12 @@ http_dissect_hdrs(struct worker *w, struct http *hp, int fd, char *p, txt t) *q++ = ' '; } + if (q - p > htc->maxhdr) { + VSC_main->losthdr++; + WSL(w, SLT_LostHeader, fd, "%.*s", q - p, p); + return (400); + } + /* Empty header = end of headers */ if (p == q) break; @@ -629,7 +637,7 @@ http_splitline(struct worker *w, int fd, struct http *hp, WSLH(w, fd, hp, h3); } - return (http_dissect_hdrs(w, hp, fd, p, htc->rxbuf)); + return (http_dissect_hdrs(w, hp, fd, p, htc)); } /*--------------------------------------------------------------------*/ diff --git a/bin/varnishd/cache_httpconn.c b/bin/varnishd/cache_httpconn.c index a73b5e1..93a7820 100644 --- a/bin/varnishd/cache_httpconn.c +++ b/bin/varnishd/cache_httpconn.c @@ -46,8 +46,7 @@ SVNID("$Id$") * Check if we have a complete HTTP request or response yet * * Return values: - * -1 No, and you can nuke the (white-space) content. - * 0 No, keep trying + * 0 No, keep trying * >0 Yes, it is this many bytes long. */ @@ -83,14 +82,17 @@ htc_header_complete(txt *t) /*--------------------------------------------------------------------*/ void -HTC_Init(struct http_conn *htc, struct ws *ws, int fd) +HTC_Init(struct http_conn *htc, struct ws *ws, int fd, unsigned maxbytes, + unsigned maxhdr) { htc->magic = HTTP_CONN_MAGIC; htc->ws = ws; htc->fd = fd; - /* XXX: ->s or ->f ? or param ? 
*/ - (void)WS_Reserve(htc->ws, (htc->ws->e - htc->ws->s) / 2); + htc->maxbytes = maxbytes; + htc->maxhdr = maxhdr; + + (void)WS_Reserve(htc->ws, htc->maxbytes); htc->rxbuf.b = ws->f; htc->rxbuf.e = ws->f; *htc->rxbuf.e = '\0'; @@ -110,7 +112,7 @@ HTC_Reinit(struct http_conn *htc) unsigned l; CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC); - (void)WS_Reserve(htc->ws, (htc->ws->e - htc->ws->s) / 2); + (void)WS_Reserve(htc->ws, htc->maxbytes); htc->rxbuf.b = htc->ws->f; htc->rxbuf.e = htc->ws->f; if (htc->pipeline.b != NULL) { diff --git a/bin/varnishd/cache_pool.c b/bin/varnishd/cache_pool.c index 27b7253..225c08e 100644 --- a/bin/varnishd/cache_pool.c +++ b/bin/varnishd/cache_pool.c @@ -224,7 +224,7 @@ wrk_thread(void *priv) CAST_OBJ_NOTNULL(qp, priv, WQ_MAGIC); /* We need to snapshot these two for consistency */ - nhttp = params->http_headers; + nhttp = params->http_max_hdr; siov = nhttp * 2; if (siov > IOV_MAX) siov = IOV_MAX; diff --git a/bin/varnishd/cache_session.c b/bin/varnishd/cache_session.c index f69d35e..8f26890 100644 --- a/bin/varnishd/cache_session.c +++ b/bin/varnishd/cache_session.c @@ -116,7 +116,7 @@ ses_sm_alloc(void) * view of the value. 
*/ nws = params->sess_workspace; - nhttp = params->http_headers; + nhttp = params->http_max_hdr; hl = HTTP_estimate(nhttp); l = sizeof *sm + nws + 2 * hl; p = malloc(l); diff --git a/bin/varnishd/cache_ws.c b/bin/varnishd/cache_ws.c index e55b1c7..62133d4 100644 --- a/bin/varnishd/cache_ws.c +++ b/bin/varnishd/cache_ws.c @@ -173,6 +173,8 @@ WS_Reserve(struct ws *ws, unsigned bytes) assert(ws->r == NULL); if (bytes == 0) b2 = ws->e - ws->f; + else if (bytes > ws->e - ws->f) + b2 = ws->e - ws->f; else b2 = bytes; b2 = PRNDDN(b2); diff --git a/bin/varnishd/heritage.h b/bin/varnishd/heritage.h index 7a8b90b..c527f98 100644 --- a/bin/varnishd/heritage.h +++ b/bin/varnishd/heritage.h @@ -96,7 +96,11 @@ struct params { /* Memory allocation hints */ unsigned sess_workspace; unsigned shm_workspace; - unsigned http_headers; + unsigned http_req_size; + unsigned http_req_hdr_len; + unsigned http_resp_size; + unsigned http_resp_hdr_len; + unsigned http_max_hdr; unsigned shm_reclen; diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index aa0e693..7a9ba14 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -519,10 +519,44 @@ static const struct parspec input_parspec[] = { DELAYED_EFFECT, "65536", "bytes" }, - { "http_headers", tweak_uint, &master.http_headers, 32, UINT_MAX, - "Maximum number of HTTP headers we will deal with.\n" - "This space is preallocated in sessions and workthreads only " - "objects allocate only space for the headers they store.\n", + { "http_req_hdr_len", tweak_uint, &master.http_req_hdr_len, + 40, UINT_MAX, + "Maximum length of any HTTP client request header we will " + "allow. The limit is inclusive its continuation lines.\n", + 0, + "512", "bytes" }, + { "http_req_size", tweak_uint, &master.http_req_size, + 256, UINT_MAX, + "Maximum number of bytes of HTTP client request we will deal " + "with. 
This is a limit on all bytes up to the double blank " "line which ends the HTTP request.\n" "The memory for the request is allocated from the session " "workspace (param: sess_workspace) and this parameter limits " "how much of that the request is allowed to take up.", 0, "32768", "bytes" }, + { "http_resp_hdr_len", tweak_uint, &master.http_resp_hdr_len, + 40, UINT_MAX, + "Maximum length of any HTTP backend response header we will " + "allow. The limit is inclusive its continuation lines.\n", + 0, + "512", "bytes" }, + { "http_resp_size", tweak_uint, &master.http_resp_size, + 256, UINT_MAX, + "Maximum number of bytes of HTTP backend response we will deal " + "with. This is a limit on all bytes up to the double blank " + "line which ends the HTTP request.\n" + "The memory for the request is allocated from the worker " + "workspace (param: sess_workspace) and this parameter limits " + "how much of that the request is allowed to take up.", + 0, + "32768", "bytes" }, + { "http_max_hdr", tweak_uint, &master.http_max_hdr, 32, UINT_MAX, + "Maximum number of HTTP headers we will deal with in " + "client request or backend responses. 
" + "Note that the first line occupies five header fields.\n" + "This paramter does not influence storage consumption, " + "objects allocate exact space for the headers they store.\n", 0, "64", "header lines" }, { "shm_workspace", tweak_uint, &master.shm_workspace, 4096, UINT_MAX, diff --git a/bin/varnishtest/tests/c00039.vtc b/bin/varnishtest/tests/c00039.vtc new file mode 100644 index 0000000..a53742f --- /dev/null +++ b/bin/varnishtest/tests/c00039.vtc @@ -0,0 +1,54 @@ +# $Id$ + +test "request req and hdr length limits" + +server s1 { + rxreq + expect req.url == "/1" + txresp -bodylen 5 + + rxreq + expect req.url == "/2" + txresp -bodylen 5 +} -start + +varnish v1 \ + -vcl+backend { + } -start + +varnish v1 -cliok "param.set http_req_size 256" +varnish v1 -cliok "param.set http_req_hdr_len 40" + +client c1 { + txreq -url "/1" -hdr "1...5: ..0....5....0....5....0....5....0" + rxresp + expect resp.status == 200 + txreq -url "/1" -hdr "1...5....0....5....0....5....0....5....0." + rxresp + expect resp.status == 400 +} -run + +client c1 { + txreq -url "/2" -hdr "1...5: ..0....5\n ..0....5....0....5....0" + rxresp + expect resp.status == 200 + txreq -url "/2" -hdr "1...5....0....5\n ..0....5....0....5....0." + rxresp + expect resp.status == 400 +} -run + +client c1 { + txreq -url "/1" \ + -hdr "1...5: ..0....5\n ..0....5....0....5....0" \ + -hdr "1...5: ..0....5\n ..0....5....0....5....0" \ + -hdr "1...5: ..0....5\n ..0....5....0....5....0" \ + -hdr "1...5: ..0....5\n ..0....5....0....5....0" \ + -hdr "1...5: ..0....5\n ..0....5....0....5....0" \ + -hdr "1...5: ..0....5\n ..0...." + rxresp + expect resp.status == 200 + # XXX: Varnish test does not allow us to test for the fact + # XXX: that the backend summarily closes on us. Adding one + # XXX: char to the above test, should cause that. 
+} -run + diff --git a/bin/varnishtest/tests/c00040.vtc b/bin/varnishtest/tests/c00040.vtc new file mode 100644 index 0000000..9faf18e --- /dev/null +++ b/bin/varnishtest/tests/c00040.vtc @@ -0,0 +1,84 @@ +# $Id$ + +test "request resp and hdr length limits" + +server s1 { + rxreq + expect req.url == "/1" + txresp \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -bodylen 1 + rxreq + expect req.url == "/2" + txresp \ + -hdr "1...5: ..0....5....0....5....0....5....0." \ + -bodylen 2 + accept + rxreq + expect req.url == "/3" + txresp \ + -hdr "1...5: ..0....5....0\n ..5....0....5....0" \ + -bodylen 3 + rxreq + expect req.url == "/4" + txresp \ + -hdr "1...5: ..0....5....0\n ..5....0....5....0." \ + -bodylen 4 + + accept + rxreq + expect req.url == "/5" + txresp \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -hdr "1...5: ..0....5....0....5....0....5" \ + -hdr "1...5: ..0" \ + -bodylen 5 + + rxreq + expect req.url == "/6" + txresp \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -hdr "1...5: ..0....5....0....5....0....5....0" \ + -hdr "1...5: ..0....5....0....5....0....5" \ + -hdr "1...5: ..0." 
\ + -bodylen 6 +} -start + +varnish v1 \ + -vcl+backend { + } -start + +varnish v1 -cliok "param.set http_resp_size 256" +varnish v1 -cliok "param.set http_resp_hdr_len 40" + +client c1 { + txreq -url "/1" + rxresp + expect resp.status == 200 + txreq -url "/2" + rxresp + expect resp.status == 503 +} -run +client c1 { + txreq -url "/3" + rxresp + expect resp.status == 200 + txreq -url "/4" + rxresp + expect resp.status == 503 +} -run +client c1 { + txreq -url "/5" + rxresp + expect resp.status == 200 + + txreq -url "/6" + rxresp + expect resp.status == 503 +} -run + diff --git a/bin/varnishtest/tests/r00498.vtc b/bin/varnishtest/tests/r00498.vtc index 951d507..6e4e915 100644 --- a/bin/varnishtest/tests/r00498.vtc +++ b/bin/varnishtest/tests/r00498.vtc @@ -11,6 +11,8 @@ server s1 { varnish v1 -vcl+backend { } -start +varnish v1 -cliok "param.set http_resp_hdr_len 32768" + client c1 { txreq rxresp From phk at varnish-cache.org Wed Feb 23 12:45:52 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 23 Feb 2011 13:45:52 +0100 Subject: [master] 841596c Split the worker thread workspace size out into its own parameter "thread_pool_workspace" Message-ID: commit 841596c7d0e8b88a3bbbfa17849b6808564c8ea8 Author: Poul-Henning Kamp Date: Wed Feb 23 12:45:21 2011 +0000 Split the worker thread workspace size out into its own parameter "thread_pool_workspace" diff --git a/bin/varnishd/cache_pool.c b/bin/varnishd/cache_pool.c index 225c08e..49cfc9f 100644 --- a/bin/varnishd/cache_pool.c +++ b/bin/varnishd/cache_pool.c @@ -123,6 +123,7 @@ wrk_thread_real(struct wq *qp, unsigned shm_workspace, unsigned sess_workspace, { struct worker *w, ww; uint32_t wlog[shm_workspace / 4]; + /* XXX: can we trust these to be properly aligned ? 
*/ unsigned char ws[sess_workspace]; unsigned char http0[http_space]; unsigned char http1[http_space]; @@ -230,7 +231,7 @@ wrk_thread(void *priv) siov = IOV_MAX; return (wrk_thread_real(qp, params->shm_workspace, - params->sess_workspace, + params->wthread_workspace, nhttp, HTTP_estimate(nhttp), siov)); } diff --git a/bin/varnishd/heritage.h b/bin/varnishd/heritage.h index c527f98..fb94312 100644 --- a/bin/varnishd/heritage.h +++ b/bin/varnishd/heritage.h @@ -90,6 +90,7 @@ struct params { unsigned wthread_purge_delay; unsigned wthread_stats_rate; unsigned wthread_stacksize; + unsigned wthread_workspace; unsigned queue_max; diff --git a/bin/varnishd/mgt_pool.c b/bin/varnishd/mgt_pool.c index a612541..cd0a371 100644 --- a/bin/varnishd/mgt_pool.c +++ b/bin/varnishd/mgt_pool.c @@ -227,5 +227,16 @@ const struct parspec WRK_parspec[] = { "many threads into the limited address space.\n", EXPERIMENTAL, "-1", "bytes" }, + { "thread_pool_workspace", tweak_uint, &master.wthread_workspace, + 1024, UINT_MAX, + "Bytes of HTTP protocol workspace allocated for worker " + "threads. " + "This space must be big enough for the backend request " + "and responses, and response to the client plus any other " + "memory needs in the VCL code." + "Minimum is 1024 bytes.", + DELAYED_EFFECT, + "65536", + "bytes" }, { NULL, NULL, NULL } }; From tfheen at varnish-cache.org Wed Feb 23 13:28:52 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Wed, 23 Feb 2011 14:28:52 +0100 Subject: [master] fe5b71e Move all libs but libvarnishapi to a private directory, drop soname number Message-ID: commit fe5b71ee98a5d37db6ac7775ab525e0d166fceba Author: Tollef Fog Heen Date: Tue Feb 22 10:05:29 2011 +0100 Move all libs but libvarnishapi to a private directory, drop soname number As we don't want anybody linking against libvarnish, libvcl and the other libraries, move those to pkglibdir. In addition, to further emphasize that they do not have a stable ABI, drop the version from the soname. 
diff --git a/lib/libvarnish/Makefile.am b/lib/libvarnish/Makefile.am index 9d1056c..dcec5e1 100644 --- a/lib/libvarnish/Makefile.am +++ b/lib/libvarnish/Makefile.am @@ -2,9 +2,9 @@ INCLUDES = -I$(top_srcdir)/include @PCRE_CFLAGS@ -lib_LTLIBRARIES = libvarnish.la +pkglib_LTLIBRARIES = libvarnish.la -libvarnish_la_LDFLAGS = -version-info 1:0:0 +libvarnish_la_LDFLAGS = -avoid-version libvarnish_la_SOURCES = \ argv.c \ diff --git a/lib/libvarnishcompat/Makefile.am b/lib/libvarnishcompat/Makefile.am index 76d4986..f5b363e 100644 --- a/lib/libvarnishcompat/Makefile.am +++ b/lib/libvarnishcompat/Makefile.am @@ -2,9 +2,9 @@ INCLUDES = -I$(top_srcdir)/include -lib_LTLIBRARIES = libvarnishcompat.la +pkglib_LTLIBRARIES = libvarnishcompat.la -libvarnishcompat_la_LDFLAGS = -version-info 1:0:0 +libvarnishcompat_la_LDFLAGS = -avoid-version libvarnishcompat_la_SOURCES = \ daemon.c \ diff --git a/lib/libvcl/Makefile.am b/lib/libvcl/Makefile.am index aab8749..c594885 100644 --- a/lib/libvcl/Makefile.am +++ b/lib/libvcl/Makefile.am @@ -2,9 +2,9 @@ INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include -lib_LTLIBRARIES = libvcl.la +pkglib_LTLIBRARIES = libvcl.la -libvcl_la_LDFLAGS = -version-info 1:0:0 +libvcl_la_LDFLAGS = -avoid-version libvcl_la_SOURCES = \ vcc_priv.h \ diff --git a/lib/libvgz/Makefile.am b/lib/libvgz/Makefile.am index ab9b561..a00e22b 100644 --- a/lib/libvgz/Makefile.am +++ b/lib/libvgz/Makefile.am @@ -1,8 +1,8 @@ # $Id$ -lib_LTLIBRARIES = libvgz.la +pkglib_LTLIBRARIES = libvgz.la -libvgz_la_LDFLAGS = -version-info 1:0:0 +libvgz_la_LDFLAGS = -avoid-version libvgz_la_CFLAGS = -D_LARGEFILE64_SOURCE=1 $(libvgz_extra_cflags) libvgz_la_SOURCES = \ From tfheen at varnish-cache.org Wed Feb 23 13:28:52 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Wed, 23 Feb 2011 14:28:52 +0100 Subject: [master] f35a9b2 Drop version from libvmod_std Message-ID: commit f35a9b2c14a2f66114d6c74cc0d14240623e62ce Author: Tollef Fog Heen Date: Tue Feb 22 10:07:08 2011 
+0100 Drop version from libvmod_std There is no reason for libvmod_std to have a version number, so drop it and adjust test cases accordingly. diff --git a/bin/varnishtest/tests/m00000.vtc b/bin/varnishtest/tests/m00000.vtc index e44dbb2..c73a52e 100644 --- a/bin/varnishtest/tests/m00000.vtc +++ b/bin/varnishtest/tests/m00000.vtc @@ -8,7 +8,7 @@ server s1 { } -start varnish v1 -vcl+backend { - import std from "${topbuild}/lib/libvmod_std/.libs/libvmod_std.so.1" ; + import std from "${topbuild}/lib/libvmod_std/.libs/libvmod_std.so" ; sub vcl_deliver { set resp.http.foo = std.toupper(resp.http.foo); diff --git a/bin/varnishtest/tests/m00001.vtc b/bin/varnishtest/tests/m00001.vtc index f65dcbd..e8b11cf 100644 --- a/bin/varnishtest/tests/m00001.vtc +++ b/bin/varnishtest/tests/m00001.vtc @@ -8,7 +8,7 @@ server s1 { } -start varnish v1 -arg "-pthread_pools=1" -vcl+backend { - import std from "${topbuild}/lib/libvmod_std/.libs/libvmod_std.so.1" ; + import std from "${topbuild}/lib/libvmod_std/.libs/libvmod_std.so" ; sub vcl_deliver { set resp.http.foo = std.toupper(resp.http.foo); diff --git a/bin/varnishtest/tests/m00002.vtc b/bin/varnishtest/tests/m00002.vtc index 8b5fd16..a6193ad 100644 --- a/bin/varnishtest/tests/m00002.vtc +++ b/bin/varnishtest/tests/m00002.vtc @@ -11,7 +11,7 @@ server s1 { } -start varnish v1 -vcl+backend { - import std from "${topbuild}/lib/libvmod_std/.libs/libvmod_std.so.1" ; + import std from "${topbuild}/lib/libvmod_std/.libs/libvmod_std.so" ; sub vcl_fetch { set beresp.http.rnd1 = std.random(0,1); diff --git a/lib/libvcl/vcc_vmod.c b/lib/libvcl/vcc_vmod.c index 8e679fb..45fe1cd 100644 --- a/lib/libvcl/vcc_vmod.c +++ b/lib/libvcl/vcc_vmod.c @@ -98,7 +98,7 @@ vcc_ParseImport(struct vcc *tl) bprintf(fn, "%s", tl->t->dec); vcc_NextToken(tl); } else { - bprintf(fn, "%s/libvmod_%.*s.so.1", tl->vmod_dir, PF(mod)); + bprintf(fn, "%s/libvmod_%.*s.so", tl->vmod_dir, PF(mod)); } Fh(tl, 0, "static void *VGC_vmod_%.*s;\n", PF(mod)); diff --git 
a/lib/libvmod_std/Makefile.am b/lib/libvmod_std/Makefile.am index 36147fe..5437c02 100644 --- a/lib/libvmod_std/Makefile.am +++ b/lib/libvmod_std/Makefile.am @@ -5,7 +5,7 @@ INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include vmoddir = $(pkglibdir)/vmods vmod_LTLIBRARIES = libvmod_std.la -libvmod_std_la_LDFLAGS = -version-info 1:0:0 +libvmod_std_la_LDFLAGS = -avoid-version libvmod_std_la_SOURCES = \ vcc_if.c \ From tfheen at varnish-cache.org Wed Feb 23 13:28:53 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Wed, 23 Feb 2011 14:28:53 +0100 Subject: [master] 7b9df55 Merge branch 'privlibdir' Message-ID: commit 7b9df5576bcc41d2d90bff13b9714996ea303b02 Merge: 841596c f35a9b2 Author: Tollef Fog Heen Date: Wed Feb 23 14:28:40 2011 +0100 Merge branch 'privlibdir' diff --cc lib/libvarnish/Makefile.am index 3a01d76,dcec5e1..cb6fc37 --- a/lib/libvarnish/Makefile.am +++ b/lib/libvarnish/Makefile.am @@@ -1,8 -1,10 +1,8 @@@ -# $Id$ - INCLUDES = -I$(top_srcdir)/include @PCRE_CFLAGS@ - lib_LTLIBRARIES = libvarnish.la + pkglib_LTLIBRARIES = libvarnish.la - libvarnish_la_LDFLAGS = -version-info 1:0:0 + libvarnish_la_LDFLAGS = -avoid-version libvarnish_la_SOURCES = \ argv.c \ From phk at varnish-cache.org Wed Feb 23 15:04:55 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 23 Feb 2011 16:04:55 +0100 Subject: [master] e5e43bd Make sure to initialize the the {do|is}_gzip variables to zero before we start setting them otherwise. Message-ID: commit e5e43bd262b1b5db35d56a798ac1ce1d8c6b8b50 Author: Poul-Henning Kamp Date: Wed Feb 23 15:04:24 2011 +0000 Make sure to initialize the the {do|is}_gzip variables to zero before we start setting them otherwise. 
diff --git a/bin/varnishd/cache_center.c b/bin/varnishd/cache_center.c index 30c9f32..ac370be 100644 --- a/bin/varnishd/cache_center.c +++ b/bin/varnishd/cache_center.c @@ -1153,6 +1153,11 @@ cnt_recv(struct sess *sp) return (0); } + sp->wrk->is_gzip = 0; + sp->wrk->is_gunzip = 0; + sp->wrk->do_gzip = 0; + sp->wrk->do_gunzip = 0; + if (params->http_gzip_support && (recv_handling != VCL_RET_PIPE) && (recv_handling != VCL_RET_PASS)) { diff --git a/bin/varnishtest/tests/r00861.vtc b/bin/varnishtest/tests/r00861.vtc index 38db46b..6842cbd 100644 --- a/bin/varnishtest/tests/r00861.vtc +++ b/bin/varnishtest/tests/r00861.vtc @@ -36,6 +36,8 @@ varnish v1 \ client c1 { txreq -url "/1" rxresp + expect resp.http.Content-Encoding == resp.http.Content-Encoding + expect resp.bodylen == 22 txreq -url "/barf" -hdr "Accept-Encoding: gzip" rxresp From phk at varnish-cache.org Wed Feb 23 15:14:08 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Wed, 23 Feb 2011 16:14:08 +0100 Subject: [master] 1c7c400 Also reduce worker thread workspace size for 32bit systems now that it has its own parameter. Message-ID: commit 1c7c40056d6ec15b11220ce5a022a3acdf1020a0 Author: Poul-Henning Kamp Date: Wed Feb 23 15:13:44 2011 +0000 Also reduce worker thread workspace size for 32bit systems now that it has its own parameter. 
diff --git a/bin/varnishd/varnishd.c b/bin/varnishd/varnishd.c index 0e16da7..9583cc4 100644 --- a/bin/varnishd/varnishd.c +++ b/bin/varnishd/varnishd.c @@ -418,6 +418,9 @@ main(int argc, char * const *argv) MCF_ParamSet(cli, "sess_workspace", "16384"); cli_check(cli); + MCF_ParamSet(cli, "thread_pool_workspace", "16384"); + cli_check(cli); + MCF_ParamSet(cli, "thread_pool_stack", "32bit"); cli_check(cli); From phk at varnish-cache.org Thu Feb 24 07:28:46 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 24 Feb 2011 08:28:46 +0100 Subject: [master] 0956e00 white space cleanup Message-ID: commit 0956e00858e2a2b5a27e5c675b19b26ad74f0b32 Author: Poul-Henning Kamp Date: Thu Feb 24 07:28:39 2011 +0000 white space cleanup diff --git a/bin/varnishd/cache_center.c b/bin/varnishd/cache_center.c index ac370be..844103c 100644 --- a/bin/varnishd/cache_center.c +++ b/bin/varnishd/cache_center.c @@ -655,7 +655,7 @@ cnt_fetch(struct sess *sp) l += strlen("Content-Length: XxxXxxXxxXxxXxxXxx") + sizeof(void *); if (sp->wrk->ttl < sp->t_req + params->shortlived || - sp->objcore == NULL) + sp->objcore == NULL) sp->wrk->storage_hint = TRANSIENT_STORAGE; sp->obj = STV_NewObject(sp, sp->wrk->storage_hint, l, diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c index 1da3532..5d2e588 100644 --- a/bin/varnishd/cache_expire.c +++ b/bin/varnishd/cache_expire.c @@ -268,7 +268,7 @@ exp_timer(struct sess *sp, void *priv) */ if (oc->timer_when > t) t = TIM_real(); - if (oc->timer_when > t) { + if (oc->timer_when > t) { Lck_Unlock(&exp_mtx); oc = NULL; continue; diff --git a/bin/varnishd/cache_hash.c b/bin/varnishd/cache_hash.c index a33bf5e..b3363cc 100644 --- a/bin/varnishd/cache_hash.c +++ b/bin/varnishd/cache_hash.c @@ -483,7 +483,7 @@ hsh_rush(struct objhead *oh) CHECK_OBJ_NOTNULL(wl, WAITINGLIST_MAGIC); for (u = 0; u < params->rush_exponent; u++) { sp = VTAILQ_FIRST(&wl->list); - if (sp == NULL) + if (sp == NULL) break; CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); 
AZ(sp->wrk); @@ -700,7 +700,7 @@ HSH_Deref(struct worker *w, struct objcore *oc, struct object **oo) if (oc->methods != NULL) { oc_freeobj(oc); w->stats.n_object--; - } + } FREE_OBJ(oc); w->stats.n_objectcore--; diff --git a/bin/varnishd/cache_httpconn.c b/bin/varnishd/cache_httpconn.c index 93a7820..9f2640b 100644 --- a/bin/varnishd/cache_httpconn.c +++ b/bin/varnishd/cache_httpconn.c @@ -46,7 +46,7 @@ SVNID("$Id$") * Check if we have a complete HTTP request or response yet * * Return values: - * 0 No, keep trying + * 0 No, keep trying * >0 Yes, it is this many bytes long. */ diff --git a/bin/varnishd/mgt_param.c b/bin/varnishd/mgt_param.c index 7a9ba14..c215a54 100644 --- a/bin/varnishd/mgt_param.c +++ b/bin/varnishd/mgt_param.c @@ -485,7 +485,7 @@ tweak_diag_bitmap(struct cli *cli, const struct parspec *par, const char *arg) #define WIZARD_TEXT \ "\nNB: Do not change this parameter, unless a developer tell " \ - "you to do so." + "you to do so." /* * Remember to update varnishd.1 whenever you add / remove a parameter or diff --git a/bin/varnishd/storage_persistent.h b/bin/varnishd/storage_persistent.h index 916272c..4841a2d 100644 --- a/bin/varnishd/storage_persistent.h +++ b/bin/varnishd/storage_persistent.h @@ -201,7 +201,7 @@ int smp_valid_silo(struct smp_sc *sc); /*-------------------------------------------------------------------- * Caculate payload of some stuff */ - + static inline uint64_t smp_stuff_len(const struct smp_sc *sc, unsigned stuff) { diff --git a/bin/varnishd/storage_persistent_subr.c b/bin/varnishd/storage_persistent_subr.c index 62ca3bf..03f6d28 100644 --- a/bin/varnishd/storage_persistent_subr.c +++ b/bin/varnishd/storage_persistent_subr.c @@ -178,7 +178,7 @@ smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx, smp_sync_sign(ctx); } -/*-------------------------------------------------------------------:e +/*-------------------------------------------------------------------- * Initialize a Silo with a valid but empty structure. 
* * XXX: more intelligent sizing of things. From phk at varnish-cache.org Thu Feb 24 08:04:06 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 24 Feb 2011 09:04:06 +0100 Subject: [master] 543e3df More parameter tweaks based on VUG3 discussions: Message-ID: commit 543e3dfeae2b29266cdc1cba9bc1ab9ada082826 Author: Poul-Henning Kamp Date: Thu Feb 24 07:57:35 2011 +0000 More parameter tweaks based on VUG3 discussions: err_ttl: Pointless and wrongly documented. Remove and set Retry-After: in default.vcl expiry_sleep: Remove misleading advise. send_timeout: Reduce to one minute, 10 minutes was far too long. ban_lurker_sleep: Reduce sleep between successful evictions to 10msec to speed ban lurker up to max 100 objects/second. diff --git a/bin/varnishd/cache_center.c b/bin/varnishd/cache_center.c index 844103c..f6065f0 100644 --- a/bin/varnishd/cache_center.c +++ b/bin/varnishd/cache_center.c @@ -399,7 +399,6 @@ cnt_error(struct sess *sp) TIM_format(TIM_real(), date); http_PrintfHeader(w, sp->fd, h, "Date: %s", date); http_PrintfHeader(w, sp->fd, h, "Server: Varnish"); - http_PrintfHeader(w, sp->fd, h, "Retry-After: %d", params->err_ttl); if (sp->err_reason != NULL) http_PutResponse(w, sp->fd, h, sp->err_reason); diff --git a/bin/varnishd/default.vcl b/bin/varnishd/default.vcl index c25f560..919e651 100644 --- a/bin/varnishd/default.vcl +++ b/bin/varnishd/default.vcl @@ -120,6 +120,7 @@ sub vcl_deliver { sub vcl_error { set obj.http.Content-Type = "text/html; charset=utf-8"; + set obj.http.Retry-After = "5"; synthetic {" commit 44f47f0f732ce55a6a705a6d5e4bba4cb6a4cdde Author: Tollef Fog Heen Date: Thu Feb 24 09:23:26 2011 +0100 Avoid double-free of DNS directors The director is freed in vcc_backend.c already, so don't free it in vcc_dir_dns.c too. 
Fixes: #863 diff --git a/lib/libvcl/vcc_dir_dns.c b/lib/libvcl/vcc_dir_dns.c index 3bd9d32..d5a4c34 100644 --- a/lib/libvcl/vcc_dir_dns.c +++ b/lib/libvcl/vcc_dir_dns.c @@ -368,5 +368,4 @@ vcc_ParseDnsDirector(struct vcc *tl) Fc(tl, 0, "\t.ttl = %f", ttl); Fc(tl, 0, ",\n"); Fc(tl, 0, "};\n"); - Ff(tl, 0, "\tVRT_fini_dir(cli, VGCDIR(_%.*s));\n", PF(tl->t_dir)); } From phk at varnish-cache.org Thu Feb 24 09:18:45 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 24 Feb 2011 10:18:45 +0100 Subject: [master] 49308a9 Polish handling of timeval and timespec a bit. Message-ID: commit 49308a9e290ffd0d2868b250e0c282de4ad6264a Author: Poul-Henning Kamp Date: Thu Feb 24 09:18:19 2011 +0000 Polish handling of timeval and timespec a bit. diff --git a/bin/varnishd/cache_acceptor.c b/bin/varnishd/cache_acceptor.c index 3204edd..e43f2c4 100644 --- a/bin/varnishd/cache_acceptor.c +++ b/bin/varnishd/cache_acceptor.c @@ -85,7 +85,7 @@ VCA_waiter_name(void) /*-------------------------------------------------------------------- - * We want to get out of any kind of touble-hit TCP connections as fast + * We want to get out of any kind of trouble-hit TCP connections as fast * as absolutely possible, so we set them LINGER enabled with zero timeout, * so that even if there are outstanding write data on the socket, a close(2) * will return immediately. 
@@ -203,6 +203,7 @@ vca_acct(void *arg) socklen_t l; struct sockaddr_storage addr_s; struct sockaddr *addr; + double send_timeout = 0, sess_timeout = 0; int i; struct pollfd *pfd; struct listen_sock *ls; @@ -231,9 +232,10 @@ vca_acct(void *arg) t0 = TIM_real(); while (1) { #ifdef SO_SNDTIMEO_WORKS - if (params->send_timeout != tv_sndtimeo.tv_sec) { + if (params->send_timeout != send_timeout) { need_test = 1; - tv_sndtimeo.tv_sec = params->send_timeout; + send_timeout = params->send_timeout; + tv_sndtimeo = TIM_timeval(send_timeout); VTAILQ_FOREACH(ls, &heritage.socks, list) { if (ls->sock < 0) continue; @@ -244,9 +246,10 @@ vca_acct(void *arg) } #endif #ifdef SO_RCVTIMEO_WORKS - if (params->sess_timeout != tv_rcvtimeo.tv_sec) { + if (params->sess_timeout != sess_timeout) { need_test = 1; - tv_rcvtimeo.tv_sec = params->sess_timeout; + sess_timeout = params->sess_timeout; + tv_rcvtimeo = TIM_timeval(sess_timeout); VTAILQ_FOREACH(ls, &heritage.socks, list) { if (ls->sock < 0) continue; diff --git a/bin/varnishd/cache_waiter_ports.c b/bin/varnishd/cache_waiter_ports.c index a7a1686..a3fe421 100644 --- a/bin/varnishd/cache_waiter_ports.c +++ b/bin/varnishd/cache_waiter_ports.c @@ -236,9 +236,7 @@ vca_main(void *arg) } else if (tmo > max_t) { timeout = &max_ts; } else { - /* TIM_t2ts() ? 
see #630 */ - ts.tv_sec = (int)floor(tmo); - ts.tv_nsec = 1e9 * (tmo - ts.tv_sec); + ts = TIM_timespec(tmo); timeout = &ts; } } else { diff --git a/include/libvarnish.h b/include/libvarnish.h index 0493a2e..f89a763 100644 --- a/include/libvarnish.h +++ b/include/libvarnish.h @@ -92,6 +92,8 @@ time_t TIM_parse(const char *p); double TIM_mono(void); double TIM_real(void); void TIM_sleep(double t); +struct timespec TIM_timespec(double t); +struct timeval TIM_timeval(double t); /* from libvarnish/version.c */ void varnish_version(const char *); diff --git a/lib/libvarnish/time.c b/lib/libvarnish/time.c index b322431..02d4430 100644 --- a/lib/libvarnish/time.c +++ b/lib/libvarnish/time.c @@ -161,22 +161,44 @@ TIM_parse(const char *p) void TIM_sleep(double t) { +#ifdef HAVE_NANOSLEEP struct timespec ts; - ts.tv_sec = (time_t)floor(t); - ts.tv_nsec = (long)floor((t - ts.tv_sec) * 1e9); + ts = TIM_timespec(t); -#ifdef HAVE_NANOSLEEP (void)nanosleep(&ts, NULL); #else - if (ts.tv_sec > 0) - (void)sleep(ts.tv_sec); - ts.tv_nsec /= 1000; - if (ts.tv_nsec > 0) - (void)usleep(ts.tv_nsec); + if (t >= 1.) 
{ + (void)sleep(floor(t); + t -= floor(t); + } + /* XXX: usleep() is not mandated to be thread safe */ + t *= 1e6; + if (t > 0) + (void)usleep(floor(t)); #endif } +struct timeval +TIM_timeval(double t) +{ + struct timeval tv; + + tv.tv_sec = (time_t)trunc(t); + tv.tv_usec = (int)(1e6 * (t - tv.tv_sec)); + return (tv); +} + +struct timespec +TIM_timespec(double t) +{ + struct timespec tv; + + tv.tv_sec = (time_t)trunc(t); + tv.tv_nsec = (int)(1e9 * (t - tv.tv_sec)); + return (tv); +} + #ifdef TEST_DRIVER From phk at varnish-cache.org Thu Feb 24 09:18:45 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 24 Feb 2011 10:18:45 +0100 Subject: [master] ff1165c Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache Message-ID: commit ff1165c59237dc18f3771e45a3d397110daffc8e Merge: 49308a9 44f47f0 Author: Poul-Henning Kamp Date: Thu Feb 24 09:18:38 2011 +0000 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache From phk at varnish-cache.org Thu Feb 24 10:32:00 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Thu, 24 Feb 2011 11:32:00 +0100 Subject: [master] 466d963 Also set grace = 0 when an object is banned. Message-ID: commit 466d963158b98d7bbeb20bd5d539baea2c719a9d Author: Poul-Henning Kamp Date: Thu Feb 24 10:31:45 2011 +0000 Also set grace = 0 when an object is banned. 
Spotted by: Geoff diff --git a/bin/varnishd/cache_ban.c b/bin/varnishd/cache_ban.c index 50d29e3..2563f0c 100644 --- a/bin/varnishd/cache_ban.c +++ b/bin/varnishd/cache_ban.c @@ -476,6 +476,7 @@ ban_check_object(struct object *o, const struct sess *sp, int has_req) return (0); } else { o->ttl = 0; + o->grace = 0; oc->ban = NULL; oc_updatemeta(oc); /* BAN also changed, but that is not important any more */ From tfheen at varnish-cache.org Fri Feb 25 08:58:42 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Fri, 25 Feb 2011 09:58:42 +0100 Subject: [2.1] varnish-2.1.5-1-g2650bec Message-ID: commit 2650bec1af0030b3c94f2b7a2b9fb0db53ca63e7 Author: Tollef Fog Heen Date: Fri Feb 25 09:57:28 2011 +0100 Generate sphinx's conf.py to embed version number The sphinx configuration includes the version number, so generate the configuration. diff --git a/configure.ac b/configure.ac index 82c0dd3..c5a2248 100644 --- a/configure.ac +++ b/configure.ac @@ -460,6 +460,7 @@ AC_CONFIG_FILES([ bin/varnishtop/Makefile doc/Makefile doc/sphinx/Makefile + doc/sphinx/conf.py etc/Makefile include/Makefile lib/Makefile diff --git a/doc/sphinx/conf.py b/doc/sphinx/conf.py deleted file mode 100644 index 6bdf098..0000000 --- a/doc/sphinx/conf.py +++ /dev/null @@ -1,221 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Varnish documentation build configuration file, created by -# sphinx-quickstart on Tue Apr 20 13:02:15 2010. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys, os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-#sys.path.append(os.path.abspath('.')) - -# -- General configuration ----------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.todo'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['=templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'Varnish' -copyright = u'2010, Varnish Project' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '-trunk' -# The full version, including alpha/beta/rc tags. -release = '-trunk' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of documents that shouldn't be included in the build. -#unused_docs = [] - -# List of directories, relative to source directory, that shouldn't be searched -# for source files. -exclude_trees = ['=build'] - -# The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). 
-#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - - -# -- Options for HTML output --------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. Major themes that come with -# Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = 'default' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# topp background: #437EB2 -# left column: #EEEEEE; -# h3: #222222; -# color: #222222; -# a: #336590 - - -html_theme_options = { - "bgcolor" : "white", - - "relbarbgcolor" : "#437EB2", - "relbartextcolor" : "white", - - "sidebarbgcolor" : "#EEEEEE", - "sidebartextcolor" : "#222222", - "sidebarlinkcolor" : "#336590", - - "textcolor" : "#222222", - "linkcolor" : "#336590", - - - # "codebgcolor" : "#EEEEEE", - "codetextcolor" : "#222222", - "headtextcolor" : "#222222", - "headlinkcolor" : "#336590", - - } - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. 
-#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['=static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_use_modindex = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'Varnishdoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -# The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). 
-latex_documents = [ - ('index', 'Varnish.tex', u'Varnish Documentation', - u'Varnish Project', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# Additional stuff for the LaTeX preamble. -#latex_preamble = '' - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_use_modindex = True diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in new file mode 100644 index 0000000..d6b98ab --- /dev/null +++ b/doc/sphinx/conf.py.in @@ -0,0 +1,221 @@ +# -*- coding: utf-8 -*- +# +# Varnish documentation build configuration file, created by +# sphinx-quickstart on Tue Apr 20 13:02:15 2010. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.append(os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.todo'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['=templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. 
+#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Varnish' +copyright = u'2010, Varnish Project' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '@VERSION@' +# The full version, including alpha/beta/rc tags. +release = '@VERSION@' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = ['=build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. 
+html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# topp background: #437EB2 +# left column: #EEEEEE; +# h3: #222222; +# color: #222222; +# a: #336590 + + +html_theme_options = { + "bgcolor" : "white", + + "relbarbgcolor" : "#437EB2", + "relbartextcolor" : "white", + + "sidebarbgcolor" : "#EEEEEE", + "sidebartextcolor" : "#222222", + "sidebarlinkcolor" : "#336590", + + "textcolor" : "#222222", + "linkcolor" : "#336590", + + + # "codebgcolor" : "#EEEEEE", + "codetextcolor" : "#222222", + "headtextcolor" : "#222222", + "headlinkcolor" : "#336590", + + } + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +html_title = "Varnish version @VERSION@ documentation" + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['=static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. 
+#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_use_modindex = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Varnishdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'Varnish.tex', u'Varnish Documentation', + u'Varnish Project', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. 
+#latex_use_modindex = True From bjorn at varnish-cache.org Mon Feb 28 10:24:55 2011 From: bjorn at varnish-cache.org (=?UTF-8?Q?Bj=C3=B8rn_Ruberg?=) Date: Mon, 28 Feb 2011 11:24:55 +0100 Subject: [2.1] varnish-2.1.5-2-g465fe1f Message-ID: commit 465fe1f77f83633b6a88a2560441838bfd188e5c Author: Bjoern Ruberg Date: Wed Feb 9 19:14:34 2011 +0100 link to http://www.varnish-cache.org/trac/wiki/VCLExamplePipe diff --git a/doc/sphinx/faq/configuration.rst b/doc/sphinx/faq/configuration.rst index 89360a5..bc95821 100644 --- a/doc/sphinx/faq/configuration.rst +++ b/doc/sphinx/faq/configuration.rst @@ -9,11 +9,14 @@ VCL **What is VCL?** -VCL is an acronym for Varnish Configuration Language. In a VCL file, you configure how Varnish should behave. Sample VCL files will be included in this Wiki at a later stage. +VCL is an acronym for Varnish Configuration Language. In a VCL file, +you configure how Varnish should behave. Sample VCL files will be +included in this Wiki at a later stage. **Where is the documentation on VCL?** -We are working on documenting VCL. The `WIKI `_ contains some examples. +We are working on documenting VCL. The `WIKI +`_ contains some examples. Please also see ``man 7 vcl``. @@ -22,8 +25,10 @@ Please also see ``man 7 vcl``. * Place the VCL file on the server * Telnet into the managment port. -* do a "vcl.load " in managment interface. is whatever you would like to call your new configuration. -* do a "vcl.use " to start using your new config. +* Do a "vcl.load " in managment + interface. is whatever you would like to call your new + configuration. +* Do a "vcl.use " to start using your new config. **Should I use ''pipe'' or ''pass'' in my VCL code? What is the difference?** @@ -35,9 +40,11 @@ request can then be handled like any other. ``pass``. ``pipe`` reads the request, pushes in onty the backend _only_ pushes bytes back and forth, with no other actions taken. 
-Since most HTTP clients do pipeline several requests into one -connection this might give you an undesirable result - as every -subsequent request will reuse the existing ``pipe``. +Since most HTTP clients will pipeline several requests into one +connection, this might give you an undesirable result - as every +subsequent request will reuse the existing ``pipe``. Please see `this +article ` for +more details and a workaround. Varnish versions prior to 2.0 does not support handling a request body with ``pass`` mode, so in those releases ``pipe`` is required for From phk at varnish-cache.org Mon Feb 28 11:00:03 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 12:00:03 +0100 Subject: [master] 4f9d45b remove undesired semicolons Message-ID: commit 4f9d45bcf639310da7f304002a852d9737e9a48f Author: Poul-Henning Kamp Date: Mon Feb 28 10:54:24 2011 +0000 remove undesired semicolons diff --git a/include/miniobj.h b/include/miniobj.h index fc99f44..dfe7a59 100644 --- a/include/miniobj.h +++ b/include/miniobj.h @@ -44,14 +44,14 @@ (to) = (from); \ if ((to) != NULL) \ CHECK_OBJ((to), (type_magic)); \ - } while (0); + } while (0) #define CAST_OBJ_NOTNULL(to, from, type_magic) \ do { \ (to) = (from); \ assert((to) != NULL); \ CHECK_OBJ((to), (type_magic)); \ - } while (0); + } while (0) #define REPLACE(ptr, val) \ do { \ @@ -63,4 +63,4 @@ } else { \ ptr = NULL; \ } \ - } while (0); + } while (0) From phk at varnish-cache.org Mon Feb 28 11:00:06 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 12:00:06 +0100 Subject: [master] f8b5e53 Implement the "purge" action, now that we have renamed "the facility previously known as purge" to ban. Message-ID: commit f8b5e53dffb43fb7a27e1a21d8117630a63c88bc Author: Poul-Henning Kamp Date: Mon Feb 28 10:54:47 2011 +0000 Implement the "purge" action, now that we have renamed "the facility previously known as purge" to ban. 
diff --git a/lib/libvcl/vcc_action.c b/lib/libvcl/vcc_action.c index 82614d8..8850909 100644 --- a/lib/libvcl/vcc_action.c +++ b/lib/libvcl/vcc_action.c @@ -364,6 +364,16 @@ parse_rollback(struct vcc *tl) /*--------------------------------------------------------------------*/ static void +parse_purge(struct vcc *tl) +{ + + vcc_NextToken(tl); + Fb(tl, 1, "VRT_purge(sp, 0, 0);\n"); +} + +/*--------------------------------------------------------------------*/ + +static void parse_synthetic(struct vcc *tl) { vcc_NextToken(tl); @@ -402,6 +412,7 @@ static struct action_table { { "set", parse_set }, { "synthetic", parse_synthetic }, { "unset", parse_unset }, + { "purge", parse_purge, VCL_MET_MISS | VCL_MET_HIT }, { NULL, NULL } }; From phk at varnish-cache.org Mon Feb 28 11:00:09 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 12:00:09 +0100 Subject: [master] 15a2f79 Add testcase for purge in vcl hit & miss Message-ID: commit 15a2f7982ec5fa378a40a4af2fe08e9e22bf80a0 Author: Poul-Henning Kamp Date: Mon Feb 28 10:58:49 2011 +0000 Add testcase for purge in vcl hit & miss diff --git a/bin/varnishtest/tests/c00041.vtc b/bin/varnishtest/tests/c00041.vtc new file mode 100644 index 0000000..cb7b1cb --- /dev/null +++ b/bin/varnishtest/tests/c00041.vtc @@ -0,0 +1,100 @@ +# $Id$ + +test "test purging from vcl" + +server s1 { + rxreq + expect req.url == "/1" + expect req.http.foo == "foo1" + txresp -hdr "Vary: foo" -bodylen 1 + + rxreq + expect req.url == "/1" + expect req.http.foo == "foo2" + txresp -hdr "Vary: foo" -bodylen 2 + + rxreq + expect req.url == "/1" + expect req.http.foo == "foo2" + txresp -hdr "Vary: foo" -bodylen 12 + + rxreq + expect req.url == "/1" + expect req.http.foo == "foo1" + txresp -hdr "Vary: foo" -bodylen 11 + + rxreq + expect req.url == "/1" + expect req.http.foo == "foo3" + txresp -hdr "Vary: foo" -bodylen 23 + + rxreq + expect req.url == "/1" + expect req.http.foo == "foo1" + txresp -hdr "Vary: foo" -bodylen 21 + + rxreq + 
expect req.url == "/1" + expect req.http.foo == "foo2" + txresp -hdr "Vary: foo" -bodylen 22 + +} -start + +varnish v1 -vcl+backend { + sub vcl_miss { if (req.http.purge == "yes") { purge; } } + sub vcl_hit { if (req.http.purge == "yes") { purge; return(restart);} } +} -start + +client c1 { + txreq -url "/1" -hdr "foo: foo1" + rxresp + expect resp.status == 200 + expect resp.bodylen == 1 + + txreq -url "/1" -hdr "Foo: foo2" + rxresp + expect resp.status == 200 + expect resp.bodylen == 2 + + txreq -url "/1" -hdr "foo: foo1" + rxresp + expect resp.status == 200 + expect resp.bodylen == 1 + + txreq -url "/1" -hdr "Foo: foo2" + rxresp + expect resp.status == 200 + expect resp.bodylen == 2 + + # Purge on hit + txreq -url "/1" -hdr "Foo: foo2" -hdr "purge: yes" + rxresp + expect resp.status == 200 + expect resp.bodylen == 12 + + txreq -url "/1" -hdr "foo: foo1" + rxresp + expect resp.status == 200 + expect resp.bodylen == 11 + + # Purge on miss + txreq -url "/1" -hdr "Foo: foo3" -hdr "purge: yes" + rxresp + expect resp.status == 200 + expect resp.bodylen == 23 + + txreq -url "/1" -hdr "foo: foo1" + rxresp + expect resp.status == 200 + expect resp.bodylen == 21 + + txreq -url "/1" -hdr "Foo: foo2" + rxresp + expect resp.status == 200 + expect resp.bodylen == 22 +} -run + +varnish v1 -badvcl { + backend s1 { .host = "${s1_addr}"; } + sub vcl_recv { if (req.http.purge == "yes") { purge; } } +} From phk at varnish-cache.org Mon Feb 28 11:00:10 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 12:00:10 +0100 Subject: [master] 430d24d Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache Message-ID: commit 430d24d4f0778b46f41ecf0489a6fe3db6394e0d Merge: 15a2f79 b394df6 Author: Poul-Henning Kamp Date: Mon Feb 28 10:59:59 2011 +0000 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache From phk at varnish-cache.org Mon Feb 28 11:01:44 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 
12:01:44 +0100 Subject: [master] b590d0e Change description to note that this test is outdated. Message-ID: commit b590d0e46c3ac722a355ae68b9b066a422621d16 Author: Poul-Henning Kamp Date: Mon Feb 28 11:01:23 2011 +0000 Change description to note that this test is outdated. diff --git a/bin/varnishtest/tests/c00033.vtc b/bin/varnishtest/tests/c00033.vtc index c7e9885..f9786a3 100644 --- a/bin/varnishtest/tests/c00033.vtc +++ b/bin/varnishtest/tests/c00033.vtc @@ -1,6 +1,6 @@ # $Id$ -test "real bans" +test "Inline C access to purges (could be retired)" server s1 { rxreq From tfheen at varnish-cache.org Mon Feb 28 14:04:23 2011 From: tfheen at varnish-cache.org (Tollef Fog Heen) Date: Mon, 28 Feb 2011 15:04:23 +0100 Subject: [master] e2287d2 Look for kqueue on NetBSD Message-ID: commit e2287d2d8f0ce86ef391384d80283da7011f5e13 Author: Tollef Fog Heen Date: Mon Feb 28 15:03:24 2011 +0100 Look for kqueue on NetBSD Fixes #821 diff --git a/configure.ac b/configure.ac index 914b9e7..d7f5b94 100644 --- a/configure.ac +++ b/configure.ac @@ -294,7 +294,7 @@ AC_ARG_ENABLE(kqueue, if test "$enable_kqueue" = yes; then case $target in - *-*-freebsd* | *-*-darwin9* ) + *-*-freebsd* | *-*-darwin9* | *-*-netbsd* ) AC_CHECK_FUNCS([kqueue]) ;; *-*-bsd*) From phk at varnish-cache.org Mon Feb 28 21:35:27 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 22:35:27 +0100 Subject: [master] 682f287 Uncouple the gz* files from the build, we will never need them. Message-ID: commit 682f2872c642158c75a1da7419bb70155238730a Author: Poul-Henning Kamp Date: Mon Feb 28 21:34:16 2011 +0000 Uncouple the gz* files from the build, we will never need them. 
diff --git a/lib/libvgz/Makefile.am b/lib/libvgz/Makefile.am index a00e22b..20dc86b 100644 --- a/lib/libvgz/Makefile.am +++ b/lib/libvgz/Makefile.am @@ -12,11 +12,6 @@ libvgz_la_SOURCES = \ crc32.h \ deflate.c \ deflate.h \ - gzclose.c \ - gzguts.h \ - gzlib.c \ - gzread.c \ - gzwrite.c \ infback.c \ inffast.c \ inffast.h \ From phk at varnish-cache.org Mon Feb 28 21:35:28 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 22:35:28 +0100 Subject: [master] 89289cf Remove the gz* files. Message-ID: commit 89289cf59578650301a68f654016266a959ef123 Author: Poul-Henning Kamp Date: Mon Feb 28 21:34:57 2011 +0000 Remove the gz* files. diff --git a/lib/libvgz/gzclose.c b/lib/libvgz/gzclose.c deleted file mode 100644 index caeb99a..0000000 --- a/lib/libvgz/gzclose.c +++ /dev/null @@ -1,25 +0,0 @@ -/* gzclose.c -- zlib gzclose() function - * Copyright (C) 2004, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "gzguts.h" - -/* gzclose() is in a separate file so that it is linked in only if it is used. - That way the other gzclose functions can be used instead to avoid linking in - unneeded compression or decompression routines. */ -int ZEXPORT gzclose(file) - gzFile file; -{ -#ifndef NO_GZCOMPRESS - gz_statep state; - - if (file == NULL) - return Z_STREAM_ERROR; - state = (gz_statep)file; - - return state->mode == GZ_READ ? 
gzclose_r(file) : gzclose_w(file); -#else - return gzclose_r(file); -#endif -} diff --git a/lib/libvgz/gzguts.h b/lib/libvgz/gzguts.h deleted file mode 100644 index 2f4bfec..0000000 --- a/lib/libvgz/gzguts.h +++ /dev/null @@ -1,132 +0,0 @@ -/* gzguts.h -- zlib internal header definitions for gz* operations - * Copyright (C) 2004, 2005, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#ifdef _LARGEFILE64_SOURCE -# ifndef _LARGEFILE_SOURCE -# define _LARGEFILE_SOURCE 1 -# endif -# ifdef _FILE_OFFSET_BITS -# undef _FILE_OFFSET_BITS -# endif -#endif - -#if ((__GNUC__-0) * 10 + __GNUC_MINOR__-0 >= 33) && !defined(NO_VIZ) -# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) -#else -# define ZLIB_INTERNAL -#endif - -#include -#include "vgz.h" -#ifdef STDC -# include -# include -# include -#endif -#include - -#ifdef NO_DEFLATE /* for compatibility with old definition */ -# define NO_GZCOMPRESS -#endif - -#ifdef _MSC_VER -# include -# define vsnprintf _vsnprintf -#endif - -#ifndef local -# define local static -#endif -/* compile with -Dlocal if your debugger can't find static symbols */ - -/* gz* functions always use library allocation functions */ -#ifndef STDC - extern voidp malloc OF((uInt size)); - extern void free OF((voidpf ptr)); -#endif - -/* get errno and strerror definition */ -#if defined UNDER_CE -# include -# define zstrerror() gz_strwinerror((DWORD)GetLastError()) -#else -# ifdef STDC -# include -# define zstrerror() strerror(errno) -# else -# define zstrerror() "stdio error (consult errno)" -# endif -#endif - -/* provide prototypes for these when building zlib without LFS */ -#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0 - ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); - ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); - ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); - ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); -#endif - -/* default i/o 
buffer size -- double this for output when reading */ -#define GZBUFSIZE 8192 - -/* gzip modes, also provide a little integrity check on the passed structure */ -#define GZ_NONE 0 -#define GZ_READ 7247 -#define GZ_WRITE 31153 -#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ - -/* values for gz_state how */ -#define LOOK 0 /* look for a gzip header */ -#define COPY 1 /* copy input directly */ -#define GZIP 2 /* decompress a gzip stream */ - -/* internal gzip file state data structure */ -typedef struct { - /* used for both reading and writing */ - int mode; /* see gzip modes above */ - int fd; /* file descriptor */ - char *path; /* path or fd for error messages */ - z_off64_t pos; /* current position in uncompressed data */ - unsigned size; /* buffer size, zero if not allocated yet */ - unsigned want; /* requested buffer size, default is GZBUFSIZE */ - unsigned char *in; /* input buffer */ - unsigned char *out; /* output buffer (double-sized when reading) */ - unsigned char *next; /* next output data to deliver or write */ - /* just for reading */ - unsigned have; /* amount of output data unused at next */ - int eof; /* true if end of input file reached */ - z_off64_t start; /* where the gzip data started, for rewinding */ - z_off64_t raw; /* where the raw data started, for seeking */ - int how; /* 0: get header, 1: copy, 2: decompress */ - int direct; /* true if last read direct, false if gzip */ - /* just for writing */ - int level; /* compression level */ - int strategy; /* compression strategy */ - /* seek request */ - z_off64_t skip; /* amount to skip (already rewound if backwards) */ - int seek; /* true if seek request pending */ - /* error information */ - int err; /* error code */ - char *msg; /* error message */ - /* zlib inflate or deflate stream */ - z_stream strm; /* stream structure in-place (not a pointer) */ -} gz_state; -typedef gz_state FAR *gz_statep; - -/* shared functions */ -void ZLIB_INTERNAL gz_error OF((gz_statep, int, 
const char *)); -#if defined UNDER_CE -char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); -#endif - -/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t - value -- needed when comparing unsigned to z_off64_t, which is signed - (possible z_off64_t types off_t, off64_t, and long are all signed) */ -#ifdef INT_MAX -# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) -#else -unsigned ZLIB_INTERNAL gz_intmax OF((void)); -# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) -#endif diff --git a/lib/libvgz/gzlib.c b/lib/libvgz/gzlib.c deleted file mode 100644 index 3f6fb25..0000000 --- a/lib/libvgz/gzlib.c +++ /dev/null @@ -1,537 +0,0 @@ -/* gzlib.c -- zlib functions common to reading and writing gzip files - * Copyright (C) 2004, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "gzguts.h" - -#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 -# define LSEEK lseek64 -#else -# define LSEEK lseek -#endif - -/* Local functions */ -local void gz_reset OF((gz_statep)); -local gzFile gz_open OF((const char *, int, const char *)); - -#if defined UNDER_CE - -/* Map the Windows error number in ERROR to a locale-dependent error message - string and return a pointer to it. Typically, the values for ERROR come - from GetLastError. - - The string pointed to shall not be modified by the application, but may be - overwritten by a subsequent call to gz_strwinerror - - The gz_strwinerror function does not change the current setting of - GetLastError. */ -char ZLIB_INTERNAL *gz_strwinerror (error) - DWORD error; -{ - static char buf[1024]; - - wchar_t *msgbuf; - DWORD lasterr = GetLastError(); - DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM - | FORMAT_MESSAGE_ALLOCATE_BUFFER, - NULL, - error, - 0, /* Default language */ - (LPVOID)&msgbuf, - 0, - NULL); - if (chars != 0) { - /* If there is an \r\n appended, zap it. 
*/ - if (chars >= 2 - && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { - chars -= 2; - msgbuf[chars] = 0; - } - - if (chars > sizeof (buf) - 1) { - chars = sizeof (buf) - 1; - msgbuf[chars] = 0; - } - - wcstombs(buf, msgbuf, chars + 1); - LocalFree(msgbuf); - } - else { - sprintf(buf, "unknown win32 error (%ld)", error); - } - - SetLastError(lasterr); - return buf; -} - -#endif /* UNDER_CE */ - -/* Reset gzip file state */ -local void gz_reset(state) - gz_statep state; -{ - if (state->mode == GZ_READ) { /* for reading ... */ - state->have = 0; /* no output data available */ - state->eof = 0; /* not at end of file */ - state->how = LOOK; /* look for gzip header */ - state->direct = 1; /* default for empty file */ - } - state->seek = 0; /* no seek request pending */ - gz_error(state, Z_OK, NULL); /* clear error */ - state->pos = 0; /* no uncompressed data yet */ - state->strm.avail_in = 0; /* no input data yet */ -} - -/* Open a gzip file either by name or file descriptor. */ -local gzFile gz_open(path, fd, mode) - const char *path; - int fd; - const char *mode; -{ - gz_statep state; - - /* allocate gzFile structure to return */ - state = malloc(sizeof(gz_state)); - if (state == NULL) - return NULL; - state->size = 0; /* no buffers allocated yet */ - state->want = GZBUFSIZE; /* requested buffer size */ - state->msg = NULL; /* no error message yet */ - - /* interpret mode */ - state->mode = GZ_NONE; - state->level = Z_DEFAULT_COMPRESSION; - state->strategy = Z_DEFAULT_STRATEGY; - while (*mode) { - if (*mode >= '0' && *mode <= '9') - state->level = *mode - '0'; - else - switch (*mode) { - case 'r': - state->mode = GZ_READ; - break; -#ifndef NO_GZCOMPRESS - case 'w': - state->mode = GZ_WRITE; - break; - case 'a': - state->mode = GZ_APPEND; - break; -#endif - case '+': /* can't read and write at the same time */ - free(state); - return NULL; - case 'b': /* ignore -- will request binary anyway */ - break; - case 'f': - state->strategy = Z_FILTERED; - break; - 
case 'h': - state->strategy = Z_HUFFMAN_ONLY; - break; - case 'R': - state->strategy = Z_RLE; - break; - case 'F': - state->strategy = Z_FIXED; - default: /* could consider as an error, but just ignore */ - ; - } - mode++; - } - - /* must provide an "r", "w", or "a" */ - if (state->mode == GZ_NONE) { - free(state); - return NULL; - } - - /* save the path name for error messages */ - state->path = malloc(strlen(path) + 1); - if (state->path == NULL) { - free(state); - return NULL; - } - strcpy(state->path, path); - - /* open the file with the appropriate mode (or just use fd) */ - state->fd = fd != -1 ? fd : - open(path, -#ifdef O_LARGEFILE - O_LARGEFILE | -#endif -#ifdef O_BINARY - O_BINARY | -#endif - (state->mode == GZ_READ ? - O_RDONLY : - (O_WRONLY | O_CREAT | ( - state->mode == GZ_WRITE ? - O_TRUNC : - O_APPEND))), - 0666); - if (state->fd == -1) { - free(state->path); - free(state); - return NULL; - } - if (state->mode == GZ_APPEND) - state->mode = GZ_WRITE; /* simplify later checks */ - - /* save the current position for rewinding (only if reading) */ - if (state->mode == GZ_READ) { - state->start = LSEEK(state->fd, 0, SEEK_CUR); - if (state->start == -1) state->start = 0; - } - - /* initialize stream */ - gz_reset(state); - - /* return stream */ - return (gzFile)state; -} - -/* -- see zlib.h -- */ -gzFile ZEXPORT gzopen(path, mode) - const char *path; - const char *mode; -{ - return gz_open(path, -1, mode); -} - -/* -- see zlib.h -- */ -gzFile ZEXPORT gzopen64(path, mode) - const char *path; - const char *mode; -{ - return gz_open(path, -1, mode); -} - -/* -- see zlib.h -- */ -gzFile ZEXPORT gzdopen(fd, mode) - int fd; - const char *mode; -{ - char *path; /* identifier for error messages */ - gzFile gz; - - if (fd == -1 || (path = malloc(7 + 3 * sizeof(int))) == NULL) - return NULL; - sprintf(path, "", fd); /* for debugging */ - gz = gz_open(path, fd, mode); - free(path); - return gz; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzbuffer(file, size) - gzFile 
file; - unsigned size; -{ - gz_statep state; - - /* get internal structure and check integrity */ - if (file == NULL) - return -1; - state = (gz_statep)file; - if (state->mode != GZ_READ && state->mode != GZ_WRITE) - return -1; - - /* make sure we haven't already allocated memory */ - if (state->size != 0) - return -1; - - /* check and set requested size */ - if (size == 0) - return -1; - state->want = size; - return 0; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzrewind(file) - gzFile file; -{ - gz_statep state; - - /* get internal structure */ - if (file == NULL) - return -1; - state = (gz_statep)file; - - /* check that we're reading and that there's no error */ - if (state->mode != GZ_READ || state->err != Z_OK) - return -1; - - /* back up and start over */ - if (LSEEK(state->fd, state->start, SEEK_SET) == -1) - return -1; - gz_reset(state); - return 0; -} - -/* -- see zlib.h -- */ -z_off64_t ZEXPORT gzseek64(file, offset, whence) - gzFile file; - z_off64_t offset; - int whence; -{ - unsigned n; - z_off64_t ret; - gz_statep state; - - /* get internal structure and check integrity */ - if (file == NULL) - return -1; - state = (gz_statep)file; - if (state->mode != GZ_READ && state->mode != GZ_WRITE) - return -1; - - /* check that there's no error */ - if (state->err != Z_OK) - return -1; - - /* can only seek from start or relative to current position */ - if (whence != SEEK_SET && whence != SEEK_CUR) - return -1; - - /* normalize offset to a SEEK_CUR specification */ - if (whence == SEEK_SET) - offset -= state->pos; - else if (state->seek) - offset += state->skip; - state->seek = 0; - - /* if within raw area while reading, just go there */ - if (state->mode == GZ_READ && state->how == COPY && - state->pos + offset >= state->raw) { - ret = LSEEK(state->fd, offset - state->have, SEEK_CUR); - if (ret == -1) - return -1; - state->have = 0; - state->eof = 0; - state->seek = 0; - gz_error(state, Z_OK, NULL); - state->strm.avail_in = 0; - state->pos += offset; - return 
state->pos; - } - - /* calculate skip amount, rewinding if needed for back seek when reading */ - if (offset < 0) { - if (state->mode != GZ_READ) /* writing -- can't go backwards */ - return -1; - offset += state->pos; - if (offset < 0) /* before start of file! */ - return -1; - if (gzrewind(file) == -1) /* rewind, then skip to offset */ - return -1; - } - - /* if reading, skip what's in output buffer (one less gzgetc() check) */ - if (state->mode == GZ_READ) { - n = GT_OFF(state->have) || (z_off64_t)state->have > offset ? - (unsigned)offset : state->have; - state->have -= n; - state->next += n; - state->pos += n; - offset -= n; - } - - /* request skip (if not zero) */ - if (offset) { - state->seek = 1; - state->skip = offset; - } - return state->pos + offset; -} - -/* -- see zlib.h -- */ -z_off_t ZEXPORT gzseek(file, offset, whence) - gzFile file; - z_off_t offset; - int whence; -{ - z_off64_t ret; - - ret = gzseek64(file, (z_off64_t)offset, whence); - return ret == (z_off_t)ret ? (z_off_t)ret : -1; -} - -/* -- see zlib.h -- */ -z_off64_t ZEXPORT gztell64(file) - gzFile file; -{ - gz_statep state; - - /* get internal structure and check integrity */ - if (file == NULL) - return -1; - state = (gz_statep)file; - if (state->mode != GZ_READ && state->mode != GZ_WRITE) - return -1; - - /* return position */ - return state->pos + (state->seek ? state->skip : 0); -} - -/* -- see zlib.h -- */ -z_off_t ZEXPORT gztell(file) - gzFile file; -{ - z_off64_t ret; - - ret = gztell64(file); - return ret == (z_off_t)ret ? 
(z_off_t)ret : -1; -} - -/* -- see zlib.h -- */ -z_off64_t ZEXPORT gzoffset64(file) - gzFile file; -{ - z_off64_t offset; - gz_statep state; - - /* get internal structure and check integrity */ - if (file == NULL) - return -1; - state = (gz_statep)file; - if (state->mode != GZ_READ && state->mode != GZ_WRITE) - return -1; - - /* compute and return effective offset in file */ - offset = LSEEK(state->fd, 0, SEEK_CUR); - if (offset == -1) - return -1; - if (state->mode == GZ_READ) /* reading */ - offset -= state->strm.avail_in; /* don't count buffered input */ - return offset; -} - -/* -- see zlib.h -- */ -z_off_t ZEXPORT gzoffset(file) - gzFile file; -{ - z_off64_t ret; - - ret = gzoffset64(file); - return ret == (z_off_t)ret ? (z_off_t)ret : -1; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzeof(file) - gzFile file; -{ - gz_statep state; - - /* get internal structure and check integrity */ - if (file == NULL) - return 0; - state = (gz_statep)file; - if (state->mode != GZ_READ && state->mode != GZ_WRITE) - return 0; - - /* return end-of-file state */ - return state->mode == GZ_READ ? - (state->eof && state->strm.avail_in == 0 && state->have == 0) : 0; -} - -/* -- see zlib.h -- */ -const char * ZEXPORT gzerror(file, errnum) - gzFile file; - int *errnum; -{ - gz_statep state; - - /* get internal structure and check integrity */ - if (file == NULL) - return NULL; - state = (gz_statep)file; - if (state->mode != GZ_READ && state->mode != GZ_WRITE) - return NULL; - - /* return error information */ - if (errnum != NULL) - *errnum = state->err; - return state->msg == NULL ? 
"" : state->msg; -} - -/* -- see zlib.h -- */ -void ZEXPORT gzclearerr(file) - gzFile file; -{ - gz_statep state; - - /* get internal structure and check integrity */ - if (file == NULL) - return; - state = (gz_statep)file; - if (state->mode != GZ_READ && state->mode != GZ_WRITE) - return; - - /* clear error and end-of-file */ - if (state->mode == GZ_READ) - state->eof = 0; - gz_error(state, Z_OK, NULL); -} - -/* Create an error message in allocated memory and set state->err and - state->msg accordingly. Free any previous error message already there. Do - not try to free or allocate space if the error is Z_MEM_ERROR (out of - memory). Simply save the error message as a static string. If there is an - allocation failure constructing the error message, then convert the error to - out of memory. */ -void ZLIB_INTERNAL gz_error(state, err, msg) - gz_statep state; - int err; - const char *msg; -{ - /* free previously allocated message and clear */ - if (state->msg != NULL) { - if (state->err != Z_MEM_ERROR) - free(state->msg); - state->msg = NULL; - } - - /* set error code, and if no message, then done */ - state->err = err; - if (msg == NULL) - return; - - /* for an out of memory error, save as static string */ - if (err == Z_MEM_ERROR) { - state->msg = (char *)(uintptr_t)msg; - return; - } - - /* construct error message with path */ - if ((state->msg = malloc(strlen(state->path) + strlen(msg) + 3)) == NULL) { - state->err = Z_MEM_ERROR; - state->msg = (char *)(uintptr_t)"out of memory"; - return; - } - strcpy(state->msg, state->path); - strcat(state->msg, ": "); - strcat(state->msg, msg); - return; -} - -#ifndef INT_MAX -/* portably return maximum value for an int (when limits.h presumed not - available) -- we need to do this to cover cases where 2's complement not - used, since C standard permits 1's complement and sign-bit representations, - otherwise we could just use ((unsigned)-1) >> 1 */ -unsigned ZLIB_INTERNAL gz_intmax() -{ - unsigned p, q; - - p = 1; - do { - 
q = p; - p <<= 1; - p++; - } while (p > q); - return q >> 1; -} -#endif diff --git a/lib/libvgz/gzread.c b/lib/libvgz/gzread.c deleted file mode 100644 index 548201a..0000000 --- a/lib/libvgz/gzread.c +++ /dev/null @@ -1,653 +0,0 @@ -/* gzread.c -- zlib functions for reading gzip files - * Copyright (C) 2004, 2005, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "gzguts.h" - -/* Local functions */ -local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); -local int gz_avail OF((gz_statep)); -local int gz_next4 OF((gz_statep, unsigned long *)); -local int gz_head OF((gz_statep)); -local int gz_decomp OF((gz_statep)); -local int gz_make OF((gz_statep)); -local int gz_skip OF((gz_statep, z_off64_t)); - -/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from - state->fd, and update state->eof, state->err, and state->msg as appropriate. - This function needs to loop on read(), since read() is not guaranteed to - read the number of bytes requested, depending on the type of descriptor. */ -local int gz_load(state, buf, len, have) - gz_statep state; - unsigned char *buf; - unsigned len; - unsigned *have; -{ - int ret; - - *have = 0; - do { - ret = read(state->fd, buf + *have, len - *have); - if (ret <= 0) - break; - *have += ret; - } while (*have < len); - if (ret < 0) { - gz_error(state, Z_ERRNO, zstrerror()); - return -1; - } - if (ret == 0) - state->eof = 1; - return 0; -} - -/* Load up input buffer and set eof flag if last data loaded -- return -1 on - error, 0 otherwise. Note that the eof flag is set when the end of the input - file is reached, even though there may be unused data in the buffer. Once - that data has been used, no more attempts will be made to read the file. - gz_avail() assumes that strm->avail_in == 0. 
*/ -local int gz_avail(state) - gz_statep state; -{ - z_streamp strm = &(state->strm); - - if (state->err != Z_OK) - return -1; - if (state->eof == 0) { - if (gz_load(state, state->in, state->size, - (unsigned *)&(strm->avail_in)) == -1) - return -1; - strm->next_in = state->in; - } - return 0; -} - -/* Get next byte from input, or -1 if end or error. */ -#define NEXT() ((strm->avail_in == 0 && gz_avail(state) == -1) ? -1 : \ - (strm->avail_in == 0 ? -1 : \ - (strm->avail_in--, *(strm->next_in)++))) - -/* Get a four-byte little-endian integer and return 0 on success and the value - in *ret. Otherwise -1 is returned and *ret is not modified. */ -local int gz_next4(state, ret) - gz_statep state; - unsigned long *ret; -{ - int ch; - unsigned long val; - z_streamp strm = &(state->strm); - - val = NEXT(); - val += (unsigned)NEXT() << 8; - val += (unsigned long)NEXT() << 16; - ch = NEXT(); - if (ch == -1) - return -1; - val += (unsigned long)ch << 24; - *ret = val; - return 0; -} - -/* Look for gzip header, set up for inflate or copy. state->have must be zero. - If this is the first time in, allocate required memory. state->how will be - left unchanged if there is no more input data available, will be set to COPY - if there is no gzip header and direct copying will be performed, or it will - be set to GZIP for decompression, and the gzip header will be skipped so - that the next available input data is the raw deflate stream. If direct - copying, then leftover input data from the input buffer will be copied to - the output buffer. In that case, all further file reads will be directly to - either the output buffer or a user buffer. If decompressing, the inflate - state and the check value will be initialized. gz_head() will return 0 on - success or -1 on failure. Failures may include read errors or gzip header - errors. 
*/ -local int gz_head(state) - gz_statep state; -{ - z_streamp strm = &(state->strm); - int flags; - unsigned len; - - /* allocate read buffers and inflate memory */ - if (state->size == 0) { - /* allocate buffers */ - state->in = malloc(state->want); - state->out = malloc(state->want << 1); - if (state->in == NULL || state->out == NULL) { - if (state->out != NULL) - free(state->out); - if (state->in != NULL) - free(state->in); - gz_error(state, Z_MEM_ERROR, "out of memory"); - return -1; - } - state->size = state->want; - - /* allocate inflate memory */ - state->strm.zalloc = Z_NULL; - state->strm.zfree = Z_NULL; - state->strm.opaque = Z_NULL; - state->strm.avail_in = 0; - state->strm.next_in = Z_NULL; - if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */ - free(state->out); - free(state->in); - state->size = 0; - gz_error(state, Z_MEM_ERROR, "out of memory"); - return -1; - } - } - - /* get some data in the input buffer */ - if (strm->avail_in == 0) { - if (gz_avail(state) == -1) - return -1; - if (strm->avail_in == 0) - return 0; - } - - /* look for the gzip magic header bytes 31 and 139 */ - if (strm->next_in[0] == 31) { - strm->avail_in--; - strm->next_in++; - if (strm->avail_in == 0 && gz_avail(state) == -1) - return -1; - if (strm->avail_in && strm->next_in[0] == 139) { - /* we have a gzip header, woo hoo! 
*/ - strm->avail_in--; - strm->next_in++; - - /* skip rest of header */ - if (NEXT() != 8) { /* compression method */ - gz_error(state, Z_DATA_ERROR, "unknown compression method"); - return -1; - } - flags = NEXT(); - if (flags & 0xe0) { /* reserved flag bits */ - gz_error(state, Z_DATA_ERROR, "unknown header flags set"); - return -1; - } - NEXT(); /* modification time */ - NEXT(); - NEXT(); - NEXT(); - NEXT(); /* extra flags */ - NEXT(); /* operating system */ - if (flags & 4) { /* extra field */ - len = (unsigned)NEXT(); - len += (unsigned)NEXT() << 8; - while (len--) - if (NEXT() < 0) - break; - } - if (flags & 8) /* file name */ - while (NEXT() > 0) - ; - if (flags & 16) /* comment */ - while (NEXT() > 0) - ; - if (flags & 2) { /* header crc */ - NEXT(); - NEXT(); - } - /* an unexpected end of file is not checked for here -- it will be - noticed on the first request for uncompressed data */ - - /* set up for decompression */ - inflateReset(strm); - strm->adler = crc32(0L, Z_NULL, 0); - state->how = GZIP; - state->direct = 0; - return 0; - } - else { - /* not a gzip file -- save first byte (31) and fall to raw i/o */ - state->out[0] = 31; - state->have = 1; - } - } - - /* doing raw i/o, save start of raw data for seeking, copy any leftover - input to output -- this assumes that the output buffer is larger than - the input buffer, which also assures space for gzungetc() */ - state->raw = state->pos; - state->next = state->out; - if (strm->avail_in) { - memcpy(state->next + state->have, strm->next_in, strm->avail_in); - state->have += strm->avail_in; - strm->avail_in = 0; - } - state->how = COPY; - state->direct = 1; - return 0; -} - -/* Decompress from input to the provided next_out and avail_out in the state. - If the end of the compressed data is reached, then verify the gzip trailer - check value and length (modulo 2^32). state->have and state->next are set - to point to the just decompressed data, and the crc is updated. 
If the - trailer is verified, state->how is reset to LOOK to look for the next gzip - stream or raw data, once state->have is depleted. Returns 0 on success, -1 - on failure. Failures may include invalid compressed data or a failed gzip - trailer verification. */ -local int gz_decomp(state) - gz_statep state; -{ - int ret; - unsigned had; - unsigned long crc, len; - z_streamp strm = &(state->strm); - - /* fill output buffer up to end of deflate stream */ - had = strm->avail_out; - do { - /* get more input for inflate() */ - if (strm->avail_in == 0 && gz_avail(state) == -1) - return -1; - if (strm->avail_in == 0) { - gz_error(state, Z_DATA_ERROR, "unexpected end of file"); - return -1; - } - - /* decompress and handle errors */ - ret = inflate(strm, Z_NO_FLUSH); - if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { - gz_error(state, Z_STREAM_ERROR, - "internal error: inflate stream corrupt"); - return -1; - } - if (ret == Z_MEM_ERROR) { - gz_error(state, Z_MEM_ERROR, "out of memory"); - return -1; - } - if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ - gz_error(state, Z_DATA_ERROR, - strm->msg == NULL ? 
"compressed data error" : strm->msg); - return -1; - } - } while (strm->avail_out && ret != Z_STREAM_END); - - /* update available output and crc check value */ - state->have = had - strm->avail_out; - state->next = strm->next_out - state->have; - strm->adler = crc32(strm->adler, state->next, state->have); - - /* check gzip trailer if at end of deflate stream */ - if (ret == Z_STREAM_END) { - if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) { - gz_error(state, Z_DATA_ERROR, "unexpected end of file"); - return -1; - } - if (crc != strm->adler) { - gz_error(state, Z_DATA_ERROR, "incorrect data check"); - return -1; - } - if (len != (strm->total_out & 0xffffffffL)) { - gz_error(state, Z_DATA_ERROR, "incorrect length check"); - return -1; - } - state->how = LOOK; /* ready for next stream, once have is 0 (leave - state->direct unchanged to remember how) */ - } - - /* good decompression */ - return 0; -} - -/* Make data and put in the output buffer. Assumes that state->have == 0. - Data is either copied from the input file or decompressed from the input - file depending on state->how. If state->how is LOOK, then a gzip header is - looked for (and skipped if found) to determine wither to copy or decompress. - Returns -1 on error, otherwise 0. gz_make() will leave state->have as COPY - or GZIP unless the end of the input file has been reached and all data has - been processed. 
*/ -local int gz_make(state) - gz_statep state; -{ - z_streamp strm = &(state->strm); - - if (state->how == LOOK) { /* look for gzip header */ - if (gz_head(state) == -1) - return -1; - if (state->have) /* got some data from gz_head() */ - return 0; - } - if (state->how == COPY) { /* straight copy */ - if (gz_load(state, state->out, state->size << 1, &(state->have)) == -1) - return -1; - state->next = state->out; - } - else if (state->how == GZIP) { /* decompress */ - strm->avail_out = state->size << 1; - strm->next_out = state->out; - if (gz_decomp(state) == -1) - return -1; - } - return 0; -} - -/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ -local int gz_skip(state, len) - gz_statep state; - z_off64_t len; -{ - unsigned n; - - /* skip over len bytes or reach end-of-file, whichever comes first */ - while (len) - /* skip over whatever is in output buffer */ - if (state->have) { - n = GT_OFF(state->have) || (z_off64_t)state->have > len ? - (unsigned)len : state->have; - state->have -= n; - state->next += n; - state->pos += n; - len -= n; - } - - /* output buffer empty -- return if we're at the end of the input */ - else if (state->eof && state->strm.avail_in == 0) - break; - - /* need more data to skip -- load up output buffer */ - else { - /* get more output, looking for header if required */ - if (gz_make(state) == -1) - return -1; - } - return 0; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzread(file, buf, len) - gzFile file; - voidp buf; - unsigned len; -{ - unsigned got, n; - gz_statep state; - z_streamp strm; - - /* get internal structure */ - if (file == NULL) - return -1; - state = (gz_statep)file; - strm = &(state->strm); - - /* check that we're reading and that there's no error */ - if (state->mode != GZ_READ || state->err != Z_OK) - return -1; - - /* since an int is returned, make sure len fits in one, otherwise return - with an error (this avoids the flaw in the interface) */ - if ((int)len < 0) { - gz_error(state, 
Z_BUF_ERROR, "requested length does not fit in int"); - return -1; - } - - /* if len is zero, avoid unnecessary operations */ - if (len == 0) - return 0; - - /* process a skip request */ - if (state->seek) { - state->seek = 0; - if (gz_skip(state, state->skip) == -1) - return -1; - } - - /* get len bytes to buf, or less than len if at the end */ - got = 0; - do { - /* first just try copying data from the output buffer */ - if (state->have) { - n = state->have > len ? len : state->have; - memcpy(buf, state->next, n); - state->next += n; - state->have -= n; - } - - /* output buffer empty -- return if we're at the end of the input */ - else if (state->eof && strm->avail_in == 0) - break; - - /* need output data -- for small len or new stream load up our output - buffer */ - else if (state->how == LOOK || len < (state->size << 1)) { - /* get more output, looking for header if required */ - if (gz_make(state) == -1) - return -1; - continue; /* no progress yet -- go back to memcpy() above */ - /* the copy above assures that we will leave with space in the - output buffer, allowing at least one gzungetc() to succeed */ - } - - /* large len -- read directly into user buffer */ - else if (state->how == COPY) { /* read directly */ - if (gz_load(state, buf, len, &n) == -1) - return -1; - } - - /* large len -- decompress directly into user buffer */ - else { /* state->how == GZIP */ - strm->avail_out = len; - strm->next_out = buf; - if (gz_decomp(state) == -1) - return -1; - n = state->have; - state->have = 0; - } - - /* update progress */ - len -= n; - buf = (char *)buf + n; - got += n; - state->pos += n; - } while (len); - - /* return number of bytes read into user buffer (will fit in int) */ - return (int)got; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzgetc(file) - gzFile file; -{ - int ret; - unsigned char buf[1]; - gz_statep state; - - /* get internal structure */ - if (file == NULL) - return -1; - state = (gz_statep)file; - - /* check that we're reading and that there's 
no error */ - if (state->mode != GZ_READ || state->err != Z_OK) - return -1; - - /* try output buffer (no need to check for skip request) */ - if (state->have) { - state->have--; - state->pos++; - return *(state->next)++; - } - - /* nothing there -- try gzread() */ - ret = gzread(file, buf, 1); - return ret < 1 ? -1 : buf[0]; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzungetc(c, file) - int c; - gzFile file; -{ - gz_statep state; - - /* get internal structure */ - if (file == NULL) - return -1; - state = (gz_statep)file; - - /* check that we're reading and that there's no error */ - if (state->mode != GZ_READ || state->err != Z_OK) - return -1; - - /* process a skip request */ - if (state->seek) { - state->seek = 0; - if (gz_skip(state, state->skip) == -1) - return -1; - } - - /* can't push EOF */ - if (c < 0) - return -1; - - /* if output buffer empty, put byte at end (allows more pushing) */ - if (state->have == 0) { - state->have = 1; - state->next = state->out + (state->size << 1) - 1; - state->next[0] = c; - state->pos--; - return c; - } - - /* if no room, give up (must have already done a gzungetc()) */ - if (state->have == (state->size << 1)) { - gz_error(state, Z_BUF_ERROR, "out of room to push characters"); - return -1; - } - - /* slide output data if needed and insert byte before existing data */ - if (state->next == state->out) { - unsigned char *src = state->out + state->have; - unsigned char *dest = state->out + (state->size << 1); - while (src > state->out) - *--dest = *--src; - state->next = dest; - } - state->have++; - state->next--; - state->next[0] = c; - state->pos--; - return c; -} - -/* -- see zlib.h -- */ -char * ZEXPORT gzgets(file, buf, len) - gzFile file; - char *buf; - int len; -{ - unsigned left, n; - char *str; - unsigned char *eol; - gz_statep state; - - /* check parameters and get internal structure */ - if (file == NULL || buf == NULL || len < 1) - return NULL; - state = (gz_statep)file; - - /* check that we're reading and that 
there's no error */ - if (state->mode != GZ_READ || state->err != Z_OK) - return NULL; - - /* process a skip request */ - if (state->seek) { - state->seek = 0; - if (gz_skip(state, state->skip) == -1) - return NULL; - } - - /* copy output bytes up to new line or len - 1, whichever comes first -- - append a terminating zero to the string (we don't check for a zero in - the contents, let the user worry about that) */ - str = buf; - left = (unsigned)len - 1; - if (left) do { - /* assure that something is in the output buffer */ - if (state->have == 0) { - if (gz_make(state) == -1) - return NULL; /* error */ - if (state->have == 0) { /* end of file */ - if (buf == str) /* got bupkus */ - return NULL; - break; /* got something -- return it */ - } - } - - /* look for end-of-line in current output buffer */ - n = state->have > left ? left : state->have; - eol = memchr(state->next, '\n', n); - if (eol != NULL) - n = (unsigned)(eol - state->next) + 1; - - /* copy through end-of-line, or remainder if not found */ - memcpy(buf, state->next, n); - state->have -= n; - state->next += n; - state->pos += n; - left -= n; - buf += n; - } while (left && eol == NULL); - - /* found end-of-line or out of space -- terminate string and return it */ - buf[0] = 0; - return str; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzdirect(file) - gzFile file; -{ - gz_statep state; - - /* get internal structure */ - if (file == NULL) - return 0; - state = (gz_statep)file; - - /* check that we're reading */ - if (state->mode != GZ_READ) - return 0; - - /* if the state is not known, but we can find out, then do so (this is - mainly for right after a gzopen() or gzdopen()) */ - if (state->how == LOOK && state->have == 0) - (void)gz_head(state); - - /* return 1 if reading direct, 0 if decompressing a gzip stream */ - return state->direct; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzclose_r(file) - gzFile file; -{ - int ret; - gz_statep state; - - /* get internal structure */ - if (file == NULL) - return 
Z_STREAM_ERROR; - state = (gz_statep)file; - - /* check that we're reading */ - if (state->mode != GZ_READ) - return Z_STREAM_ERROR; - - /* free memory and close file */ - if (state->size) { - inflateEnd(&(state->strm)); - free(state->out); - free(state->in); - } - gz_error(state, Z_OK, NULL); - free(state->path); - ret = close(state->fd); - free(state); - return ret ? Z_ERRNO : Z_OK; -} diff --git a/lib/libvgz/gzwrite.c b/lib/libvgz/gzwrite.c deleted file mode 100644 index 08bb9d8..0000000 --- a/lib/libvgz/gzwrite.c +++ /dev/null @@ -1,531 +0,0 @@ -/* gzwrite.c -- zlib functions for writing gzip files - * Copyright (C) 2004, 2005, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "gzguts.h" - -/* Local functions */ -local int gz_init OF((gz_statep)); -local int gz_comp OF((gz_statep, int)); -local int gz_zero OF((gz_statep, z_off64_t)); - -/* Initialize state for writing a gzip file. Mark initialization by setting - state->size to non-zero. Return -1 on failure or 0 on success. 
*/ -local int gz_init(state) - gz_statep state; -{ - int ret; - z_streamp strm = &(state->strm); - - /* allocate input and output buffers */ - state->in = malloc(state->want); - state->out = malloc(state->want); - if (state->in == NULL || state->out == NULL) { - if (state->out != NULL) - free(state->out); - if (state->in != NULL) - free(state->in); - gz_error(state, Z_MEM_ERROR, "out of memory"); - return -1; - } - - /* allocate deflate memory, set up for gzip compression */ - strm->zalloc = Z_NULL; - strm->zfree = Z_NULL; - strm->opaque = Z_NULL; - ret = deflateInit2(strm, state->level, Z_DEFLATED, - 15 + 16, 8, state->strategy); - if (ret != Z_OK) { - free(state->in); - gz_error(state, Z_MEM_ERROR, "out of memory"); - return -1; - } - - /* mark state as initialized */ - state->size = state->want; - - /* initialize write buffer */ - strm->avail_out = state->size; - strm->next_out = state->out; - state->next = strm->next_out; - return 0; -} - -/* Compress whatever is at avail_in and next_in and write to the output file. - Return -1 if there is an error writing to the output file, otherwise 0. - flush is assumed to be a valid deflate() flush value. If flush is Z_FINISH, - then the deflate() state is reset to start a new gzip stream. 
*/ -local int gz_comp(state, flush) - gz_statep state; - int flush; -{ - int ret, got; - unsigned have; - z_streamp strm = &(state->strm); - - /* allocate memory if this is the first time through */ - if (state->size == 0 && gz_init(state) == -1) - return -1; - - /* run deflate() on provided input until it produces no more output */ - ret = Z_OK; - do { - /* write out current buffer contents if full, or if flushing, but if - doing Z_FINISH then don't write until we get to Z_STREAM_END */ - if (strm->avail_out == 0 || (flush != Z_NO_FLUSH && - (flush != Z_FINISH || ret == Z_STREAM_END))) { - have = (unsigned)(strm->next_out - state->next); - if (have && ((got = write(state->fd, state->next, have)) < 0 || - (unsigned)got != have)) { - gz_error(state, Z_ERRNO, zstrerror()); - return -1; - } - if (strm->avail_out == 0) { - strm->avail_out = state->size; - strm->next_out = state->out; - } - state->next = strm->next_out; - } - - /* compress */ - have = strm->avail_out; - ret = deflate(strm, flush); - if (ret == Z_STREAM_ERROR) { - gz_error(state, Z_STREAM_ERROR, - "internal error: deflate stream corrupt"); - return -1; - } - have -= strm->avail_out; - } while (have); - - /* if that completed a deflate stream, allow another to start */ - if (flush == Z_FINISH) - deflateReset(strm); - - /* all done, no errors */ - return 0; -} - -/* Compress len zeros to output. Return -1 on error, 0 on success. */ -local int gz_zero(state, len) - gz_statep state; - z_off64_t len; -{ - int first; - unsigned n; - z_streamp strm = &(state->strm); - - /* consume whatever's left in the input buffer */ - if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) - return -1; - - /* compress len zeros (len guaranteed > 0) */ - first = 1; - while (len) { - n = GT_OFF(state->size) || (z_off64_t)state->size > len ? 
- (unsigned)len : state->size; - if (first) { - memset(state->in, 0, n); - first = 0; - } - strm->avail_in = n; - strm->next_in = state->in; - state->pos += n; - if (gz_comp(state, Z_NO_FLUSH) == -1) - return -1; - len -= n; - } - return 0; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzwrite(file, buf, len) - gzFile file; - voidpc buf; - unsigned len; -{ - unsigned put = len; - unsigned n; - gz_statep state; - z_streamp strm; - - /* get internal structure */ - if (file == NULL) - return 0; - state = (gz_statep)file; - strm = &(state->strm); - - /* check that we're writing and that there's no error */ - if (state->mode != GZ_WRITE || state->err != Z_OK) - return 0; - - /* since an int is returned, make sure len fits in one, otherwise return - with an error (this avoids the flaw in the interface) */ - if ((int)len < 0) { - gz_error(state, Z_BUF_ERROR, "requested length does not fit in int"); - return 0; - } - - /* if len is zero, avoid unnecessary operations */ - if (len == 0) - return 0; - - /* allocate memory if this is the first time through */ - if (state->size == 0 && gz_init(state) == -1) - return 0; - - /* check for seek request */ - if (state->seek) { - state->seek = 0; - if (gz_zero(state, state->skip) == -1) - return 0; - } - - /* for small len, copy to input buffer, otherwise compress directly */ - if (len < state->size) { - /* copy to input buffer, compress when full */ - do { - if (strm->avail_in == 0) - strm->next_in = state->in; - n = state->size - strm->avail_in; - if (n > len) - n = len; - memcpy(strm->next_in + strm->avail_in, buf, n); - strm->avail_in += n; - state->pos += n; - buf = (char *)((uintptr_t)buf + n); - len -= n; - if (len && gz_comp(state, Z_NO_FLUSH) == -1) - return 0; - } while (len); - } - else { - /* consume whatever's left in the input buffer */ - if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) - return 0; - - /* directly compress user buffer to file */ - strm->avail_in = len; - strm->next_in = (voidp)(uintptr_t)buf; - 
state->pos += len; - if (gz_comp(state, Z_NO_FLUSH) == -1) - return 0; - } - - /* input was all buffered or compressed (put will fit in int) */ - return (int)put; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzputc(file, c) - gzFile file; - int c; -{ - unsigned char buf[1]; - gz_statep state; - z_streamp strm; - - /* get internal structure */ - if (file == NULL) - return -1; - state = (gz_statep)file; - strm = &(state->strm); - - /* check that we're writing and that there's no error */ - if (state->mode != GZ_WRITE || state->err != Z_OK) - return -1; - - /* check for seek request */ - if (state->seek) { - state->seek = 0; - if (gz_zero(state, state->skip) == -1) - return -1; - } - - /* try writing to input buffer for speed (state->size == 0 if buffer not - initialized) */ - if (strm->avail_in < state->size) { - if (strm->avail_in == 0) - strm->next_in = state->in; - strm->next_in[strm->avail_in++] = c; - state->pos++; - return c; - } - - /* no room in buffer or not initialized, use gz_write() */ - buf[0] = c; - if (gzwrite(file, buf, 1) != 1) - return -1; - return c; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzputs(file, str) - gzFile file; - const char *str; -{ - int ret; - unsigned len; - - /* write string */ - len = (unsigned)strlen(str); - ret = gzwrite(file, str, len); - return ret == 0 && len != 0 ? -1 : ret; -} - -#ifdef STDC -#include - -/* -- see zlib.h -- */ -int ZEXPORTVA gzprintf (gzFile file, const char *format, ...) 
-{ - int size, len; - gz_statep state; - z_streamp strm; - va_list va; - - /* get internal structure */ - if (file == NULL) - return -1; - state = (gz_statep)file; - strm = &(state->strm); - - /* check that we're writing and that there's no error */ - if (state->mode != GZ_WRITE || state->err != Z_OK) - return 0; - - /* make sure we have some buffer space */ - if (state->size == 0 && gz_init(state) == -1) - return 0; - - /* check for seek request */ - if (state->seek) { - state->seek = 0; - if (gz_zero(state, state->skip) == -1) - return 0; - } - - /* consume whatever's left in the input buffer */ - if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) - return 0; - - /* do the printf() into the input buffer, put length in len */ - size = (int)(state->size); - state->in[size - 1] = 0; - va_start(va, format); -#ifdef NO_vsnprintf -# ifdef HAS_vsprintf_void - (void)vsprintf(state->in, format, va); - va_end(va); - for (len = 0; len < size; len++) - if (state->in[len] == 0) break; -# else - len = vsprintf(state->in, format, va); - va_end(va); -# endif -#else -# ifdef HAS_vsnprintf_void - (void)vsnprintf(state->in, size, format, va); - va_end(va); - len = strlen(state->in); -# else - len = vsnprintf((char *)(state->in), size, format, va); - va_end(va); -# endif -#endif - - /* check that printf() results fit in buffer */ - if (len <= 0 || len >= (int)size || state->in[size - 1] != 0) - return 0; - - /* update buffer and position, defer compression until needed */ - strm->avail_in = (unsigned)len; - strm->next_in = state->in; - state->pos += len; - return len; -} - -#else /* !STDC */ - -/* -- see zlib.h -- */ -int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, - a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) - gzFile file; - const char *format; - int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, - a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; -{ - int size, len; - gz_statep state; - z_streamp strm; - - /* get internal structure */ - if 
(file == NULL) - return -1; - state = (gz_statep)file; - strm = &(state->strm); - - /* check that we're writing and that there's no error */ - if (state->mode != GZ_WRITE || state->err != Z_OK) - return 0; - - /* make sure we have some buffer space */ - if (state->size == 0 && gz_init(state) == -1) - return 0; - - /* check for seek request */ - if (state->seek) { - state->seek = 0; - if (gz_zero(state, state->skip) == -1) - return 0; - } - - /* consume whatever's left in the input buffer */ - if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) - return 0; - - /* do the printf() into the input buffer, put length in len */ - size = (int)(state->size); - state->in[size - 1] = 0; -#ifdef NO_snprintf -# ifdef HAS_sprintf_void - sprintf(state->in, format, a1, a2, a3, a4, a5, a6, a7, a8, - a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); - for (len = 0; len < size; len++) - if (state->in[len] == 0) break; -# else - len = sprintf(state->in, format, a1, a2, a3, a4, a5, a6, a7, a8, - a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); -# endif -#else -# ifdef HAS_snprintf_void - snprintf(state->in, size, format, a1, a2, a3, a4, a5, a6, a7, a8, - a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); - len = strlen(state->in); -# else - len = snprintf(state->in, size, format, a1, a2, a3, a4, a5, a6, a7, a8, - a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); -# endif -#endif - - /* check that printf() results fit in buffer */ - if (len <= 0 || len >= (int)size || state->in[size - 1] != 0) - return 0; - - /* update buffer and position, defer compression until needed */ - strm->avail_in = (unsigned)len; - strm->next_in = state->in; - state->pos += len; - return len; -} - -#endif - -/* -- see zlib.h -- */ -int ZEXPORT gzflush(file, flush) - gzFile file; - int flush; -{ - gz_statep state; - - /* get internal structure */ - if (file == NULL) - return -1; - state = (gz_statep)file; - - /* check that we're writing and that there's no error */ - 
if (state->mode != GZ_WRITE || state->err != Z_OK) - return Z_STREAM_ERROR; - - /* check flush parameter */ - if (flush < 0 || flush > Z_FINISH) - return Z_STREAM_ERROR; - - /* check for seek request */ - if (state->seek) { - state->seek = 0; - if (gz_zero(state, state->skip) == -1) - return -1; - } - - /* compress remaining data with requested flush */ - gz_comp(state, flush); - return state->err; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzsetparams(file, level, strategy) - gzFile file; - int level; - int strategy; -{ - gz_statep state; - z_streamp strm; - - /* get internal structure */ - if (file == NULL) - return Z_STREAM_ERROR; - state = (gz_statep)file; - strm = &(state->strm); - - /* check that we're writing and that there's no error */ - if (state->mode != GZ_WRITE || state->err != Z_OK) - return Z_STREAM_ERROR; - - /* if no change is requested, then do nothing */ - if (level == state->level && strategy == state->strategy) - return Z_OK; - - /* check for seek request */ - if (state->seek) { - state->seek = 0; - if (gz_zero(state, state->skip) == -1) - return -1; - } - - /* change compression parameters for subsequent input */ - if (state->size) { - /* flush previous input with previous parameters before changing */ - if (strm->avail_in && gz_comp(state, Z_PARTIAL_FLUSH) == -1) - return state->err; - deflateParams(strm, level, strategy); - } - state->level = level; - state->strategy = strategy; - return Z_OK; -} - -/* -- see zlib.h -- */ -int ZEXPORT gzclose_w(file) - gzFile file; -{ - int ret = 0; - gz_statep state; - - /* get internal structure */ - if (file == NULL) - return Z_STREAM_ERROR; - state = (gz_statep)file; - - /* check that we're writing */ - if (state->mode != GZ_WRITE) - return Z_STREAM_ERROR; - - /* check for seek request */ - if (state->seek) { - state->seek = 0; - ret += gz_zero(state, state->skip); - } - - /* flush, free memory, and close file */ - ret += gz_comp(state, Z_FINISH); - (void)deflateEnd(&(state->strm)); - free(state->out); - 
free(state->in); - gz_error(state, Z_OK, NULL); - free(state->path); - ret += close(state->fd); - free(state); - return ret ? Z_ERRNO : Z_OK; -} From phk at varnish-cache.org Mon Feb 28 21:35:29 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 22:35:29 +0100 Subject: [master] b4f1774 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache Message-ID: commit b4f1774d0fe2690d6768615b1ff0694c2fdfbd05 Merge: 89289cf e2287d2 Author: Poul-Henning Kamp Date: Mon Feb 28 21:35:15 2011 +0000 Merge branch 'master' of ssh://git.varnish-cache.org/git/varnish-cache From phk at varnish-cache.org Mon Feb 28 21:47:24 2011 From: phk at varnish-cache.org (Poul-Henning Kamp) Date: Mon, 28 Feb 2011 22:47:24 +0100 Subject: [master] c85b3ea If we have a CLANG compiler, use it in the absence of any preferences. Message-ID: commit c85b3ea54cbcab0c9807888cca93b0e60acbabc7 Author: Poul-Henning Kamp Date: Mon Feb 28 21:46:35 2011 +0000 If we have a CLANG compiler, use it in the absence of any preferences. This only affects development environments on FreeBSD, but hopefully exposing the Varnish source code to yet another compiler will help expose bugs. diff --git a/autogen.des b/autogen.des index b220566..0c272bc 100755 --- a/autogen.des +++ b/autogen.des @@ -5,6 +5,12 @@ set -ex +# Prefer CLANG if we have it, and have not given preferences +if [ -f /usr/bin/clang -a "x${CC}" = "x" ] ; then + CC=clang + export CC +fi + . ./autogen.sh # autoconf prior to 2.62 has issues with zsh 4.2 and newer