From martin at varnish-software.com Wed Feb 4 15:12:38 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Wed, 4 Feb 2015 16:12:38 +0100 Subject: [PATCH] Do not recognize a 304 as a valid revalidation response for an ims_oc without OF_IMSCAND Message-ID: <1423062758-25995-1-git-send-email-martin@varnish-software.com> Fixes: #1672 --- bin/varnishd/cache/cache_fetch.c | 4 +++- bin/varnishtest/tests/r01672.vtc | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 bin/varnishtest/tests/r01672.vtc diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c index 07801e8..79e2c3b 100644 --- a/bin/varnishd/cache/cache_fetch.c +++ b/bin/varnishd/cache/cache_fetch.c @@ -393,7 +393,9 @@ vbf_stp_startfetch(struct worker *wrk, struct busyobj *bo) AZ(bo->do_esi); - if (bo->ims_oc != NULL && http_IsStatus(bo->beresp, 304)) { + if (bo->ims_oc != NULL && + ObjCheckFlag(bo->wrk, bo->ims_oc, OF_IMSCAND) && + http_IsStatus(bo->beresp, 304)) { if (ObjCheckFlag(bo->wrk, bo->ims_oc, OF_CHGGZIP)) { /* * If we changed the gzip status of the object diff --git a/bin/varnishtest/tests/r01672.vtc b/bin/varnishtest/tests/r01672.vtc new file mode 100644 index 0000000..ae99a2f --- /dev/null +++ b/bin/varnishtest/tests/r01672.vtc @@ -0,0 +1,33 @@ +varnishtest "#1672: Bogus 304 backend reply" + +# First serve a non-200 status object to the cache, +# then revalidate it unconditionally +server s1 { + rxreq + txresp -status 404 + + rxreq + txresp -status 304 +} -start + +varnish v1 -vcl+backend { + sub vcl_backend_response { + set beresp.ttl = 0.1s; + set beresp.grace = 0s; + set beresp.keep = 10s; + } +} -start + +client c1 { + txreq + rxresp + expect resp.status == 404 +} -run + +delay 0.2 + +client c1 { + txreq + rxresp + expect resp.status == 304 +} -run -- 2.1.4 From martin at varnish-software.com Thu Feb 5 13:00:47 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Thu, 5 Feb 2015 
14:00:47 +0100 Subject: [PATCH 1/2] Rename busyobj->ims_oc to busyobj->stale_oc Message-ID: <1423141248-16030-1-git-send-email-martin@varnish-software.com> This to better reflect the role of this reference, that is both being used as a template for IMS revalidation and as a reference to a stale object to be invalidated from the cache on fetch success. --- bin/varnishd/cache/cache.h | 2 +- bin/varnishd/cache/cache_fetch.c | 50 ++++++++++++++++++++-------------------- bin/varnishd/cache/cache_panic.c | 4 ++-- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h index fccc466..f758909 100644 --- a/bin/varnishd/cache/cache.h +++ b/bin/varnishd/cache/cache.h @@ -509,7 +509,7 @@ struct busyobj { struct http *bereq0; struct http *bereq; struct http *beresp; - struct objcore *ims_oc; + struct objcore *stale_oc; struct objcore *fetch_objcore; struct http_conn *htc; diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c index 07801e8..0f229ba 100644 --- a/bin/varnishd/cache/cache_fetch.c +++ b/bin/varnishd/cache/cache_fetch.c @@ -192,13 +192,13 @@ vbf_stp_mkbereq(const struct worker *wrk, struct busyobj *bo) http_CopyHome(bo->bereq0); } - if (bo->ims_oc != NULL && - ObjCheckFlag(bo->wrk, bo->ims_oc, OF_IMSCAND)) { - q = HTTP_GetHdrPack(bo->wrk, bo->ims_oc, H_Last_Modified); + if (bo->stale_oc != NULL && + ObjCheckFlag(bo->wrk, bo->stale_oc, OF_IMSCAND)) { + q = HTTP_GetHdrPack(bo->wrk, bo->stale_oc, H_Last_Modified); if (q != NULL) http_PrintfHeader(bo->bereq0, "If-Modified-Since: %s", q); - q = HTTP_GetHdrPack(bo->wrk, bo->ims_oc, H_ETag); + q = HTTP_GetHdrPack(bo->wrk, bo->stale_oc, H_ETag); if (q != NULL) http_PrintfHeader(bo->bereq0, "If-None-Match: %s", q); @@ -393,8 +393,8 @@ vbf_stp_startfetch(struct worker *wrk, struct busyobj *bo) AZ(bo->do_esi); - if (bo->ims_oc != NULL && http_IsStatus(bo->beresp, 304)) { - if (ObjCheckFlag(bo->wrk, bo->ims_oc, OF_CHGGZIP)) { + if 
(bo->stale_oc != NULL && http_IsStatus(bo->beresp, 304)) { + if (ObjCheckFlag(bo->wrk, bo->stale_oc, OF_CHGGZIP)) { /* * If we changed the gzip status of the object * the stored Content_Encoding controls we @@ -404,7 +404,7 @@ vbf_stp_startfetch(struct worker *wrk, struct busyobj *bo) RFC2616_Weaken_Etag(bo->beresp); } http_Unset(bo->beresp, H_Content_Length); - HTTP_Merge(bo->wrk, bo->ims_oc, bo->beresp); + HTTP_Merge(bo->wrk, bo->stale_oc, bo->beresp); assert(http_IsStatus(bo->beresp, 200)); do_ims = 1; } else @@ -672,8 +672,8 @@ vbf_stp_fetch(struct worker *wrk, struct busyobj *bo) VBO_setstate(bo, BOS_FINISHED); VSLb_ts_busyobj(bo, "BerespBody", W_TIM_real(wrk)); - if (bo->ims_oc != NULL) - EXP_Rearm(bo->ims_oc, bo->ims_oc->exp.t_origin, 0, 0, 0); + if (bo->stale_oc != NULL) + EXP_Rearm(bo->stale_oc, bo->stale_oc->exp.t_origin, 0, 0, 0); return (F_STP_DONE); } @@ -694,12 +694,12 @@ vbf_stp_condfetch(struct worker *wrk, struct busyobj *bo) AZ(vbf_beresp2obj(bo)); - if (ObjGetattr(bo->wrk, bo->ims_oc, OA_ESIDATA, NULL) != NULL) - AZ(ObjCopyAttr(bo->wrk, bo->fetch_objcore, bo->ims_oc, + if (ObjGetattr(bo->wrk, bo->stale_oc, OA_ESIDATA, NULL) != NULL) + AZ(ObjCopyAttr(bo->wrk, bo->fetch_objcore, bo->stale_oc, OA_ESIDATA)); - AZ(ObjCopyAttr(bo->wrk, bo->fetch_objcore, bo->ims_oc, OA_FLAGS)); - AZ(ObjCopyAttr(bo->wrk, bo->fetch_objcore, bo->ims_oc, OA_GZIPBITS)); + AZ(ObjCopyAttr(bo->wrk, bo->fetch_objcore, bo->stale_oc, OA_FLAGS)); + AZ(ObjCopyAttr(bo->wrk, bo->fetch_objcore, bo->stale_oc, OA_GZIPBITS)); if (bo->do_stream) { HSH_Unbusy(wrk, bo->fetch_objcore); @@ -707,13 +707,13 @@ vbf_stp_condfetch(struct worker *wrk, struct busyobj *bo) } al = 0; - oi = ObjIterBegin(wrk, bo->ims_oc); + oi = ObjIterBegin(wrk, bo->stale_oc); do { - ois = ObjIter(bo->ims_oc, oi, &sp, &sl); + ois = ObjIter(bo->stale_oc, oi, &sp, &sl); if (ois == OIS_ERROR) (void)VFP_Error(bo->vfc, "Template object failed"); while (sl > 0) { - l = ObjGetLen(bo->wrk, bo->ims_oc) - al; + l = 
ObjGetLen(bo->wrk, bo->stale_oc) - al; assert(l > 0); if (VFP_GetStorage(bo->vfc, &l, &ptr) != VFP_OK) break; @@ -726,8 +726,8 @@ vbf_stp_condfetch(struct worker *wrk, struct busyobj *bo) sl -= l; } } while (!bo->vfc->failed && (ois == OIS_DATA || ois == OIS_STREAM)); - ObjIterEnd(bo->ims_oc, &oi); - if (bo->ims_oc->flags & OC_F_FAILED) + ObjIterEnd(bo->stale_oc, &oi); + if (bo->stale_oc->flags & OC_F_FAILED) (void)VFP_Error(bo->vfc, "Template object failed"); if (bo->vfc->failed) { VDI_Finish(bo->wrk, bo); @@ -738,7 +738,7 @@ vbf_stp_condfetch(struct worker *wrk, struct busyobj *bo) HSH_Unbusy(wrk, bo->fetch_objcore); assert(ObjGetLen(bo->wrk, bo->fetch_objcore) == al); - EXP_Rearm(bo->ims_oc, bo->ims_oc->exp.t_origin, 0, 0, 0); + EXP_Rearm(bo->stale_oc, bo->stale_oc->exp.t_origin, 0, 0, 0); /* Recycle the backend connection before setting BOS_FINISHED to give predictable backend reuse behavior for varnishtest */ @@ -918,8 +918,8 @@ vbf_fetch_thread(struct worker *wrk, void *priv) } AZ(bo->fetch_objcore->busyobj); - if (bo->ims_oc != NULL) - (void)HSH_DerefObjCore(wrk, &bo->ims_oc); + if (bo->stale_oc != NULL) + (void)HSH_DerefObjCore(wrk, &bo->stale_oc); wrk->vsl = NULL; @@ -974,11 +974,11 @@ VBF_Fetch(struct worker *wrk, struct req *req, struct objcore *oc, HSH_Ref(oc); bo->fetch_objcore = oc; - AZ(bo->ims_oc); + AZ(bo->stale_oc); if (oldoc != NULL) { assert(oldoc->refcnt > 0); HSH_Ref(oldoc); - bo->ims_oc = oldoc; + bo->stale_oc = oldoc; } AZ(bo->req); @@ -990,8 +990,8 @@ VBF_Fetch(struct worker *wrk, struct req *req, struct objcore *oc, if (Pool_Task(wrk->pool, &bo->fetch_task, POOL_QUEUE_FRONT)) { wrk->stats->fetch_no_thread++; (void)vbf_stp_fail(req->wrk, bo); - if (bo->ims_oc != NULL) - (void)HSH_DerefObjCore(wrk, &bo->ims_oc); + if (bo->stale_oc != NULL) + (void)HSH_DerefObjCore(wrk, &bo->stale_oc); VBO_DerefBusyObj(wrk, &bo_fetch); } else { bo_fetch = NULL; /* ref transferred to fetch thread */ diff --git a/bin/varnishd/cache/cache_panic.c 
b/bin/varnishd/cache/cache_panic.c index a5c2ce6..a65f037 100644 --- a/bin/varnishd/cache/cache_panic.c +++ b/bin/varnishd/cache/cache_panic.c @@ -356,8 +356,8 @@ pan_busyobj(const struct busyobj *bo) pan_http("beresp", bo->beresp, 4); if (bo->fetch_objcore) pan_objcore("FETCH", bo->fetch_objcore); - if (bo->ims_oc) - pan_objcore("IMS", bo->ims_oc); + if (bo->stale_oc) + pan_objcore("STALE", bo->stale_oc); VSB_printf(pan_vsp, " }\n"); } -- 2.1.4 From martin at varnish-software.com Thu Feb 5 13:00:48 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Thu, 5 Feb 2015 14:00:48 +0100 Subject: [PATCH 2/2] Rename req->ims_oc to req->stale_oc In-Reply-To: <1423141248-16030-1-git-send-email-martin@varnish-software.com> References: <1423141248-16030-1-git-send-email-martin@varnish-software.com> Message-ID: <1423141248-16030-2-git-send-email-martin@varnish-software.com> This to better reflect it's role as a stale reference that should be passed to the fetch thread. --- bin/varnishd/cache/cache.h | 2 +- bin/varnishd/cache/cache_req_fsm.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h index f758909..faaaf4f 100644 --- a/bin/varnishd/cache/cache.h +++ b/bin/varnishd/cache/cache.h @@ -610,7 +610,7 @@ struct req { struct ws ws[1]; struct objcore *objcore; - struct objcore *ims_oc; + struct objcore *stale_oc; /* Lookup stuff */ struct SHA256Context *sha256ctx; diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c index e080eee..aa737d9 100644 --- a/bin/varnishd/cache/cache_req_fsm.c +++ b/bin/varnishd/cache/cache_req_fsm.c @@ -371,7 +371,7 @@ cnt_lookup(struct worker *wrk, struct req *req) case VCL_RET_FETCH: if (boc != NULL) { req->objcore = boc; - req->ims_oc = oc; + req->stale_oc = oc; req->req_step = R_STP_MISS; } else { (void)HSH_DerefObjCore(wrk, &req->objcore); @@ -428,10 +428,10 @@ cnt_miss(struct worker *wrk, struct req *req) switch 
(wrk->handling) { case VCL_RET_FETCH: wrk->stats->cache_miss++; - VBF_Fetch(wrk, req, req->objcore, req->ims_oc, VBF_NORMAL); + VBF_Fetch(wrk, req, req->objcore, req->stale_oc, VBF_NORMAL); req->req_step = R_STP_FETCH; - if (req->ims_oc != NULL) - (void)HSH_DerefObjCore(wrk, &req->ims_oc); + if (req->stale_oc != NULL) + (void)HSH_DerefObjCore(wrk, &req->stale_oc); return (REQ_FSM_MORE); case VCL_RET_SYNTH: req->req_step = R_STP_SYNTH; @@ -446,8 +446,8 @@ cnt_miss(struct worker *wrk, struct req *req) WRONG("Illegal return from vcl_miss{}"); } VRY_Clear(req); - if (req->ims_oc != NULL) - (void)HSH_DerefObjCore(wrk, &req->ims_oc); + if (req->stale_oc != NULL) + (void)HSH_DerefObjCore(wrk, &req->stale_oc); AZ(HSH_DerefObjCore(wrk, &req->objcore)); return (REQ_FSM_MORE); } -- 2.1.4 From tfheen at fastly.com Fri Feb 6 08:48:16 2015 From: tfheen at fastly.com (Tollef Fog Heen) Date: Fri, 6 Feb 2015 09:48:16 +0100 Subject: [master] c816cc1 Remove statoverride on uninstall/purge. In-Reply-To: References: Message-ID: 2015-02-05 13:51 GMT+01:00 Lasse Karstensen : > commit c816cc198a3bea643a9f8b54eaed81d8884cae23 > Author: Lasse Karstensen > Date: Thu Feb 5 13:49:40 2015 +0100 > > Remove statoverride on uninstall/purge. > > This is added during package installation, and was previously not > cleaned up properly. This is fine. > Having the extra line in statoverride that references a non-existing > varnishlog user, makes dpkg very sad. The user isn't (and shouldn't) be removed on removal or purge, so I'm not sure how this would happen. 
- Tollef From slink at schokola.de Mon Feb 9 12:46:58 2015 From: slink at schokola.de (Nils Goroll) Date: Mon, 09 Feb 2015 13:46:58 +0100 Subject: [PATCH] Allow varnish_reload_vcl to discard old VCL after reload In-Reply-To: <5F530A9242E7F84F999DB40E0E268FBD45EB9CBA@mercalli.lild01.pictime.fr> References: <5F530A9242E7F84F999DB40E0E268FBD45EB9CBA@mercalli.lild01.pictime.fr> Message-ID: <54D8AC42.50506@schokola.de> On 19/11/14 10:10, Delacroix, Gauthier wrote: > This patch allows varnish_reload_vcl to discard old (available) VCL > configurations after reload. DISCARD_OLD_VCL environment variable must be set > to 1. It is set to 0 by default to avoid unexpected behavior change. Finally got around to asking about this on -hacking: (13:44:49) scn: i think the consensus was that we keep n around (13:44:56) scn: and make n configurable (13:45:03) scn: we discussed this at some point on some vdd So we should - not exit 1 if the discard fails - the vcl could still be referenced - have a parameter for the number of vcls to keep loaded. Nils From martin at varnish-software.com Mon Feb 9 14:32:32 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Mon, 9 Feb 2015 15:32:32 +0100 Subject: Workspace overflow handling Message-ID: VDD Hamburg talking point: Varnish asserting on workspace overflow is a problem that we really should address. It is most hurtful when it happens in Varnish core, as there are many code paths relying on workspace being available. If none is available, the assertion triggers, taking the cache with it. (Examples: Vary processing, delivery processor pushes, delivery IO vectors etc). Creating proper error handling and state unwinding for all these will be a major undertaking, and also error prone as testing all the failure points will be very hard. Workspace exhaustion also hurts in VCL space. Most VRT functions are written to handle it, but will do so by truncating the result and logging the fact (LostHeader).
This masks errors, and can potentially be an attack vector for circumventing VCL implemented security barriers. It also poses a DoS attack vector, if you know there are some serious manipulations happening on some header and send large payloads on them, causing an assert later when Varnish attempts delivery. In my opinion any failed attempt at setting a header from VCL should result in an error response immediately as we could not process the request properly. One way of dealing with this issue would be to add some guarantees for workspace allocations: Unless the workspace overflow flag is already set, all code is guaranteed to be able to allocate at least the set size of the workspace. This is achieved by allocating twice the amount of needed workspace on allocation. Since this space is normally untouched it will just be virtual memory and not backed by real memory. (We might have to bypass malloc and go for mmap anonymous to be able to do that). All WS_Alloc/WS_Release calls will then update the overflow flag whenever half of the available workspace has been used. Upon recycling of the workspace (request or busyobj), the flag is tested and if an overflow occurred an madvise(MADV_DONTNEED/MADV_FREE) is issued on the second half of the mapping to return the pages to the OS. This way the extra pages are returned to the OS, causing the range to be pure virtual again. Error handling in Varnish core will now be able to just have a handful of check points (mostly after the major VCL functions where we are prepared to error out anyway). If the overflow flag is set, we write out a static 5xx response (unless it's too late), and start processing the next request (or close if that's too late). In VCL we will teach the VCC compiler to check after each statement if the overflow flag is set, and return immediately when it is (so VCL execution is terminated prematurely).
The next check point in Varnish core will then pick up that the overflow has happened and error out from there. Comments much appreciated. Regards, Martin Blix Grydeland -- *Martin Blix Grydeland* Senior Developer | Varnish Software AS Mobile: +47 992 74 756 We Make Websites Fly! -------------- next part -------------- An HTML attachment was scrubbed... URL: From dridi.boukelmoune at zenika.com Mon Feb 9 14:56:26 2015 From: dridi.boukelmoune at zenika.com (Dridi Boukelmoune) Date: Mon, 9 Feb 2015 15:56:26 +0100 Subject: Workspace overflow handling In-Reply-To: References: Message-ID: Hi Martin, I have been thinking about this for a while, but couldn't go past "it is sometimes overkill to crash the whole child process". Regarding your comment on state unwinding, my main concern is critical sections and shared/global state. But I'm not familiar enough with the code base to measure the implications. On an unrelated note, I have been trying to code in Rust[1] lately, and it provides a `panic!` macro[2]. It will kill the failing thread/task and somewhat leave the rest of the program alone[3] which I thought could also apply to worker threads. I know this is half off-topic, but on top of not panicking on memory exhaustion, I thought we could also "panic" a single thread when it's a worker (regardless of the issue) when we can't serve a 5xx response. The current assert system is a bit too unforgiving for many cases. I hope I'm making some sense here. Regards, Dridi [1] http://www.rust-lang.org/ [2] http://doc.rust-lang.org/std/macro.panic!.html [3] http://doc.rust-lang.org/reference.html#thread On Mon, Feb 9, 2015 at 3:32 PM, Martin Blix Grydeland wrote: > > VDD Hamburg talking point: > > Varnish asserting on workspace overflow is a problem that we really should address. It is most hurtful when it happens in Varnish core, as there are many code paths relying on workspace being available. If none was available the assertion triggers taking the cache with it. 
(Examples: Vary processing, delivery processor pushes, delivery IO vectors etc). Creating proper error handling and state unwinding for all these will be a major undertaking, and also error prone as testing all the failure points will be very hard. > > Workspace exhaustion also hurt in VCL space. Most VRT functions are written to handle it, but will do so by truncating the result and log the fact (LostHeader). This masks errors, and can potentially be an attack vector for circumventing VCL implemented security barriers. It also poses a DOS attack vector, if you can know there are some serious manipulations happening on some header and send large payloads on them, causing an assert later when Varnish attempts delivery. In my opinion any failed attempt at setting a header from VCL should result in an error response immediately as we could not process the request properly. > > One way of dealing with this issue would be to add some guarantees for workspace allocations: Unless the workspace overflow flag is already set, all code is guaranteed to be able to allocate at least the set size of the workspace. This is achieved by allocating twice the amount of needed workspace on allocation. Since this space is normally untouched it will just be virtual memory and not backed by real memory. (We might have to bypass malloc and go for mmap anonymous to be able to do that). All WS_Alloc/WS_Release calls will then update the overflow flag whenever half of the available workspace has been used. Upon recycling of the workspace (request or busyobj), the flag is tested and if an overflow occured an madvise(MADV_DONTNEED/MADV_FREE) is issued on the second half of the mapping to return the pages to the OS. This way the extra pages are returned to the OS, causing the range to be pure virtual again. > > Error handling in Varnish core will now be able to just have a handful of check points (mostly after the major VCL functions where we are prepared to error out anyways). 
If the overflow flag is set, we write out a static 5xx response (unless it's too late), and start processing the next request (or close if that's too late). > > In VCL we will teach the VCC compiler to check after each statement if the overflow flag is set, and return immediately when it is (so VCL execution is terminated prematurely). The next check point in Varnish core will then pick up that the overflow has happened and error out from there. > > Comments much appreciated. > > Regards, > Martin Blix Grydeland > > -- > Martin Blix Grydeland > Senior Developer | Varnish Software AS > Mobile: +47 992 74 756 > We Make Websites Fly! > > _______________________________________________ > varnish-dev mailing list > varnish-dev at varnish-cache.org > https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev From lkarsten at varnish-software.com Mon Feb 9 19:51:21 2015 From: lkarsten at varnish-software.com (Lasse Karstensen) Date: Mon, 9 Feb 2015 20:51:21 +0100 Subject: [master] c816cc1 Remove statoverride on uninstall/purge. In-Reply-To: References: Message-ID: <20150209195120.GA29454@immer.varnish-software.com> On Fri, Feb 06, 2015 at 09:48:16AM +0100, Tollef Fog Heen wrote: > 2015-02-05 13:51 GMT+01:00 Lasse Karstensen : > > Having the extra line in statoverride that references a non-existing > > varnishlog user, makes dpkg very sad. > The user isn't (and shouldn't) be removed on removal or purge, so I'm > not sure how this would happen. I spent a fair number of hours digging into this. When doing manual install and then remove on wheezy, I wasn't able to see the behaviour. Is this what you are seeing as well? Yet our wheezy build servers, where packages are installed/uninstalled by sbuild, got into this state. I relayed the patch to Stig (debian varnish maint.) and asked for feedback. If you have any further insights, I'm happy to listen.
-- Lasse Karstensen Varnish Software AS From phk at phk.freebsd.dk Mon Feb 9 21:20:03 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Mon, 09 Feb 2015 21:20:03 +0000 Subject: Workspace overflow handling In-Reply-To: References: Message-ID: <21213.1423516803@critter.freebsd.dk> -------- I've spent a lot of time pondering this over the years, and as always with hard problems, there are no easy, cheap and correct solutions, and you may not even get to pick two out of three. When I designed the "sbuf" API many years ago in FreeBSD (also known as VSB's in Varnish) I had concluded that the main "threat" of good robust error handling was that it munged up the source-code with pointless verbiage and boiler-plate. The result was the "latch-and-report" scheme of sbuf/VSB, where you can carry on as long as you like after a failure, but nothing (bad) is going to happen, because the error-latch is already set. I spent some time, before getting distracted by other more important(?) hackery, giving struct http such a latch-and-report mechanism, except the latch is actually on the workspace associated with the http structure. Once completed, that work should pretty much take care of compiled VCL code because we can simply test when the VCL method returns and fail the transaction there. VMODs and VRTs need to be more alert obviously, but that is much more manageable. I don't believe much in your "just double the allocation with unused VM" idea, first because there is no guarantee that the number two is magic and big enough, so we'd still have to check all over the place, and second because the necessary management of map/unmap is a needless complication. So summary: I think we all agree on the end result, and I believe that mark/report is our main means to get there, but there is so much code to hack and so little time...
Poul-Henning PS: And no, I'm not going to rewrite Varnish in Rust -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From phk at phk.freebsd.dk Mon Feb 9 21:25:56 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Mon, 09 Feb 2015 21:25:56 +0000 Subject: Workspace overflow handling In-Reply-To: References: Message-ID: <21253.1423517156@critter.freebsd.dk> -------- In message , Dridi Boukelmoune writes: >I know this is half off-topic, but on top of not panicking on memory >exhaustion, I thought we could also "panic" a single thread when it's a >worker (regardless of the issue) when we can't serve a 5xx response. It's a workable solution when you have memory protection between your threads (which is, if you think about it, basically why it works for UNIX kernels to coredump processes). But for threads in the same address space, using non-isolated means of intercommunication (ie: userland mtx/condvar with kernel assist) as opposed to kernel mediated means of intercommunication, it invariably becomes hairy, bugprone and full of races. Thus I'm not very thrilled about this idea. >The current assert system is a bit too unforgiving for many cases. I fully agree, but it is still preferable to running with inconsistent data structures. -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence.
From dridi.boukelmoune at zenika.com Mon Feb 9 21:54:35 2015 From: dridi.boukelmoune at zenika.com (Dridi Boukelmoune) Date: Mon, 9 Feb 2015 22:54:35 +0100 Subject: Workspace overflow handling In-Reply-To: <21253.1423517156@critter.freebsd.dk> References: <21253.1423517156@critter.freebsd.dk> Message-ID: On Mon, Feb 9, 2015 at 10:25 PM, Poul-Henning Kamp wrote: > But for threads in the same address space, using non-isolated means > of intercommunication (ie: userland mtx/condvar with kernel assist) > as opposed to kernel mediated means of intercommunication, it > invariably becomes hairy, bugprone and full of races. > > Thus I'm not very thrilled about this idea. That's what I meant in the second paragraph of my answer to Martin. So assuming an "isolated" address space (my understanding of workspaces), a worker thread could avoid a child panic on some code paths. Please note the quotes around "isolated", I'm not saying workspaces actually are. You could imagine new `wk_assert`, `WK_AZ`, `WK_AN` etc macros to avoid boiler-plate code, but only in relevant places. > I fully agree, but still preferable to running with inconsistent > data structures. It would become even harder to debug. My suggestion relies on the supposition that it would (sometimes) be possible to kill a single worker thread without corrupting the whole process. Regards, Dridi PS. 
Maybe I'll start a new Tarnish Cache project in Rust :-) From martin at varnish-software.com Tue Feb 10 08:49:23 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Tue, 10 Feb 2015 09:49:23 +0100 Subject: Fwd: Workspace overflow handling In-Reply-To: References: <21213.1423516803@critter.freebsd.dk> Message-ID: Forgot reply-all Martin ---------- Forwarded message ---------- From: Martin Blix Grydeland Date: 10 February 2015 at 09:48 Subject: Re: Workspace overflow handling To: Poul-Henning Kamp On 9 February 2015 at 22:20, Poul-Henning Kamp wrote: > -------- > > I've spent a lot of time pondering this over the years, and as > always with hard problems, there are no easy, cheap and correct > solutions, > I'd say two out of three still isn't bad, and in my opinion the proposal scores good on both the easy and cheap metric, presenting a feasible solution although maybe interim. I believe this could be achieved within reasonable time frame and be a 95% solution to a problem that I feel is becoming increasingly urgent to fix. > I don't belive much in your "just double the allocation with unused > VM" idea, first because there is no guarantee that the number two > is magic and big enough, so we'd still have to check all over the > place, and second because the necessay management of map/unmap is > a needless complication. > I believe that for the core Varnish code some minimum amount of workspace needed to succeed could be calculated (some number of headers we know we could set multiplied with the maximum header length, and add the overhead of the IO vectors...) As long as WS size is much larger than this, it will work. And then it'll buy us the time we need to do it the correct way. > > So summary: I think we all agree on the end result, and I belive > that mark/report is our main means to get there, but there is > so much code to hack and so little time... > Which is really the basis for this pragmatic approach to the problem. 
Martin -- *Martin Blix Grydeland* Senior Developer | Varnish Software AS Mobile: +47 992 74 756 We Make Websites Fly! -- *Martin Blix Grydeland* Senior Developer | Varnish Software AS Mobile: +47 992 74 756 We Make Websites Fly! -------------- next part -------------- An HTML attachment was scrubbed... URL: From phk at phk.freebsd.dk Wed Feb 11 09:39:31 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Wed, 11 Feb 2015 09:39:31 +0000 Subject: Thinking about sandboxing Message-ID: <20259.1423647571@critter.freebsd.dk> So inspired by #1663 and stuck in trains and meetings yesterday, I went over the VCC/CC code and spent some time staring into the future of sandboxing. Just to re-iterate our overall security model: level#0: The privilege used to start varnishd level#1: The privilege to access CLI level#2: The privilege to access VSM (VSC/VSL) level#3: The privilege to send a request through the worker It is worth putting in the record, that with the advent of VMODs we have given up the ability to sandbox subprocesses (VCC/CC/VCLLOAD/WORKER) into the working directory: They need to be able to reach out and find VMODs and God only knows what the VMODs themselves will try to access. This reduces the "unix-level" sandboxing to the question of ownership and modes on and in the working directory plus whatever enhanced sandboxing the particular operating system offers. We have traditionally used nobody:nogroup (uid:gid) for sandboxing, but I have reached the conclusion that we should migrate to a dedicated varnish:varnish identity. For instance getting to the VSM (level#2) means that you need to be able to get to the VSM file in the working directory, which again has implications for the permissions on that directory and the path to it. But separating level#1 and level#2 access requires there to be different access to the _.vsm and _.secret files, and both privileges should ideally be group based.
After thinking about all of this for some time, this is what I came up with: We have the following parameters: $user The $user defaults to "varnish" (!root: $uid) and is used to own all files created by varnish, and to prevent other programs or identities from pulling the rug out from under varnishd. $group The $group defaults to "varnish", (!root: $gid) but can be set to any random group, in which case it acts as restrictor for level#1 access for this instance of Varnish and can be used as restrictor for sensitive VCL and VMOD files. $group_vsm The $group_vsm defaults to "varnish", (!root: $gid) but can be set to any random group, in which case it acts as restrictor for level#2 access for each instance of Varnish. $group_cc Platform dependent group added to the CC subprocess for access to C-compiler bits. (default: empty) feature::public_vsm The VSM will be publicly readable, defaults to false. Suggested permissions: -n directory: 755 $user:$group _.vsm 640 $user:$group_vsm (!feature::public_vsm) _.vsm 644 $user:$group_vsm (feature::public_vsm) _.secret 640 $user:$group vcl.*.c 660 $user:$group (temporary file) vcl.*.so 440 $user:$group subprocesses: VCC $user:$group CC $user:$group+$group_cc DLOPEN $user:$group WORKER $user:$group Comments ? -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From phk at phk.freebsd.dk Wed Feb 11 10:41:58 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Wed, 11 Feb 2015 10:41:58 +0000 Subject: Fwd: Workspace overflow handling In-Reply-To: References: <21213.1423516803@critter.freebsd.dk> Message-ID: <20644.1423651318@critter.freebsd.dk> -------- In message , Martin Blix Grydeland writes: >Which is really the basis for this pragmatic approach to the problem.
I've sort of had it with "pragmatic approaches to problems" because I have spent so much of my 30 years in professional computing cleaning up after them. No, lets do it right from the beginning, so we don't ever have to revisit it again. -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From slink at schokola.de Wed Feb 11 15:01:09 2015 From: slink at schokola.de (Nils Goroll) Date: Wed, 11 Feb 2015 16:01:09 +0100 Subject: Thinking about sandboxing In-Reply-To: <20259.1423647571@critter.freebsd.dk> References: <20259.1423647571@critter.freebsd.dk> Message-ID: <54DB6EB5.7080403@schokola.de> Hi phk, I like your thoughts, but with Solaris privileges and Linux capabilities, I do see some additional aspects to consider (I will refer to both as "privileges" below): * With a privilege aware process, the ability to switch uids does not imply any other super cow powers. * varnish should not require the file_chown/CAP_CHOWN (chown files owned by other users) nor file_chown_self (solaris "giveaway") privileges. I think the varnishd master process (level#0) should continue to run as a different $master_user (root) than $user (varnish) in order to tie elevated privileges (opening reserved ports, setuid, fork, etc.) to that user and not require them for the varnish $user [even if the user is root, least privileges may be in effect and the master process may start with a reduced privilege set]. In this scenario... 
On 11/02/15 10:39, Poul-Henning Kamp wrote: > > -n directory: 755 $user:$group > _.vsm 640 $user:$group_vsm (!feature::public_vsm) > _.vsm 644 $user:$group_vsm (feature::public_vsm) > _.secret 640 $user:$group > vcl.*.c 660 $user:$group (temporary file) > vcl.*.so 440 $user:$group $master_user (root) would not be able to open any of these files (or only _.vsm for feature::public_vsm), unless it was also a member of $group and/or $group_vsm. I don't want my root user to be a member of the varnish $group. Creating them as $master_user (root) and giving them away would be an option requiring one or the other chown privilege, which I think we can avoid: I suggest to have a configurable $vcc_dir which defaults to sit next to the -n directory (so by default it gets created in the same parent directory). The master process would create the -n directory, the vcc subprocesses would create the $vcc_dir. No chown involved. We'd end up with the following permissions: # e.g. /tmp/varnish_name -n directory: 755 $master_user:$group _.vsm 640 $master_user:$group_vsm (!feature::public_vsm) _.vsm 644 $master_user:$group_vsm (feature::public_vsm) _.secret 640 $master_user:$group # e.g. /tmp/varnish_name.vcc $vcc_dir: 750 $user:$group vcl.*.c 660 $user:$group (temporary file) vcl.*.so 440 $user:$group mode 750 of $vcc_dir would add marginal security (avoiding unprivileged users to get to know about vcl names). As is the case now, the WORKER subprocess would inherit the _.vsm mmapped, so it wouldn't need write permission to the file. On 11/02/15 10:39, Poul-Henning Kamp wrote:> $group_cc > Platform dependent group added to the CC subprocess for > access to C-compiler bits. (default: empty) can you elaborate on this? Can you give an example what this is to be used for? How would you "add" the group to the subprocess? 
Nils From phk at phk.freebsd.dk Wed Feb 11 17:19:47 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Wed, 11 Feb 2015 17:19:47 +0000 Subject: Thinking about sandboxing In-Reply-To: <54DB6EB5.7080403@schokola.de> References: <20259.1423647571@critter.freebsd.dk> <54DB6EB5.7080403@schokola.de> Message-ID: <86074.1423675187@critter.freebsd.dk> -------- In message <54DB6EB5.7080403 at schokola.de>, Nils Goroll writes: >* varnish should not require the file_chown/CAP_CHOWN (chown files owned by > other users) nor file_chown_self (solaris "giveaway") privileges. That is part of the problem I'm trying to resolve, I think my proposal allows us to do so without too much hassle, simply because we can create the files after sandboxing, thereby getting the uid right from the start. >I think the varnishd master process (level#0) should continue to run as a >different $master_user (root) The master will run with the uid it started, whatever it is. >> -n directory: 755 $user:$group >> _.vsm 640 $user:$group_vsm (!feature::public_vsm) >> _.vsm 644 $user:$group_vsm (feature::public_vsm) >> _.secret 640 $user:$group >> vcl.*.c 660 $user:$group (temporary file) >> vcl.*.so 440 $user:$group > >$master_user (root) would not be able to open any of these files If your master process runs as root, it can setgroups itself into $group. >I suggest to have a configurable $vcc_dir [...] What for ? The increment of security is pointless IMO. >On 11/02/15 10:39, Poul-Henning Kamp wrote:> $group_cc >> Platform dependent group added to the CC subprocess for >> access to C-compiler bits. (default: empty) > >can you elaborate on this? Can you give an example what this is to be used for? >How would you "add" the group to the subprocess? 
See ad6bf9c0e51954cc45fee92d484e95c666d99685 and #1521 -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From phk at phk.freebsd.dk Thu Feb 12 10:22:31 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Thu, 12 Feb 2015 10:22:31 +0000 Subject: Thinking about sandboxing (take #2) In-Reply-To: <20259.1423647571@critter.freebsd.dk> References: <20259.1423647571@critter.freebsd.dk> Message-ID: <34825.1423736551@critter.freebsd.dk> -------- Ok, take 2, after I remembered a fine point about gids[1]. Varnishd can be started four different ways: 1) As plain user: uid=user, euid=user 2) As setuid user: uid=user, euid=varnish Master does: setuid(geteuid()) setgid(getegid()) and run all processes with that uid/gid and the gidset we are born with. param $user = geteuid() -- read-only param $group = getegid() -- read-only param $group_vsm = unset -- read-only param $group_cc = unset -- read-only 3) As plain root: uid=root, euid=root We can do whatever we want, and parameters act as described in previous email. param $user = "varnish" param $group = "varnish" param $group_vsm = unset (which means "use $group") param $group_cc = unset (which means "don't do anything") 4) As setuid root: uid=user, euid=root This is a major security hole, because varnishd executes argument strings, notably "param.set cc_command". Off the top of my head I cannot imagine a situation where this makes any sense, but I think we should treat it just like the "plain root" scenario, rather than do policy. In the root-case, Nils proposes that we should also lower master process privs. We can for instance run most of the time with: seteuid($user) setegid($group) and re-raise to: seteuid(root) only while opening sockets. This is under further consideration for side-effects (storage files etc.) 
Poul-Henning [1] A lot of people are unaware that groups can be used to selectively deny access to files. If a file is 707 foo:bar, and you are member of group bar, you cannot read it, even though everybody else in the world can. The fine point I overlooked is that this is why a non-root process cannot relinquish a group from its set. -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From slink at schokola.de Thu Feb 12 12:36:44 2015 From: slink at schokola.de (Nils Goroll) Date: Thu, 12 Feb 2015 13:36:44 +0100 Subject: Thinking about sandboxing In-Reply-To: <86074.1423675187@critter.freebsd.dk> References: <20259.1423647571@critter.freebsd.dk> <54DB6EB5.7080403@schokola.de> <86074.1423675187@critter.freebsd.dk> Message-ID: <54DC9E5C.5070507@schokola.de> Hi, phk and myself have continued this discussion a bit, but he had to leave, so I am continuing here. On 11/02/15 18:19, Poul-Henning Kamp wrote: >>> -n directory: 755 $user:$group >>> _.vsm 640 $user:$group_vsm (!feature::public_vsm) >>> _.vsm 644 $user:$group_vsm (feature::public_vsm) >>> _.secret 640 $user:$group >>> vcl.*.c 660 $user:$group (temporary file) >>> vcl.*.so 440 $user:$group >> >> $master_user (root) would not be able to open any of these files > > If your master process runs as root, it can setgroups itself into $group. On solaris, it can if it has the proc_setid priv. On linux, this should be CAP_SETGID. So, yes, master could setgroups() itself into $group. But then there is the (solaris-specific) issue that we cannot avoid the SNOCD flag getting set for the master process *1). The main consequence is that the master process cannot be controlled/inspected (e.g. have signals delivered, proc-tools used) from processes with the same uid/gid unless they have the proc_owner privilege. 
Another consequence is that we don't get core dumps unless setuid core dumps are enabled globally, but this is an issue anyway *2) In short: If we can, we should avoid any uid/gid fiddling (including setgroups()) in the master process, but I'd not consider this a blocker. >> I suggest to have a configurable $vcc_dir [...] > > What for ? If master runs as user/group master:master and -u varnish -g varnish are used, I don't see how master could read _.vsm 640 $user:$group_vsm (!feature::public_vsm) _.secret 640 $user:$group vcl.*.c 660 $user:$group (temporary file) vcl.*.so 440 $user:$group (unless it setgroups to $group, see above). So I suggested one directory "owned" by master and one shared for vcc/worker: # e.g. /tmp/varnish_name -n directory: 755 $master_user:$group _.vsm 640 $master_user:$group_vsm (!feature::public_vsm) _.vsm 644 $master_user:$group_vsm (feature::public_vsm) _.secret 640 $master_user:$group # e.g. /tmp/varnish_name.vcc $vcc_dir: 750 $user:$group vcl.*.c 660 $user:$group (temporary file) vcl.*.so 440 $user:$group > The increment of security is pointless IMO. Which is why I wrote "marginal". >> On 11/02/15 10:39, Poul-Henning Kamp wrote:> $group_cc >>> Platform dependent group added to the CC subprocess for >>> access to C-compiler bits. (default: empty) >> >> can you elaborate on this? Can you give an example what this is to be used for? >> How would you "add" the group to the subprocess? > > See ad6bf9c0e51954cc45fee92d484e95c666d99685 and #1521 Thanks for the pointers. Nils *1) I still need to re-check the current code, it looks like it gets set anyway at the moment. 
*2) see (now improved) comments in bin/varnishd/mgt/mgt_sandbox_solaris.c From slink at schokola.de Thu Feb 12 12:57:23 2015 From: slink at schokola.de (Nils Goroll) Date: Thu, 12 Feb 2015 13:57:23 +0100 Subject: Thinking about sandboxing (take #2) In-Reply-To: <34825.1423736551@critter.freebsd.dk> References: <20259.1423647571@critter.freebsd.dk> <34825.1423736551@critter.freebsd.dk> Message-ID: <54DCA333.5010001@schokola.de> On 12/02/15 11:22, Poul-Henning Kamp wrote: > Varnishd can be started four different ways: I'd need to wrap my head around the details, but the two additional 'least privileges' ways to start varnish (on solaris today and why-not on Linux in future) are: 5) Starting from a privilege aware process or classical 'root' with uid/gid switching ppriv -e -s \ A=basic,sys_resource,proc_setid,net_privaddr,!proc_info,!proc_session \ varnishd -u $user -g $group # *1) The user starting varnish with these privileges does not need to (but can) be root. It only needs to possess the privileges specified by A=... above. sandboxes will waive all privileges they don't need and will run as $user:$group Sample Remaining privset for a worker: 37003: /tmp/sbin/varnishd -a 127.0.0.1:81 -b 127.0.0.1:80 flags = PRIV_AWARE E: file_read,file_write,net_access I: none P: file_read,file_write,net_access,sys_resource L: file_read,file_write,net_access,sys_resource 6) Same without uid/gid switching (avoiding the SNOCD thing): Same, but without proc_setid and -u / -g identical to the user starting varnishd On 12/02/15 11:22, Poul-Henning Kamp wrote: > This is under further consideration for side-effects (storage > files etc.) Yes, and there is further complication from vmods - so we might need to make the worker privset user-configurable to allow for vmods requiring elevated privileges. 
Nils *1) Using A= is actually a cheap simplification here, the minimum required privset is even smaller From phk at phk.freebsd.dk Thu Feb 12 14:58:06 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Thu, 12 Feb 2015 14:58:06 +0000 Subject: PGP/GPG keys Message-ID: <1739.1423753086@critter.freebsd.dk> At the next VDD I'd like if we could spend 10 minutes cross-signing GPG/PGP keys. -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From phk at phk.freebsd.dk Mon Feb 16 08:07:16 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Mon, 16 Feb 2015 08:07:16 +0000 Subject: Thinking about sandboxing (take #2) In-Reply-To: <54DCA333.5010001@schokola.de> References: <20259.1423647571@critter.freebsd.dk> <34825.1423736551@critter.freebsd.dk> <54DCA333.5010001@schokola.de> Message-ID: <53203.1424074036@critter.freebsd.dk> -------- In message <54DCA333.5010001 at schokola.de>, Nils Goroll writes: >Yes, and there is further complication from vmods - so we might need to make the >worker privset user-configurable to allow for vmods requiring elevated privileges. And this is where I think complications exceed benefit... -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. 
From martin at varnish-software.com Mon Feb 16 14:12:02 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Mon, 16 Feb 2015 15:12:02 +0100 Subject: [PATCH] Expire callback implementation Message-ID: <1424095922-7670-1-git-send-email-martin@varnish-software.com> --- bin/varnishd/cache/cache.h | 18 +++++- bin/varnishd/cache/cache_expire.c | 77 +++++++++++++++++++++++++- bin/varnishd/cache/cache_hash.c | 2 +- bin/varnishd/storage/storage_persistent_silo.c | 2 +- 4 files changed, 93 insertions(+), 6 deletions(-) diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h index faaaf4f..6c17c52 100644 --- a/bin/varnishd/cache/cache.h +++ b/bin/varnishd/cache/cache.h @@ -288,6 +288,18 @@ struct exp { /*--------------------------------------------------------------------*/ +typedef void exp_callback_f(struct worker *wrk, struct objcore *oc, void *priv); +struct exp_callback { + unsigned magic; +#define EXP_CALLBACK_MAGIC 0xAB956EB1 + exp_callback_f *cb_insert; + exp_callback_f *cb_remove; + void *priv; + VTAILQ_ENTRY(exp_callback) list; +}; + +/*--------------------------------------------------------------------*/ + struct vsl_log { uint32_t *wlb, *wlp, *wle; unsigned wlr; @@ -772,13 +784,15 @@ void EXP_Clr(struct exp *e); double EXP_Ttl(const struct req *, const struct exp*); double EXP_When(const struct exp *exp); -void EXP_Insert(struct objcore *oc); -void EXP_Inject(struct objcore *oc, struct lru *lru); +void EXP_Insert(struct worker *wrk, struct objcore *oc); +void EXP_Inject(struct worker *wrk, struct objcore *oc, struct lru *lru); void EXP_Init(void); void EXP_Rearm(struct objcore *, double now, double ttl, double grace, double keep); void EXP_Touch(struct objcore *oc, double now); int EXP_NukeOne(struct worker *wrk, struct lru *lru); +void EXP_Reg_Callback(struct exp_callback *cb); +void EXP_Dereg_Callback(struct exp_callback *cb); /* cache_fetch.c */ enum vbf_fetch_mode_e { diff --git a/bin/varnishd/cache/cache_expire.c 
b/bin/varnishd/cache/cache_expire.c index 79a8d2f..be5ffc0 100644 --- a/bin/varnishd/cache/cache_expire.c +++ b/bin/varnishd/cache/cache_expire.c @@ -52,10 +52,47 @@ struct exp_priv { VTAILQ_HEAD(,objcore) inbox; struct binheap *heap; pthread_cond_t condvar; + + VTAILQ_HEAD(,exp_callback) cb_list; + pthread_rwlock_t cb_mtx; }; static struct exp_priv *exphdl; +static void +exp_insert_cb(struct worker *wrk, struct objcore *oc) +{ + struct exp_callback *cb; + + if (VTAILQ_EMPTY(&exphdl->cb_list)) + return; + + AZ(pthread_rwlock_rdlock(&exphdl->cb_mtx)); + VTAILQ_FOREACH(cb, &exphdl->cb_list, list) { + CHECK_OBJ_NOTNULL(cb, EXP_CALLBACK_MAGIC); + if (cb->cb_insert) + cb->cb_insert(wrk, oc, cb->priv); + } + AZ(pthread_rwlock_unlock(&exphdl->cb_mtx)); +} + +static void +exp_remove_cb(struct worker *wrk, struct objcore *oc) +{ + struct exp_callback *cb; + + if (VTAILQ_EMPTY(&exphdl->cb_list)) + return; + + AZ(pthread_rwlock_rdlock(&exphdl->cb_mtx)); + VTAILQ_FOREACH(cb, &exphdl->cb_list, list) { + CHECK_OBJ_NOTNULL(cb, EXP_CALLBACK_MAGIC); + if (cb->cb_remove) + cb->cb_remove(wrk, oc, cb->priv); + } + AZ(pthread_rwlock_unlock(&exphdl->cb_mtx)); +} + /*-------------------------------------------------------------------- * struct exp manipulations */ @@ -130,9 +167,10 @@ exp_mail_it(struct objcore *oc) */ void -EXP_Inject(struct objcore *oc, struct lru *lru) +EXP_Inject(struct worker *wrk, struct objcore *oc, struct lru *lru) { + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); AZ(oc->exp_flags & (OC_EF_OFFLRU | OC_EF_INSERT | OC_EF_MOVE)); @@ -146,6 +184,8 @@ EXP_Inject(struct objcore *oc, struct lru *lru) oc->timer_when = EXP_When(&oc->exp); Lck_Unlock(&lru->mtx); + exp_insert_cb(wrk, oc); + exp_mail_it(oc); } @@ -157,10 +197,11 @@ EXP_Inject(struct objcore *oc, struct lru *lru) */ void -EXP_Insert(struct objcore *oc) +EXP_Insert(struct worker *wrk, struct objcore *oc) { struct lru *lru; + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); 
CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); HSH_Ref(oc); @@ -177,6 +218,8 @@ EXP_Insert(struct objcore *oc) oc->exp_flags |= OC_EF_MOVE; Lck_Unlock(&lru->mtx); + exp_insert_cb(wrk, oc); + exp_mail_it(oc); } @@ -344,6 +387,32 @@ EXP_NukeOne(struct worker *wrk, struct lru *lru) return (1); } +void +EXP_Reg_Callback(struct exp_callback *cb) +{ + CHECK_OBJ_NOTNULL(cb, EXP_CALLBACK_MAGIC); + AZ(pthread_rwlock_wrlock(&exphdl->cb_mtx)); + VTAILQ_INSERT_TAIL(&exphdl->cb_list, cb, list); + AZ(pthread_rwlock_unlock(&exphdl->cb_mtx)); +} + +void +EXP_Dereg_Callback(struct exp_callback *cb) +{ + struct exp_callback *cb2; + + CHECK_OBJ_NOTNULL(cb, EXP_CALLBACK_MAGIC); + AZ(pthread_rwlock_wrlock(&exphdl->cb_mtx)); + VTAILQ_FOREACH(cb2, &exphdl->cb_list, list) { + CHECK_OBJ_NOTNULL(cb2, EXP_CALLBACK_MAGIC); + if (cb2 == cb) + break; + } + AN(cb2); + VTAILQ_REMOVE(&exphdl->cb_list, cb2, list); + AZ(pthread_rwlock_unlock(&exphdl->cb_mtx)); +} + /*-------------------------------------------------------------------- * Handle stuff in the inbox */ @@ -385,6 +454,7 @@ exp_inbox(struct exp_priv *ep, struct objcore *oc, double now) binheap_delete(ep->heap, oc->timer_idx); } assert(oc->timer_idx == BINHEAP_NOIDX); + exp_remove_cb(ep->wrk, oc); (void)HSH_DerefObjCore(ep->wrk, &oc); return; } @@ -464,6 +534,7 @@ exp_expire(struct exp_priv *ep, double now) CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC); VSLb(&ep->vsl, SLT_ExpKill, "EXP_Expired x=%u t=%.0f", ObjGetXID(ep->wrk, oc), EXP_Ttl(NULL, &oc->exp) - now); + exp_remove_cb(ep->wrk, oc); (void)HSH_DerefObjCore(ep->wrk, &oc); return (0); } @@ -545,6 +616,8 @@ EXP_Init(void) Lck_New(&ep->mtx, lck_exp); AZ(pthread_cond_init(&ep->condvar, NULL)); VTAILQ_INIT(&ep->inbox); + AZ(pthread_rwlock_init(&ep->cb_mtx, NULL)); + VTAILQ_INIT(&ep->cb_list); exphdl = ep; WRK_BgThread(&pt, "cache-timeout", exp_thread, ep); } diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index de35e8a..686d398 100644 --- 
a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -693,7 +693,7 @@ HSH_Unbusy(struct worker *wrk, struct objcore *oc) if (!(oc->flags & OC_F_PRIVATE)) { BAN_NewObjCore(oc); - EXP_Insert(oc); + EXP_Insert(wrk, oc); AN(oc->exp_flags & OC_EF_EXP); AN(oc->ban); } diff --git a/bin/varnishd/storage/storage_persistent_silo.c b/bin/varnishd/storage/storage_persistent_silo.c index 8d02ed3..8119aad 100644 --- a/bin/varnishd/storage/storage_persistent_silo.c +++ b/bin/varnishd/storage/storage_persistent_silo.c @@ -166,7 +166,7 @@ smp_load_seg(struct worker *wrk, const struct smp_sc *sc, oc->ban = BAN_RefBan(oc, so->ban, sc->tailban); HSH_Insert(wrk, so->hash, oc); oc->exp = so->exp; - EXP_Inject(oc, sg->lru); + EXP_Inject(wrk, oc, sg->lru); sg->nobj++; } Pool_Sumstat(wrk); -- 2.1.4 From dridi.boukelmoune at zenika.com Mon Feb 16 23:09:40 2015 From: dridi.boukelmoune at zenika.com (Dridi Boukelmoune) Date: Tue, 17 Feb 2015 00:09:40 +0100 Subject: [PATCH] HTTP date parsing and formatting Message-ID: Hi, I have a set of patches that serve two purposes: - fixing the current date parsing - not breaking when the locale changes --->8-------->8-------->8-------->8-------->8-------->8-------->8-------->8--- 0001-Add-a-new-function-VOID-debug.setlocale-STRING.patch This patch adds a setlocale function to the debug vmod, because only a vmod could change the locale. It also improves the test m00020 and makes it run again with a different locale. 0002-Replace-strftime-3-for-HTTP-1.1-date-formatting.patch 0003-Replace-strptime-3-for-HTTP-date-parsing.patch Straightforward replacement for strftime, a little more effort has been put in strptime's replacement (the latter is not compliant anyway). 0004-Add-a-new-VNUM_2real-function.patch Because when you think you're done, it turns out the whole libc relies on a process-wide locale. 0005-Remove-support-for-the-ISO-8601-date-format.patch Bonus patch, I don't know where the support for this date format comes from. 
If you decide to remove it, you can apply this patch. --->8-------->8-------->8-------->8-------->8-------->8-------->8-------->8--- I'm willing to continue looking for locale-dependent code and sending patches, I'd like to see how this patch set will be commented first. The locale part is really just for vmods that may do a library call that would lead to setlocale. If you don't want the locale part, I can send a smaller patch set that will only improve the date parsing correctness (and the test case). Best Regards, Dridi -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Add-a-new-function-VOID-debug.setlocale-STRING.patch Type: text/x-patch Size: 3783 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 0002-Replace-strftime-3-for-HTTP-1.1-date-formatting.patch Type: text/x-patch Size: 1657 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 0003-Replace-strptime-3-for-HTTP-date-parsing.patch Type: text/x-patch Size: 5034 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 0004-Add-a-new-VNUM_2real-function.patch Type: text/x-patch Size: 4655 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 0005-Remove-support-for-the-ISO-8601-date-format.patch Type: text/x-patch Size: 4047 bytes Desc: not available URL: From phk at phk.freebsd.dk Wed Feb 18 19:19:58 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Wed, 18 Feb 2015 19:19:58 +0000 Subject: Jail, outstanding details In-Reply-To: References: Message-ID: <89611.1424287198@critter.freebsd.dk> In message , Poul-Henning Kamp writes: > Move creation of workdir into jail code, and use the master HIGH/LOW > around socket operations which may be on reserved ports. 
This is all presuming jail=unix which means Varnish was started as root. I am uncertain if creating/opening the storage files should be done at "MASTER_HIGH" (= root) or "MASTER_LOW" (= varnish user) privilege level. I'm sort of leaning "MASTER_HIGH" on general principles, but if anybody has input, I'd like to hear it... -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From slink at schokola.de Thu Feb 19 09:40:15 2015 From: slink at schokola.de (Nils Goroll) Date: Thu, 19 Feb 2015 10:40:15 +0100 Subject: Jail, outstanding details In-Reply-To: <89611.1424287198@critter.freebsd.dk> References: <89611.1424287198@critter.freebsd.dk> Message-ID: <54E5AF7F.4080301@schokola.de> On 18/02/15 20:19, Poul-Henning Kamp wrote: > I am uncertain if creating/opening the storage files should be done > at "MASTER_HIGH" (= root) or "MASTER_LOW" (= varnish user) privilege > level. Going back to the loose end of our previous discussion, I'd see storage files on the same level as _.secret in the suggested directory/permissions scheme: 640 $master_user:$group where $master_user=root for unix jail or something else for other, $group=varnish per default On 12/02/15 13:36, Nils Goroll wrote: > # e.g. /tmp/varnish_name > > -n directory: 755 $master_user:$group > _.vsm 640 $master_user:$group_vsm (!feature::public_vsm) > _.vsm 644 $master_user:$group_vsm (feature::public_vsm) > _.secret 640 $master_user:$group > > # e.g. 
/tmp/varnish_name.vcc > > $vcc_dir: 750 $user:$group > vcl.*.c 660 $user:$group (temporary file) > vcl.*.so 440 $user:$group From martin at varnish-software.com Thu Feb 19 10:00:13 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Thu, 19 Feb 2015 11:00:13 +0100 Subject: Jail, outstanding details In-Reply-To: <89611.1424287198@critter.freebsd.dk> References: <89611.1424287198@critter.freebsd.dk> Message-ID: On 18 February 2015 at 20:19, Poul-Henning Kamp wrote: > In message , Poul-Henning > Kamp > writes: > > > Move creation of workdir into jail code, and use the master HIGH/LOW > > around socket operations which may be on reserved ports. > > This is all presuming jail=unix which means Varnish was started as root. > > I am uncertain if creating/opening the storage files should be done > at "MASTER_HIGH" (= root) or "MASTER_LOW" (= varnish user) privilege > level. > We've been looking at the option of having block device as storage instead of going through the filesystem. These devices usually have special rules setting up the permissions and such on each boot, making it useful to be root to avoid permission problems when opening the device. (Changing the rules to assign the device to the specified user is probably the "right" way of doing things though). Martin > > I'm sort of leaning "MASTER_HIGH" on general principles, but if anybody > has input, I'd like to hear it... > > -- > Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 > phk at FreeBSD.ORG | TCP/IP since RFC 956 > FreeBSD committer | BSD since 4.3-tahoe > Never attribute to malice what can adequately be explained by incompetence. > > _______________________________________________ > varnish-dev mailing list > varnish-dev at varnish-cache.org > https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev > -- *Martin Blix Grydeland* Senior Developer | Varnish Software AS Mobile: +47 992 74 756 We Make Websites Fly! 
-------------- next part -------------- An HTML attachment was scrubbed... URL: From phk at phk.freebsd.dk Fri Feb 20 11:14:48 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Fri, 20 Feb 2015 11:14:48 +0000 Subject: Jail, outstanding details In-Reply-To: References: <89611.1424287198@critter.freebsd.dk> Message-ID: <62874.1424430888@critter.freebsd.dk> -------- In message , Martin Blix Grydeland writes: >--001a114031a08be765050f6e0046 >Content-Type: text/plain; charset=UTF-8 > >On 18 February 2015 at 20:19, Poul-Henning Kamp wrote: > >> In message , Poul-Henning >> Kamp >> writes: >> >> > Move creation of workdir into jail code, and use the master HIGH/LOW >> > around socket operations which may be on reserved ports. >> >> This is all presuming jail=unix which means Varnish was started as root. >> >> I am uncertain if creating/opening the storage files should be done >> at "MASTER_HIGH" (= root) or "MASTER_LOW" (= varnish user) privilege >> level. > >We've been looking at the option of having block device as storage instead >of going through the filesystem. These devices usually have special rules >setting up the permissions and such on each boot [...] Yes, that's one of the many reasons why I think that storage files belong in the "command line domain", and neither CLI users nor anybody else should need or can beneficially use access to the storage files "out of band". -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. 
From phk at phk.freebsd.dk Tue Feb 24 09:26:07 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Tue, 24 Feb 2015 09:26:07 +0000 Subject: [PATCH] HTTP date parsing and formatting In-Reply-To: References: Message-ID: <91146.1424769967@critter.freebsd.dk> -------- In message , Dridi Boukelmoune writes: >I have a set of patches that serve two purposes: >- fixing the current date parsing >- not breaking when the locale changes I think I have solved these issues now. Thank you for sending patches, even though I hardly used them in this case, mostly because I wanted to go a little deeper in the solution than you did. -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From dridi.boukelmoune at zenika.com Tue Feb 24 12:08:05 2015 From: dridi.boukelmoune at zenika.com (Dridi Boukelmoune) Date: Tue, 24 Feb 2015 13:08:05 +0100 Subject: [PATCH] HTTP date parsing and formatting In-Reply-To: <91146.1424769967@critter.freebsd.dk> References: <91146.1424769967@critter.freebsd.dk> Message-ID: On Tue, Feb 24, 2015 at 10:26 AM, Poul-Henning Kamp wrote: > -------- > In message > , Dridi Boukelmoune writes: > >>I have a set of patches that serve two purposes: >>- fixing the current date parsing >>- not breaking when the locale changes > > I think I have solved these issues now. Yes, my test is passing (see below) > Thank you for sending patches, even though I hardly used them > in this case, mostly because I wanted to go a little deeper in the > solution than you did. A "little" :-) I have one question though. Why did you keep the ISO 8601 format for parsing? Also please find my first patch rebased against the current master. It's an improvement of m00020.vtc (testing both parsing and formatting) and it adds a setlocale capability to the debug vmod. 
Best Regards, Dridi -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Add-a-new-function-VOID-debug.setlocale-STRING.patch Type: text/x-patch Size: 3438 bytes Desc: not available URL: From phk at phk.freebsd.dk Wed Feb 25 08:46:58 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Wed, 25 Feb 2015 08:46:58 +0000 Subject: [PATCH] HTTP date parsing and formatting In-Reply-To: References: <91146.1424769967@critter.freebsd.dk> Message-ID: <30841.1424854018@critter.freebsd.dk> -------- In message , Dridi Boukelmoune writes: >Why did you keep the ISO 8601 format for parsing? Because I can't remember why we added it in the first place, and until I dig out notes about that, I won't remove it. >Also please find my first patch rebased against the current master. >It's an improvement of m00020.vtc (testing both parsing and >formatting) and it adds a setlocale capability to the debug vmod. I don't really see the value of the setlocale ? -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From dridi.boukelmoune at zenika.com Wed Feb 25 09:16:45 2015 From: dridi.boukelmoune at zenika.com (Dridi Boukelmoune) Date: Wed, 25 Feb 2015 10:16:45 +0100 Subject: [PATCH] HTTP date parsing and formatting In-Reply-To: <30841.1424854018@critter.freebsd.dk> References: <91146.1424769967@critter.freebsd.dk> <30841.1424854018@critter.freebsd.dk> Message-ID: On Wed, Feb 25, 2015 at 9:46 AM, Poul-Henning Kamp wrote: > -------- > In message > , Dridi Boukelmoune writes: > >>Why did you keep the ISO 8601 format for parsing? > > Because I can't remember why we added it in the first place, and until > I dig out notes about that, I won't remove it. 
It's been added in a commit about gettimeofday: https://github.com/varnish/Varnish-Cache/commit/be358c29.patch >>Also please find my first patch rebased against the current master. >>It's an improvement of m00020.vtc (testing both parsing and >>formatting) and it adds a setlocale capability to the debug vmod. > > I don't really see the value of the setlocale ? I'm setting a different locale for this very test (also part of the patch) but it could be handy in other places. In my initial patch set, I had added it to show how a vmod could compromise the child's locale. You have removed some str?time and strtod calls from the code base but there could be other occurrences to replace (and other functions in libc to not use). As I said in my first email, I am willing to continue submitting locale-related patches, so debug.setlocale would help. Cheers, Dridi From phk at phk.freebsd.dk Wed Feb 25 09:53:26 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Wed, 25 Feb 2015 09:53:26 +0000 Subject: [PATCH] HTTP date parsing and formatting In-Reply-To: References: <91146.1424769967@critter.freebsd.dk> <30841.1424854018@critter.freebsd.dk> Message-ID: <76303.1424858006@critter.freebsd.dk> -------- In message , Dridi Boukelmoune writes: >I'm setting a different locale for this very test (also part of the >patch) but it could be handy in other places. Yeah, but now that we've rewritten the vtim.c code to not be locale dependent, that's sort of pointless ? >In my initial patch set, I had added it to show how a vmod could >compromise the child's locale. You have removed some str?time and >strtod calls from the code base but there could be other occurrences >to replace (and other functions in libc to not use). Right, but then we'll tackle it in that context, it doesn't belong in m00020 any more. >As I said in my first email, I am willing to continue submitting >locale-related patches, so debug.setlocale would help. By all means use it to find such trouble! 
I just don't see the point in adding it to the vmod on a permanent basis, unless we have code we specifically need to test. (Remember: Not all machines may have any given locale installed...) -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From dridi.boukelmoune at zenika.com Wed Feb 25 14:23:24 2015 From: dridi.boukelmoune at zenika.com (Dridi Boukelmoune) Date: Wed, 25 Feb 2015 15:23:24 +0100 Subject: [PATCH] HTTP date parsing and formatting In-Reply-To: <76303.1424858006@critter.freebsd.dk> References: <91146.1424769967@critter.freebsd.dk> <30841.1424854018@critter.freebsd.dk> <76303.1424858006@critter.freebsd.dk> Message-ID: On Wed, Feb 25, 2015 at 10:53 AM, Poul-Henning Kamp wrote: > Yeah, but now that we've rewritten the vtim.c code to not be locale > dependent, that's sort of pointless ? Pointless but convenient because I would not have to keep it around in my working copy. > By all means use it to find such trouble! I just don't see the point > in adding it to the vmod on a permanent basis, unless we have code > we specifically need to test. Mainly for my very own convenience :) Also to sort of "document" locale awareness in relevant test cases, like regression tests would do with a bug that no longer exists. > (Remember: Not all machines may have any given locale installed...) That is indeed a problem. I suppose I could enforce a set of locales at build time with autoconf, but it may not be welcomed. I'll simply keep debug.setlocale around to find more issues. 
Cheers, Dridi From phk at phk.freebsd.dk Wed Feb 25 14:35:19 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Wed, 25 Feb 2015 14:35:19 +0000 Subject: [PATCH] HTTP date parsing and formatting In-Reply-To: References: <91146.1424769967@critter.freebsd.dk> <30841.1424854018@critter.freebsd.dk> <76303.1424858006@critter.freebsd.dk> Message-ID: <55397.1424874919@critter.freebsd.dk> -------- In message , Dridi Boukelmoune writes: >> (Remember: Not all machines may have any given locale installed...) > >That is indeed a problem. I suppose I could enforce a set of locales >at build time with autoconf, but it may not be welcomed. I really want to try to keep all the locale-mess out of Varnish if at all possible, which is why I welcome your effort to identify it :-) >I'll simply keep debug.setlocale around to find more issues. Please do. -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From martin at varnish-software.com Wed Feb 25 15:00:13 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Wed, 25 Feb 2015 16:00:13 +0100 Subject: [PATCH 1/3] Implement the init counter hack in vmod_debug Message-ID: <1424876415-8809-1-git-send-email-martin@varnish-software.com> This allows one time only initialization for vmods --- lib/libvmod_debug/vmod_debug.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/libvmod_debug/vmod_debug.c b/lib/libvmod_debug/vmod_debug.c index b6736fb..7bd719f 100644 --- a/lib/libvmod_debug/vmod_debug.c +++ b/lib/libvmod_debug/vmod_debug.c @@ -36,6 +36,8 @@ #include "vrt.h" #include "vcc_if.h" +static int n_init = 0; + VCL_VOID __match_proto__(td_debug_panic) vmod_panic(VRT_CTX, const char *str, ...) 
{ @@ -65,13 +67,23 @@ vmod_author(VRT_CTX, VCL_ENUM id) WRONG("Illegal VMOD enum"); } +static void +fini_function(void *priv) +{ + assert(priv == &n_init); + + assert(n_init > 0); + n_init--; +} + int init_function(struct vmod_priv *priv, const struct VCL_conf *cfg) { (void)cfg; - priv->priv = strdup("FOO"); - priv->free = free; + priv->priv = &n_init; + priv->free = fini_function; + n_init++; return (0); } @@ -105,7 +117,7 @@ vmod_test_priv_vcl(VRT_CTX, struct vmod_priv *priv) { CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC); - assert(!strcmp(priv->priv, "FOO")); + assert(priv->priv == &n_init); } VCL_BLOB -- 2.1.4 From martin at varnish-software.com Wed Feb 25 15:00:14 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Wed, 25 Feb 2015 16:00:14 +0100 Subject: [PATCH 2/3] Make vmod_debug register for expiry callbacks In-Reply-To: <1424876415-8809-1-git-send-email-martin@varnish-software.com> References: <1424876415-8809-1-git-send-email-martin@varnish-software.com> Message-ID: <1424876415-8809-2-git-send-email-martin@varnish-software.com> --- lib/libvmod_debug/vmod_debug.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/lib/libvmod_debug/vmod_debug.c b/lib/libvmod_debug/vmod_debug.c index 7bd719f..69256d2 100644 --- a/lib/libvmod_debug/vmod_debug.c +++ b/lib/libvmod_debug/vmod_debug.c @@ -38,6 +38,26 @@ static int n_init = 0; +static struct exp_callback exp_cb; + +static void __match_proto__(exp_callback_f) +exp_cb_insert(struct worker *wrk, struct objcore *oc, void *priv) +{ + + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + VSL(SLT_Debug, 0, "exp_cb_insert: %p", oc); + assert(priv == &exp_cb); +} + +static void __match_proto__(exp_callback_f) +exp_cb_remove(struct worker *wrk, struct objcore *oc, void *priv) +{ + + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + VSL(SLT_Debug, 0, "exp_cb_remove: %p", oc); + assert(priv == &exp_cb); +} + VCL_VOID __match_proto__(td_debug_panic) vmod_panic(VRT_CTX, const char *str, ...) 
{ @@ -74,6 +94,10 @@ fini_function(void *priv) assert(n_init > 0); n_init--; + if (n_init == 0) { + EXP_Dereg_Callback(&exp_cb); + VSL(SLT_Debug, 0, "exp_cb: deregistered"); + } } int @@ -83,6 +107,15 @@ init_function(struct vmod_priv *priv, const struct VCL_conf *cfg) priv->priv = &n_init; priv->free = fini_function; + if (n_init == 0) { + memset(&exp_cb, 0, sizeof exp_cb); + exp_cb.magic = EXP_CALLBACK_MAGIC; + exp_cb.cb_insert = exp_cb_insert; + exp_cb.cb_remove = exp_cb_remove; + exp_cb.priv = &exp_cb; + EXP_Reg_Callback(&exp_cb); + VSL(SLT_Debug, 0, "exp_cb: registered"); + } n_init++; return (0); } -- 2.1.4 From martin at varnish-software.com Wed Feb 25 15:00:15 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Wed, 25 Feb 2015 16:00:15 +0100 Subject: [PATCH 3/3] Test case for expiry callbacks In-Reply-To: <1424876415-8809-1-git-send-email-martin@varnish-software.com> References: <1424876415-8809-1-git-send-email-martin@varnish-software.com> Message-ID: <1424876415-8809-3-git-send-email-martin@varnish-software.com> --- bin/varnishtest/tests/m00021.vtc | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 bin/varnishtest/tests/m00021.vtc diff --git a/bin/varnishtest/tests/m00021.vtc b/bin/varnishtest/tests/m00021.vtc new file mode 100644 index 0000000..df70e02 --- /dev/null +++ b/bin/varnishtest/tests/m00021.vtc @@ -0,0 +1,45 @@ +varnishtest "Test expiry callbacks" + +server s1 { + rxreq + txresp +} -start + +varnish v1 -vcl+backend {} -start + +varnish v1 -cliok "param.set debug +vclrel" + +logexpect l1 -v v1 -g raw { + expect * 0 Debug "exp_cb: registered" + expect * 0 Debug "exp_cb_insert: 0x[0-9a-f]+" + expect * 0 Debug "exp_cb_remove: 0x[0-9a-f]+" + expect * 0 Debug "exp_cb: deregistered" +} -start + +varnish v1 -vcl+backend { + import ${vmod_debug}; + + sub vcl_recv { + if (req.method == "PURGE") { + return (purge); + } + } +} + +client c1 { + txreq + rxresp + expect resp.status == 200 + + 
txreq -req PURGE + rxresp +} -run +varnish v1 -expect n_object == 0 + +varnish v1 -vcl+backend {} +varnish v1 -cliok "vcl.discard vcl2" +varnish v1 -cliok "debug.vmod" +varnish v1 -cliok "vcl.list" +varnish v1 -expect vmods == 0 + +logexpect l1 -wait -- 2.1.4 From phk at phk.freebsd.dk Wed Feb 25 19:40:41 2015 From: phk at phk.freebsd.dk (Poul-Henning Kamp) Date: Wed, 25 Feb 2015 19:40:41 +0000 Subject: [PATCH 2/3] Make vmod_debug register for expiry callbacks In-Reply-To: <1424876415-8809-2-git-send-email-martin@varnish-software.com> References: <1424876415-8809-1-git-send-email-martin@varnish-software.com> <1424876415-8809-2-git-send-email-martin@varnish-software.com> Message-ID: <96426.1424893241@critter.freebsd.dk> -------- In message <1424876415-8809-2-git-send-email-martin at varnish-software.com>, Mart in Blix Grydeland writes: I'm not so keen on vmod_debug _always_ doing this, in particular not the VSL spamming. Isn't there some more delicate way we can do it ? From vcl_init{} maybe ? >--- > lib/libvmod_debug/vmod_debug.c | 33 +++++++++++++++++++++++++++++++++ > 1 file changed, 33 insertions(+) > >diff --git a/lib/libvmod_debug/vmod_debug.c b/lib/libvmod_debug/vmod_debug.c >index 7bd719f..69256d2 100644 >--- a/lib/libvmod_debug/vmod_debug.c >+++ b/lib/libvmod_debug/vmod_debug.c >@@ -38,6 +38,26 @@ > > static int n_init = 0; > >+static struct exp_callback exp_cb; >+ >+static void __match_proto__(exp_callback_f) >+exp_cb_insert(struct worker *wrk, struct objcore *oc, void *priv) >+{ >+ >+ CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); >+ VSL(SLT_Debug, 0, "exp_cb_insert: %p", oc); >+ assert(priv == &exp_cb); >+} >+ >+static void __match_proto__(exp_callback_f) >+exp_cb_remove(struct worker *wrk, struct objcore *oc, void *priv) >+{ >+ >+ CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); >+ VSL(SLT_Debug, 0, "exp_cb_remove: %p", oc); >+ assert(priv == &exp_cb); >+} >+ > VCL_VOID __match_proto__(td_debug_panic) > vmod_panic(VRT_CTX, const char *str, ...) 
> { >@@ -74,6 +94,10 @@ fini_function(void *priv) > > assert(n_init > 0); > n_init--; >+ if (n_init == 0) { >+ EXP_Dereg_Callback(&exp_cb); >+ VSL(SLT_Debug, 0, "exp_cb: deregistered"); >+ } > } > > int >@@ -83,6 +107,15 @@ init_function(struct vmod_priv *priv, const struct VCL_conf *cfg) > > priv->priv = &n_init; > priv->free = fini_function; >+ if (n_init == 0) { >+ memset(&exp_cb, 0, sizeof exp_cb); >+ exp_cb.magic = EXP_CALLBACK_MAGIC; >+ exp_cb.cb_insert = exp_cb_insert; >+ exp_cb.cb_remove = exp_cb_remove; >+ exp_cb.priv = &exp_cb; >+ EXP_Reg_Callback(&exp_cb); >+ VSL(SLT_Debug, 0, "exp_cb: registered"); >+ } > n_init++; > return (0); > } >-- >2.1.4 > -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk at FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence. From arianna.aondio at varnish-software.com Thu Feb 26 09:32:23 2015 From: arianna.aondio at varnish-software.com (Arianna Aondio) Date: Thu, 26 Feb 2015 10:32:23 +0100 Subject: Cache request body and user-accesible functions. Message-ID: VDD Hamburg talking point: Context: Starting from Varnish 4 we can buffer the request body (usually POST and PUT requests) before sending it to the backend. Now we have just one function accessible to users: std.cache_req_body(BYTES size) which initializes the buffering. Once the request body has been cached, it can be consumed as many times as needed, making it available to other user-accesible functions, such as: * request body length access function * regular expression match on request body * regular expression substitution on request body * request body as input in vcl_hash Problems: 1. Bug #1664, std.cache_req_body(BYTES size) lacks of errors handling, if it is called with a request body bigger than size, Varnish crashes and if we have a chunked request the function will cache every request bodies ignoring the provided size limitation. 2. 
Regular expression match on body: how do we want the user interface to be, do we want the function to return a boolean indicating if the request body contains the string the user is looking for? In VCL this can look like : sub vcl_recv { set req.http.x-boolean1 = std.regex_req_body("varnish rocks"); } Or do we want to be more aligned with the regex syntax and make the request body completely available to the user? In VCL this can look like : sub vcl_recv { if (std.reqbody_re_match() ~ "varnish rocks") { .... } } 3. Regular expression substitution on body, this function needs to be discussed. Do we really need to be able to substitute on the request body? Is it safe? How do we handle the possible increase of request body? Proposed solutions: 1. As decided a couple of weeks ago during a bugwash, we either buffer the whole request body or fail the request. I have a patch for this: if the request body is bigger than the given size, we close the connection and move forward to the next request. 2. && 3. to be discussed. Request body length access function: once the request body has been cached, we can then iterate over it and return the number of bytes. Request body as input in vcl_hash: once the request body has been cached, we can hash on it. This function should be available just in vcl_hash. Until now we have always just hashed on strings, but if we want to hash on bodies we need to be aware that they can be binary, so we need to handle this properly. I think functions regarding request body manipulation should be part of the std.vmod. General considerations: Request bodies may contain binary data that headers should not contain. Functions have to be able to handle any kind of request body. 
-- Arianna Aondio Software Developer | Varnish Software AS Mobile: +47 980 62 619 We Make Websites Fly www.varnish-software.com From slink at schokola.de Thu Feb 26 10:27:05 2015 From: slink at schokola.de (Nils Goroll) Date: Thu, 26 Feb 2015 11:27:05 +0100 Subject: suggesting to increase timeout_req Message-ID: <54EEF4F9.9050208@schokola.de> This tcpdump output illustrates an issue we seem to have with default Linux tcp timeouts and the default timeout_req of 2 seconds: 16:47:44.542049 IP client.49550 > varnish.80: Flags [S], seq 29295818, win 4380, options [mss 1460,sackOK,eol], length 0 16:47:44.542080 IP varnish.80 > client.49550: Flags [S.], seq 3652568857, ack 29295819, win 29200, options [mss 1460,nop,nop,sackOK], length 0 16:47:44.542250 IP client.49550 > varnish.80: Flags [.], ack 1, win 4380, length 0 16:47:46.080501 IP client.49550 > varnish.80: Flags [P.], seq 1:1453, ack 1, win 4380, length 1452 16:47:46.080528 IP varnish.80 > client.49550: Flags [.], ack 1453, win 31944, length 0 16:47:48.082783 IP varnish.80 > client.49550: Flags [F.], seq 1, ack 1453, win 31944, length 0 16:47:48.083070 IP client.49550 > varnish.80: Flags [.], ack 2, win 4380, length 0 16:47:48.350763 IP client.49550 > varnish.80: Flags [P.], seq 1453:2905, ack 2, win 4380, length 1452 16:47:48.350792 IP varnish.80 > client.49550: Flags [R], seq 3652568859, win 0, length 0 The packet at 16:47:46.080501 contains the first part of a request up to the start of a very long cookie line. At 16:47:48 varnish closes after reaching timeout_req of 2s. Then, the client immediately acks. My understanding is that the varnish->client ack 1453 got lost and the client did not get around to retransmitting seq 1:1453 before we timed out. The most helpful online reference regarding recommended initial tcp retransmission timeouts I have found so far is http://tools.ietf.org/html/rfc6298#ref-PA00 In summary, an initial timeout (RTO) of 1s is now recommended, but the former 3s RTO remains valid. 
So, for any client following the former 3s recommendation, currently we don't even tolerate a single packet retransmission after 3way is complete. For those clients following the new 1s recommended RTO, timing is also really tight; it seems unlikely that we tolerate retransmission of two packets. Based on this, I'd suggest to raise the default timeout_req to 7 seconds to allow for two retransmissions at RTO=3. This seems to be particularly relevant with the growing popularity of mobile clients. The risk is increased resource usage for malicious requests. To address it, I'd suggest to document that lowering timeout_req can be an option to mitigate certain DoS (slowloris) attacks. Nils From martin at varnish-software.com Thu Feb 26 11:00:23 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Thu, 26 Feb 2015 12:00:23 +0100 Subject: [PATCH 2/3] Add a function to register expiry callbacks in vmod_debug In-Reply-To: <1424948424-30508-1-git-send-email-martin@varnish-software.com> References: <1424948424-30508-1-git-send-email-martin@varnish-software.com> Message-ID: <1424948424-30508-2-git-send-email-martin@varnish-software.com> --- lib/libvmod_debug/vmod.vcc | 4 ++++ lib/libvmod_debug/vmod_debug.c | 47 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/lib/libvmod_debug/vmod.vcc b/lib/libvmod_debug/vmod.vcc index b5d108c..8e940f4 100644 --- a/lib/libvmod_debug/vmod.vcc +++ b/lib/libvmod_debug/vmod.vcc @@ -93,3 +93,7 @@ Encrypt the HTTP header with quad-ROT13 encryption, $Function STRING argtest(STRING one, REAL two=2, STRING three="3") $Function INT vre_limit() + +$Function VOID register_exp_callback(PRIV_VCL) + +Register the vmod to receive expiry callbacks diff --git a/lib/libvmod_debug/vmod_debug.c b/lib/libvmod_debug/vmod_debug.c index 72bbbe4..7d21c16 100644 --- a/lib/libvmod_debug/vmod_debug.c +++ b/lib/libvmod_debug/vmod_debug.c @@ -40,6 +40,7 @@ struct priv_vcl { unsigned magic; #define PRIV_VCL_MAGIC 
0x8E62FA9D char *foo; + struct exp_callback *exp_cb; }; VCL_VOID __match_proto__(td_debug_panic) @@ -176,6 +177,45 @@ vmod_vre_limit(VRT_CTX) return (cache_param->vre_limits.match); } +static void __match_proto__(exp_callback_f) +exp_cb_insert(struct worker *wrk, struct objcore *oc, void *priv) +{ + const struct priv_vcl *priv_vcl; + + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + CAST_OBJ_NOTNULL(priv_vcl, priv, PRIV_VCL_MAGIC); + VSL(SLT_Debug, 0, "exp_cb: insert %p", oc); +} + +static void __match_proto__(exp_callback_f) +exp_cb_remove(struct worker *wrk, struct objcore *oc, void *priv) +{ + const struct priv_vcl *priv_vcl; + + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + CAST_OBJ_NOTNULL(priv_vcl, priv, PRIV_VCL_MAGIC); + VSL(SLT_Debug, 0, "exp_cb: remove %p", oc); +} + +VCL_VOID +vmod_register_exp_callback(VRT_CTX, struct vmod_priv *priv) +{ + struct priv_vcl *priv_vcl; + + CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC); + CAST_OBJ_NOTNULL(priv_vcl, priv->priv, PRIV_VCL_MAGIC); + AZ(priv_vcl->exp_cb); + ALLOC_OBJ(priv_vcl->exp_cb, EXP_CALLBACK_MAGIC); + AN(priv_vcl->exp_cb); + priv_vcl->exp_cb->cb_insert = exp_cb_insert; + priv_vcl->exp_cb->cb_remove = exp_cb_remove; + priv_vcl->exp_cb->priv = priv_vcl; + EXP_Reg_Callback(priv_vcl->exp_cb); + VSL(SLT_Debug, 0, "exp_cb: registered"); +} + static void __match_proto__(vmod_priv_free_f) priv_vcl_free(void *priv) { @@ -184,6 +224,13 @@ priv_vcl_free(void *priv) CAST_OBJ_NOTNULL(priv_vcl, priv, PRIV_VCL_MAGIC); AN(priv_vcl->foo); free(priv_vcl->foo); + if (priv_vcl->exp_cb != NULL) { + CHECK_OBJ_NOTNULL(priv_vcl->exp_cb, EXP_CALLBACK_MAGIC); + EXP_Dereg_Callback(priv_vcl->exp_cb); + FREE_OBJ(priv_vcl->exp_cb); + AZ(priv_vcl->exp_cb); + VSL(SLT_Debug, 0, "exp_cb: deregistered"); + } FREE_OBJ(priv_vcl); AZ(priv_vcl); } -- 2.1.4 From martin at varnish-software.com Thu Feb 26 11:00:22 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Thu, 
26 Feb 2015 12:00:22 +0100 Subject: [PATCH 1/3] Change vmod_debug's PRIV_VCL to point to a struct instead of a single value Message-ID: <1424948424-30508-1-git-send-email-martin@varnish-software.com> This is in preparation for being able to use it for storing multiple values --- lib/libvmod_debug/vmod_debug.c | 50 ++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/lib/libvmod_debug/vmod_debug.c b/lib/libvmod_debug/vmod_debug.c index b6736fb..72bbbe4 100644 --- a/lib/libvmod_debug/vmod_debug.c +++ b/lib/libvmod_debug/vmod_debug.c @@ -36,6 +36,12 @@ #include "vrt.h" #include "vcc_if.h" +struct priv_vcl { + unsigned magic; +#define PRIV_VCL_MAGIC 0x8E62FA9D + char *foo; +}; + VCL_VOID __match_proto__(td_debug_panic) vmod_panic(VRT_CTX, const char *str, ...) { @@ -65,16 +71,6 @@ vmod_author(VRT_CTX, VCL_ENUM id) WRONG("Illegal VMOD enum"); } -int -init_function(struct vmod_priv *priv, const struct VCL_conf *cfg) -{ - (void)cfg; - - priv->priv = strdup("FOO"); - priv->free = free; - return (0); -} - VCL_VOID __match_proto__(td_debug_test_priv_call) vmod_test_priv_call(VRT_CTX, struct vmod_priv *priv) { @@ -103,9 +99,13 @@ vmod_test_priv_task(VRT_CTX, struct vmod_priv *priv, VCL_STRING s) VCL_VOID __match_proto__(td_debug_test_priv_vcl) vmod_test_priv_vcl(VRT_CTX, struct vmod_priv *priv) { + struct priv_vcl *priv_vcl; CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC); - assert(!strcmp(priv->priv, "FOO")); + AN(priv); + CAST_OBJ_NOTNULL(priv_vcl, priv->priv, PRIV_VCL_MAGIC); + AN(priv_vcl->foo); + assert(!strcmp(priv_vcl->foo, "FOO")); } VCL_BLOB @@ -175,3 +175,31 @@ vmod_vre_limit(VRT_CTX) (void)ctx; return (cache_param->vre_limits.match); } + +static void __match_proto__(vmod_priv_free_f) +priv_vcl_free(void *priv) +{ + struct priv_vcl *priv_vcl; + + CAST_OBJ_NOTNULL(priv_vcl, priv, PRIV_VCL_MAGIC); + AN(priv_vcl->foo); + free(priv_vcl->foo); + FREE_OBJ(priv_vcl); + AZ(priv_vcl); +} + +int __match_proto__(vmod_init_f) 
+init_function(struct vmod_priv *priv, const struct VCL_conf *cfg) +{ + struct priv_vcl *priv_vcl; + + (void)cfg; + + ALLOC_OBJ(priv_vcl, PRIV_VCL_MAGIC); + AN(priv_vcl); + priv_vcl->foo = strdup("FOO"); + AN(priv_vcl->foo); + priv->priv = priv_vcl; + priv->free = priv_vcl_free; + return (0); +} -- 2.1.4 From martin at varnish-software.com Thu Feb 26 11:00:24 2015 From: martin at varnish-software.com (Martin Blix Grydeland) Date: Thu, 26 Feb 2015 12:00:24 +0100 Subject: [PATCH 3/3] Test case for expiry callbacks In-Reply-To: <1424948424-30508-1-git-send-email-martin@varnish-software.com> References: <1424948424-30508-1-git-send-email-martin@varnish-software.com> Message-ID: <1424948424-30508-3-git-send-email-martin@varnish-software.com> --- bin/varnishtest/tests/m00021.vtc | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 bin/varnishtest/tests/m00021.vtc diff --git a/bin/varnishtest/tests/m00021.vtc b/bin/varnishtest/tests/m00021.vtc new file mode 100644 index 0000000..f13a6fb --- /dev/null +++ b/bin/varnishtest/tests/m00021.vtc @@ -0,0 +1,49 @@ +varnishtest "Test expiry callbacks" + +server s1 { + rxreq + txresp +} -start + +varnish v1 -vcl+backend {} -start + +varnish v1 -cliok "param.set debug +vclrel" + +logexpect l1 -v v1 -g raw { + expect * 0 Debug "exp_cb: registered" + expect * 0 Debug "exp_cb: insert 0x[0-9a-f]+" + expect * 0 Debug "exp_cb: remove 0x[0-9a-f]+" + expect * 0 Debug "exp_cb: deregistered" +} -start + +varnish v1 -vcl+backend { + import ${vmod_debug}; + + sub vcl_init { + debug.register_exp_callback(); + } + + sub vcl_recv { + if (req.method == "PURGE") { + return (purge); + } + } +} + +client c1 { + txreq + rxresp + expect resp.status == 200 + + txreq -req PURGE + rxresp +} -run +varnish v1 -expect n_object == 0 + +varnish v1 -vcl+backend {} +varnish v1 -cliok "vcl.discard vcl2" +varnish v1 -cliok "debug.vmod" +varnish v1 -cliok "vcl.list" +varnish v1 -expect vmods == 0 + +logexpect l1 -wait -- 
2.1.4 From fgsch at lodoss.net Fri Feb 27 11:25:44 2015 From: fgsch at lodoss.net (Federico Schwindt) Date: Fri, 27 Feb 2015 11:25:44 +0000 Subject: Cache request body and user-accesible functions. In-Reply-To: References: Message-ID: Thinking out loud.. For # 2, what about something like this? req.body.data req.body.length req.body.is_binary (Content-Length != strlen) or: req.body.blob req.body.string req.body.is_blob My reasoning for this is to be able to use existing functions / vmods - I expect the body to be urlencoded most of the time. For binary (is_binary) or blob (is_blob) we'll need new functions that take the length, e.g. hash_ndata(req.body.data, req.body.len) or use the blob directly e.g. hash_blob(req.body.blob). That said, this makes the caller responsible for using the right interface so it might not be the right approach. OTOH having a set of special functions to work with the body means we're defining (limiting?) what can be done until we have body aware vmods. One way to get away with this, although fugly, could be by changing signatures, restricting arguments in the vcc compiler and making these functions a bit smarter, e.g.: hash_data(req.body); In this case hash_data() will internally know what (length) to use. This might work in Varnish core but will require specific handling outside though. Another alternative would be to not handle binary data at all. req.body will always be non-binary. If you want to handle binary data you will have to use a function to get it. After all we don't currently handle binary data (well, null bytes) and I'm not sure how useful it would be outside hashing. My 2 cents. On Thu, Feb 26, 2015 at 9:32 AM, Arianna Aondio < arianna.aondio at varnish-software.com> wrote: > VDD Hamburg talking point: > > Context: > Starting from Varnish 4 we can buffer the request body (usually POST > and PUT requests) before sending it to the backend. 
> Now we have just one function accessible to users: > std.cache_req_body(BYTES size) which initializes the buffering. > Once the request body has been cached, it can be consumed as many > times as needed, making it available to other user-accesible > functions, such as: > * request body length access function > * regular expression match on request body > * regular expression substitution on request body > * request body as input in vcl_hash > > Problems: > 1. Bug #1664, std.cache_req_body(BYTES size) lacks of errors handling, > if it is called with a request body bigger than size, Varnish crashes > and if we have a chunked request the function will cache every request > bodies ignoring the provided size limitation. > 2. Regular expression match on body: how do we want the user interface > to be, do we want the function to return a boolean indicating if the > request body contains the string the user is looking for? In VCL this > can look like : > sub vcl_recv { > set req.http.x-boolean1 = std.regex_req_body("varnish rocks"); > } > > Or do we want to be more aligned with the regex syntax and make the > request body completely available to the user? In VCL this can look > like : > sub vcl_recv { > if (std.reqbody_re_match() ~ "varnish rocks") { > .... > } > } > > 3. Regular expression substitution on body, this function needs to be > discussed. Do we really need to be able to substitute on the request > body? Is it safe? How do we handle the possible increase of request > body? > > Proposed solutions: > 1. As decided a couple of weeks ago during a bugwash, we either buffer > the whole request body or fail the request. > I have a patch for this: if the request body is bigger than the given > size, we close the connection and move forward to the next request. > 2. && 3. to be discussed. > > Request body length access function: once the request body has been > cached, we can then iterate over it and return the number of bytes. 
> > Request body as input in vcl_hash: once the request body has been > cached, we can hash on it. This function should be available just in > vcl_hash. > Until now we have always just hashed on strings, but if we want to > hash on bodies we need to be aware that they can be binary, so we need > to handle this properly. > > I think functions regarding request body manipulation should be part > of the std.vmod. > > > General considerations: > Request bodies may contains binary data that headers should not contain. > Functions have to be able to handle any kind of request body. > > -- > Arianna Aondio > Software Developer | Varnish Software AS > Mobile: +47 980 62 619 > > We Make Websites Fly > www.varnish-software.com > > _______________________________________________ > varnish-dev mailing list > varnish-dev at varnish-cache.org > https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev > -------------- next part -------------- An HTML attachment was scrubbed... URL: From slink at schokola.de Fri Feb 27 18:04:23 2015 From: slink at schokola.de (Nils Goroll) Date: Fri, 27 Feb 2015 19:04:23 +0100 Subject: suggesting to increase timeout_req In-Reply-To: <54EEF4F9.9050208@schokola.de> References: <54EEF4F9.9050208@schokola.de> Message-ID: <54F0B1A7.5050707@schokola.de> On 26/02/15 11:27, Nils Goroll wrote: > This tcpdump output illustrates an issue we seem to have with default Linux tcp > timeouts and the default timeout_req of 2 seconds: After further pondering of this case and many similar cases from production systems' tcpdumps, I suggest the attached improvement. I hope the commit message will provide sufficient background. Nils -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0001-Instruct-the-kernel-to-reset-the-connection-for-SC_R.patch Type: text/x-patch Size: 5430 bytes Desc: not available URL: From slink at schokola.de Fri Feb 27 18:05:50 2015 From: slink at schokola.de (Nils Goroll) Date: Fri, 27 Feb 2015 19:05:50 +0100 Subject: [PATCH] Instruct the kernel to reset the connection for SC_RX_TIMEOUT, and others In-Reply-To: <54EEF4F9.9050208@schokola.de> References: <54EEF4F9.9050208@schokola.de> Message-ID: <54F0B1FE.3040600@schokola.de> (apologies for the duplicate, I should have chosen a patchwork-friendly subject line in the first place) On 26/02/15 11:27, Nils Goroll wrote: > This tcpdump output illustrates an issue we seem to have with default Linux tcp > timeouts and the default timeout_req of 2 seconds: After further pondering of this case and many similar cases from production systems' tcpdumps, I suggest the attached improvement. I hope the commit message will provide sufficient background. Nils -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Instruct-the-kernel-to-reset-the-connection-for-SC_R.patch Type: text/x-patch Size: 5431 bytes Desc: not available URL: From slink at schokola.de Fri Feb 27 18:12:34 2015 From: slink at schokola.de (Nils Goroll) Date: Fri, 27 Feb 2015 19:12:34 +0100 Subject: Update [PATCH] Instruct the kernel to reset the connection for SC_RX_TIMEOUT, and others In-Reply-To: <54F0B1FE.3040600@schokola.de> References: <54EEF4F9.9050208@schokola.de> <54F0B1FE.3040600@schokola.de> Message-ID: <54F0B392.1020501@schokola.de> And yet another apology for a "wife waiting" glitch: I had accidentally removed two lines of code and only noticed after posting. Fixed patch attached again. On 27/02/15 19:05, Nils Goroll wrote: > - assert(sp->fd >= 0); > - sp->reason = reason; -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0001-Instruct-the-kernel-to-reset-the-connection-for-SC_R.patch Type: text/x-patch Size: 5416 bytes Desc: not available URL: