varnish-cache/bin/varnishd/cache/cache_hash.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2015 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 * This is the central hash-table code, it relies on a chosen hash
31
 * implementation only for the actual hashing, all the housekeeping
32
 * happens here.
33
 *
34
 * We have two kinds of structures, objecthead and object.  An objecthead
35
 * corresponds to a given (Host:, URL) tuple, and the objects hung from
36
 * the objecthead may represent various variations (ie: Vary: header,
37
 * different TTL etc) instances of that web-entity.
38
 *
39
 * Each objecthead has a mutex which locks both its own fields, the
40
 * list of objects and fields in the objects.
41
 *
42
 * The hash implementation must supply a reference count facility on
43
 * the objecthead, and return with a reference held after a lookup.
44
 *
45
 * Lookups in the hash implementation returns with a ref held and each
46
 * object hung from the objhead holds a ref as well.
47
 *
48
 * Objects have refcounts which are locked by the objecthead mutex.
49
 *
50
 * New objects are always marked busy, and they can go from busy to
51
 * not busy only once.
52
 */
53
54
#include "config.h"
55
56
#include <stdio.h>
57
#include <stdlib.h>
58
59
#include "cache_varnishd.h"
60
61
#include "cache/cache_objhead.h"
62
#include "cache/cache_transport.h"
63
64
#include "hash/hash_slinger.h"
65
66
#include "vsha256.h"
67
68
/* A batch of requests plucked off an objhead's waiting list, to be
 * rescheduled (rushed) after the objhead mutex has been released. */
struct rush {
	unsigned		magic;
#define RUSH_MAGIC		0xa1af5f01
	VTAILQ_HEAD(,req)	reqs;
};
73
74
static const struct hash_slinger *hash;
75
#define PRIVATE_OH_EXP 7
76
static struct objhead private_ohs[1 << PRIVATE_OH_EXP];
77
78
static void hsh_rush1(const struct worker *, struct objcore *,
79
    struct rush *);
80
static void hsh_rush2(struct worker *, struct rush *);
81
static int hsh_deref_objhead(struct worker *wrk, struct objhead **poh);
82
static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh,
83
    struct objcore *oc);
84
static int hsh_deref_objcore_unlock(struct worker *, struct objcore **);
85
86
/*---------------------------------------------------------------------*/
87
88
#define VCF_RETURN(x) const struct vcf_return VCF_##x[1] = { \
89
        { .name = #x, } \
90
};
91
92
VCF_RETURNS()
93
#undef VCF_RETURN
94
95
/*---------------------------------------------------------------------*/
96
97
static void
98 4971573
hsh_initobjhead(struct objhead *oh)
99
{
100
101 4971573
        XXXAN(oh);
102 4971573
        INIT_OBJ(oh, OBJHEAD_MAGIC);
103 4971573
        oh->refcnt = 1;
104 4971573
        oh->waitinglist_gen = 1;
105 4971573
        VTAILQ_INIT(&oh->objcs);
106 4971573
        VTAILQ_INIT(&oh->waitinglist);
107 4971573
        Lck_New(&oh->mtx, lck_objhdr);
108 4971573
}
109
110
static struct objhead *
111 63925
hsh_newobjhead(void)
112
{
113 63925
        struct objhead *oh = malloc(sizeof *oh);
114 63925
        hsh_initobjhead(oh);
115 63925
        return (oh);
116
}
117
118
/*---------------------------------------------------------------------*/
119
/* Precreate an objhead and object for later use */
120
static void
121 107878
hsh_prealloc(struct worker *wrk)
122
{
123
124 107878
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
125
126 107878
        if (wrk->wpriv->nobjcore == NULL)
127 73067
                wrk->wpriv->nobjcore = ObjNew(wrk);
128 107878
        CHECK_OBJ_NOTNULL(wrk->wpriv->nobjcore, OBJCORE_MAGIC);
129
130 107878
        if (wrk->wpriv->nobjhead == NULL) {
131 63925
                wrk->wpriv->nobjhead = hsh_newobjhead();
132 63925
                wrk->stats->n_objecthead++;
133 63925
        }
134 107878
        CHECK_OBJ_NOTNULL(wrk->wpriv->nobjhead, OBJHEAD_MAGIC);
135
136 107878
        if (hash->prep != NULL)
137 107637
                hash->prep(wrk);
138 107878
}
139
140
/*---------------------------------------------------------------------*/
141
142
// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
143
// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
/* Fibonacci hash: map a 64-bit value onto [0, 2^bits). */
static inline size_t
fib(uint64_t n, uint8_t bits)
{
	/* 2^64 divided by the golden ratio, i.e. the "magic" Fibonacci
	 * hashing multiplier. */
	const uint64_t mult = 11400714819323198485LLU;
	uint64_t h;

	/* Multiply (mod 2^64) and keep the top `bits` bits. */
	h = (n * mult) >> ((sizeof(mult) * 8) - bits);
	assert(h < (size_t)1 << bits);
	return ((size_t)h);
}
154
155
struct objcore *
156 57400
HSH_Private(const struct worker *wrk)
157
{
158
        struct objcore *oc;
159
        struct objhead *oh;
160
161 57400
        oh = &private_ohs[fib((uintptr_t)wrk, PRIVATE_OH_EXP)];
162 57400
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
163
164 57400
        oc = ObjNew(wrk);
165 57400
        AN(oc);
166 57400
        oc->refcnt = 1;
167 57400
        oc->objhead = oh;
168 57400
        oc->flags |= OC_F_PRIVATE;
169 57400
        Lck_Lock(&oh->mtx);
170 57400
        VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
171 57400
        oh->refcnt++;
172 57400
        Lck_Unlock(&oh->mtx);
173 57400
        return (oc);
174
}
175
176
/*---------------------------------------------------------------------*/
177
178
void
179 38700
HSH_Cleanup(const struct worker *wrk)
180
{
181
182 38700
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
183 38700
        CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
184 38700
        if (wrk->wpriv->nobjcore != NULL)
185 78
                ObjDestroy(wrk, &wrk->wpriv->nobjcore);
186
187 38700
        if (wrk->wpriv->nobjhead != NULL) {
188 78
                CHECK_OBJ(wrk->wpriv->nobjhead, OBJHEAD_MAGIC);
189 78
                Lck_Delete(&wrk->wpriv->nobjhead->mtx);
190 78
                FREE_OBJ(wrk->wpriv->nobjhead);
191 78
                wrk->stats->n_objecthead--;
192 78
        }
193 38700
        if (wrk->wpriv->nhashpriv != NULL) {
194
                /* XXX: If needed, add slinger method for this */
195 80
                free(wrk->wpriv->nhashpriv);
196 80
                wrk->wpriv->nhashpriv = NULL;
197 80
        }
198 38700
}
199
200
void
201 0
HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
202
{
203 0
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
204 0
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
205
206 0
        AZ(oh->refcnt);
207 0
        assert(VTAILQ_EMPTY(&oh->objcs));
208 0
        assert(VTAILQ_EMPTY(&oh->waitinglist));
209 0
        Lck_Delete(&oh->mtx);
210 0
        wrk->stats->n_objecthead--;
211 0
        FREE_OBJ(oh);
212 0
}
213
214
void
215 597410
HSH_AddString(struct req *req, void *ctx, const char *str)
216
{
217
218 597410
        CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
219 597410
        AN(ctx);
220 597410
        if (str != NULL) {
221 298684
                VSHA256_Update(ctx, str, strlen(str));
222 298684
                VSLbs(req->vsl, SLT_Hash, TOSTRAND(str));
223 298684
        } else
224 298726
                VSHA256_Update(ctx, &str, 1);
225 597410
}
226
227
/*---------------------------------------------------------------------
228
 * This is a debugging hack to enable testing of boundary conditions
229
 * in the hash algorithm.
230
 * We trap the first 9 different digests and translate them to different
231
 * digests with edge bit conditions
232
 */
233
234
/* Translation table for the DBG_HASHEDGE debugging hack: the first
 * HSH_NMAGIC distinct digests seen (`was`) are replaced with these
 * fixed edge-condition digests (`now`). */
static struct hsh_magiclist {
	unsigned char was[VSHA256_LEN];
	unsigned char now[VSHA256_LEN];
} hsh_magiclist[] = {
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 } },
	{ .now = {	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
};

#define HSH_NMAGIC vcountof(hsh_magiclist)
277
278
/* DBG_HASHEDGE hook: remember the first HSH_NMAGIC distinct digests
 * seen and rewrite each of them, in place, to a fixed edge-condition
 * digest from hsh_magiclist, logging the translation to stderr. */
static void
hsh_testmagic(void *result)
{
	size_t i, j;
	static size_t nused = 0;	/* entries of `was` filled so far */

	/* Have we seen this digest before ? */
	for (i = 0; i < nused; i++)
		if (!memcmp(hsh_magiclist[i].was, result, VSHA256_LEN))
			break;
	/* New digest: claim the next free slot, if any. */
	if (i == nused && i < HSH_NMAGIC)
		memcpy(hsh_magiclist[nused++].was, result, VSHA256_LEN);
	/* Table full and no match: leave the digest alone. */
	if (i == nused)
		return;
	assert(i < HSH_NMAGIC);
	fprintf(stderr, "HASHMAGIC: <");
	for (j = 0; j < VSHA256_LEN; j++)
		fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
	fprintf(stderr, "> -> <");
	/* Overwrite the caller's digest with the canned replacement. */
	memcpy(result, hsh_magiclist[i].now, VSHA256_LEN);
	for (j = 0; j < VSHA256_LEN; j++)
		fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
	fprintf(stderr, ">\n");
}
301
302
/*---------------------------------------------------------------------
303
 * Insert an object which magically appears out of nowhere or, more likely,
304
 * comes off some persistent storage device.
305
 * Insert it with a reference held.
306
 */
307
308
/*
 * Insert `oc` under `digest` with a reference held by the caller.
 * Used for objects appearing from persistent storage; the objhead
 * reference obtained from the lookup is inherited by the objcore.
 */
void
HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc,
    struct ban *ban)
{
	struct objhead *oh;
	struct rush rush;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
	AN(digest);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	AN(ban);
	AZ(oc->flags & OC_F_BUSY);
	AZ(oc->flags & OC_F_PRIVATE);
	assert(oc->refcnt == 1);
	INIT_OBJ(&rush, RUSH_MAGIC);

	hsh_prealloc(wrk);

	/* The lookup returns with the objhead mutex held and a ref. */
	AN(wrk->wpriv->nobjhead);
	oh = hash->lookup(wrk, digest, &wrk->wpriv->nobjhead);
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);
	assert(oh->refcnt > 0);

	/* Mark object busy and insert (precreated) objcore in
	   objecthead. The new object inherits our objhead reference. */
	oc->objhead = oh;
	VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
	EXP_RefNewObjcore(oc);
	Lck_Unlock(&oh->mtx);

	/* NB: ban attached outside the objhead lock. */
	BAN_RefBan(oc, ban);
	AN(oc->ban);

	/* Move the object first in the oh list, unbusy it and run the
	   waitinglist if necessary */
	Lck_Lock(&oh->mtx);
	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
	if (!VTAILQ_EMPTY(&oh->waitinglist))
		hsh_rush1(wrk, oc, &rush);
	Lck_Unlock(&oh->mtx);
	/* Reschedule any rushed requests now that the lock is dropped. */
	hsh_rush2(wrk, &rush);

	EXP_Insert(wrk, oc);
}
355
356
/*---------------------------------------------------------------------
357
 */
358
359
static struct objcore *
360 61545
hsh_insert_busyobj(const struct worker *wrk, struct objhead *oh)
361
{
362
        struct objcore *oc;
363
364 61545
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
365 61545
        CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
366 61545
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
367 61545
        Lck_AssertHeld(&oh->mtx);
368
369 61545
        oc = wrk->wpriv->nobjcore;
370 61545
        wrk->wpriv->nobjcore = NULL;
371 61545
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
372
373 61545
        AZ(oc->flags & OC_F_BUSY);
374 61545
        oc->flags |= OC_F_BUSY;
375 61545
        oc->refcnt = 1;         /* Owned by busyobj */
376 61545
        oc->objhead = oh;
377 61545
        VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
378 61545
        return (oc);
379
}
380
381
/*---------------------------------------------------------------------
382
 */
383
384
static int
385 157328
hsh_vry_match(const struct req *req, struct objcore *oc, const uint8_t *vary)
386
{
387
388 157328
        if (req->hash_ignore_vary)
389 40
                return (1);
390 157288
        if (vary == NULL) {
391 157215
                if (! ObjHasAttr(req->wrk, oc, OA_VARY))
392 44335
                        return (1);
393 112880
                vary = ObjGetAttr(req->wrk, oc, OA_VARY, NULL);
394 112880
                AN(vary);
395 112880
        }
396 112953
        return (VRY_Match(req, vary));
397 157328
}
398
399
static unsigned
400 2218
hsh_rush_match(const struct req *req)
401
{
402
        struct objcore *oc;
403
404 2218
        oc = req->objcore;
405 2218
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
406 2218
        assert(oc->refcnt > 0);
407
408 2218
        AZ(oc->flags & OC_F_BUSY);
409 2218
        AZ(oc->flags & OC_F_PRIVATE);
410 2218
        if (oc->flags & (OC_F_WITHDRAWN|OC_F_HFM|OC_F_HFP|OC_F_CANCEL|
411
            OC_F_FAILED))
412 480
                return (0);
413
414 1738
        if (req->vcf != NULL) /* NB: must operate under oh lock. */
415 0
                return (0);
416
417 1738
        CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
418
419 1738
        return (hsh_vry_match(req, oc, NULL));
420 2218
}
421
422
/*---------------------------------------------------------------------
423
 */
424
425
/*
 * Central cache lookup. On return, *ocp may hold a referenced objcore
 * (hit/grace) and *bocp may hold a new busy objcore (miss/hitmiss).
 * The result enum tells the caller which combination applies.
 */
enum lookup_e
HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
{
	struct worker *wrk;
	struct objhead *oh;
	struct objcore *oc;
	struct objcore *exp_oc;		/* best expired candidate seen */
	const struct vcf_return *vr;
	vtim_real exp_t_origin;
	int busy_found;
	intmax_t boc_progress;
	unsigned xid = 0;
	unsigned ban_checks;
	unsigned ban_any_variant;
	float dttl = 0.0;

	AN(ocp);
	*ocp = NULL;
	AN(bocp);
	*bocp = NULL;

	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
	wrk = req->wrk;
	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
	CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
	CHECK_OBJ_ORNULL(req->objcore, OBJCORE_MAGIC);
	CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC);
	AN(hash);

	hsh_prealloc(wrk);
	if (DO_DEBUG(DBG_HASHEDGE))
		hsh_testmagic(req->digest);

	/*
	 * When a req rushes off the waiting list, it brings an implicit
	 * oh refcnt acquired at disembark time and an oc ref (with its
	 * own distinct oh ref) acquired during rush hour.
	 */

	if (req->objcore != NULL && hsh_rush_match(req)) {
		/* Fast path: the rushed objcore matches outright. */
		TAKE_OBJ_NOTNULL(oc, &req->objcore, OBJCORE_MAGIC);
		*ocp = oc;
		oh = oc->objhead;
		Lck_Lock(&oh->mtx);
		oc->hits++;
		boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
		AN(hsh_deref_objhead_unlock(wrk, &oh, oc));
		Req_LogHit(wrk, req, oc, boc_progress);
		/* NB: since this hit comes from the waiting list instead of
		 * a regular lookup, grace is not considered. The object is
		 * fresh in the context of the waiting list, even expired: it
		 * was successfully just [re]validated by a fetch task.
		 */
		return (HSH_HIT);
	}

	if (req->objcore != NULL) {
		/* Rushed objcore did not match: drop it and re-lookup on
		 * the objhead ref the req still holds. */
		oh = req->objcore->objhead;
		(void)HSH_DerefObjCore(wrk, &req->objcore);
		Lck_Lock(&oh->mtx);
	} else {
		AN(wrk->wpriv->nobjhead);
		oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead);
	}

	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);

	if (req->hash_always_miss) {
		/* XXX: should we do predictive Vary in this case ? */
		/* Insert new objcore in objecthead and release mutex */
		*bocp = hsh_insert_busyobj(wrk, oh);
		/* NB: no deref of objhead, new object inherits reference */
		Lck_Unlock(&oh->mtx);
		return (HSH_MISS);
	}

	assert(oh->refcnt > 0);
	busy_found = 0;
	exp_oc = NULL;
	exp_t_origin = 0.0;
	ban_checks = 0;
	ban_any_variant = cache_param->ban_any_variant;
	/* Walk the objcores on this objhead looking for a usable one. */
	VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
		/* Must be at least our own ref + the objcore we examine */
		assert(oh->refcnt > 1);
		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
		assert(oc->objhead == oh);
		assert(oc->refcnt > 0);

		if (oc->flags & OC_F_DYING)
			continue;
		if (oc->flags & OC_F_FAILED)
			continue;

		CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC);
		if (oc->flags & OC_F_BUSY) {
			if (req->hash_ignore_busy)
				continue;

			/* A busy object with a known, non-matching Vary
			 * can be skipped instead of waited on. */
			if (oc->boc && oc->boc->vary != NULL &&
			    !hsh_vry_match(req, oc, oc->boc->vary)) {
				wrk->strangelove++;
				continue;
			}

			busy_found = 1;
			continue;
		}

		if (oc->ttl <= 0.)
			continue;

		/* First ban_any_variant candidates get ban-checked before
		 * the (more expensive) Vary match; the rest after. */
		if (ban_checks++ < ban_any_variant
		    && BAN_CheckObject(wrk, oc, req)) {
			oc->flags |= OC_F_DYING;
			EXP_Remove(oc, NULL);
			continue;
		}

		if (!hsh_vry_match(req, oc, NULL)) {
			wrk->strangelove++;
			continue;
		}

		if (ban_checks >= ban_any_variant
		    && BAN_CheckObject(wrk, oc, req)) {
			oc->flags |= OC_F_DYING;
			EXP_Remove(oc, NULL);
			continue;
		}

		if (req->vcf != NULL) {
			vr = req->vcf->func(req, &oc, &exp_oc, 0);
			if (vr == VCF_CONTINUE)
				continue;
			if (vr == VCF_MISS) {
				oc = NULL;
				break;
			}
			if (vr == VCF_HIT)
				break;
			assert(vr == VCF_DEFAULT);
		}

		/* Fresh enough for this request: it is a hit. */
		if (EXP_Ttl(req, oc) > req->t_req) {
			assert(oh->refcnt > 1);
			assert(oc->objhead == oh);
			break;
		}

		if (EXP_Ttl(NULL, oc) <= req->t_req && /* ignore req.max_age */
		    oc->t_origin > exp_t_origin) {
			/* record the newest object */
			exp_oc = oc;
			exp_t_origin = oc->t_origin;
			assert(oh->refcnt > 1);
			assert(exp_oc->objhead == oh);
		}
	}

	if (req->vcf != NULL)
		(void)req->vcf->func(req, &oc, &exp_oc, 1);

	if (oc != NULL && oc->flags & OC_F_HFP) {
		/* Hit-for-pass: report and pass without taking an oc ref. */
		xid = VXID(ObjGetXID(wrk, oc));
		dttl = EXP_Dttl(req, oc);
		AN(hsh_deref_objhead_unlock(wrk, &oh, oc));
		wrk->stats->cache_hitpass++;
		VSLb(req->vsl, SLT_HitPass, "%u %.6f", xid, dttl);
		return (HSH_HITPASS);
	}

	if (oc != NULL) {
		*ocp = oc;
		oc->refcnt++;
		if (oc->flags & OC_F_HFM) {
			/* Hit-for-miss: also create a busy objcore. */
			xid = VXID(ObjGetXID(wrk, oc));
			dttl = EXP_Dttl(req, oc);
			*bocp = hsh_insert_busyobj(wrk, oh);
			Lck_Unlock(&oh->mtx);
			wrk->stats->cache_hitmiss++;
			VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
			return (HSH_HITMISS);
		}
		oc->hits++;
		boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
		AN(hsh_deref_objhead_unlock(wrk, &oh, oc));
		Req_LogHit(wrk, req, oc, boc_progress);
		return (HSH_HIT);
	}

	if (exp_oc != NULL && exp_oc->flags & OC_F_HFM) {
		/*
		 * expired HFM ("grace/keep HFM")
		 *
		 * XXX should HFM objects actually have grace/keep ?
		 * XXX also:  why isn't *ocp = exp_oc ?
		 */
		xid = VXID(ObjGetXID(wrk, exp_oc));
		dttl = EXP_Dttl(req, exp_oc);
		*bocp = hsh_insert_busyobj(wrk, oh);
		Lck_Unlock(&oh->mtx);
		wrk->stats->cache_hitmiss++;
		VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
		return (HSH_HITMISS);
	}

	if (exp_oc != NULL && exp_oc->boc != NULL)
		boc_progress = exp_oc->boc->fetched_so_far;
	else
		boc_progress = -1;

	if (!busy_found) {
		/* Nothing busy to wait on: start a fetch ourselves. */
		*bocp = hsh_insert_busyobj(wrk, oh);

		if (exp_oc != NULL) {
			exp_oc->refcnt++;
			*ocp = exp_oc;
			if (EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
				exp_oc->hits++;
				Lck_Unlock(&oh->mtx);
				Req_LogHit(wrk, req, exp_oc, boc_progress);
				return (HSH_GRACE);
			}
		}
		Lck_Unlock(&oh->mtx);
		return (HSH_MISS);
	}

	AN(busy_found);
	if (exp_oc != NULL && EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
		/* we do not wait on the busy object if in grace */
		exp_oc->refcnt++;
		*ocp = exp_oc;
		exp_oc->hits++;
		AN(hsh_deref_objhead_unlock(wrk, &oh, NULL));
		Req_LogHit(wrk, req, exp_oc, boc_progress);
		return (HSH_GRACE);
	}

	/* There are one or more busy objects, wait for them */
	VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list);

	AZ(req->hash_ignore_busy);

	/*
	 * The objhead reference is held by req while it is parked on the
	 * waiting list. The oh pointer is taken back from the objcore that
	 * triggers a rush of req off the waiting list.
	 */
	assert(oh->refcnt > 1);

	req->wrk = NULL;
	req->waitinglist_gen = oh->waitinglist_gen;

	if (DO_DEBUG(DBG_WAITINGLIST))
		VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh);

	Lck_Unlock(&oh->mtx);

	wrk->stats->busy_sleep++;
	return (HSH_BUSY);
}
690
691
/*---------------------------------------------------------------------
692
 * Pick the req's we are going to rush from the waiting list
693
 */
694
695
/* Pick up to rush_exponent requests off oc's objhead waiting list into
 * `r`, to be rescheduled later by hsh_rush2() outside the lock. Each
 * picked request gets a reference on `oc`. Called with oh->mtx held. */
static void
hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r)
{
	struct objhead *oh;
	struct req *req;
	int i, max;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_ORNULL(oc, OBJCORE_MAGIC);
	CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
	VTAILQ_INIT(&r->reqs);

	if (oc == NULL)
		return;

	oh = oc->objhead;
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);

	AZ(oc->flags & OC_F_BUSY);
	AZ(oc->flags & OC_F_PRIVATE);
	max = cache_param->rush_exponent;
	/* Withdrawn/failed objects wake only one request at a time. */
	if (oc->flags & (OC_F_WITHDRAWN|OC_F_FAILED))
		max = 1;
	assert(max > 0);

	/* First rush for this oc: stamp it with the current waiting list
	 * generation and open a new one. */
	if (oc->waitinglist_gen == 0) {
		oc->waitinglist_gen = oh->waitinglist_gen;
		oh->waitinglist_gen++;
	}

	for (i = 0; i < max; i++) {
		req = VTAILQ_FIRST(&oh->waitinglist);
		if (req == NULL)
			break;

		CHECK_OBJ(req, REQ_MAGIC);

		/* NB: The waiting list is naturally sorted by generation.
		 *
		 * Because of the exponential nature of the rush, it is
		 * possible that new requests enter the waiting list before
		 * the rush for this oc completes. Because the OC_F_BUSY flag
		 * was cleared before the beginning of the rush, requests
		 * from a newer generation already got a chance to evaluate
		 * oc during a lookup and it didn't match their criteria.
		 *
		 * Therefore there's no point propagating the exponential
		 * rush of this oc when we see a newer generation.
		 */
		if (req->waitinglist_gen > oc->waitinglist_gen)
			break;

		AZ(req->wrk);
		VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
		VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
		/* The rushed request inherits a fresh ref on oc. */
		req->objcore = oc;
		oc->refcnt++;
		wrk->stats->busy_wakeup++;
	}
}
756
757
/*---------------------------------------------------------------------
758
 * Rush req's that came from waiting list.
759
 */
760
761
static void
762 125304
hsh_rush2(struct worker *wrk, struct rush *r)
763
{
764
        struct req *req;
765
766 125304
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
767 125304
        CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
768
769 127523
        while (!VTAILQ_EMPTY(&r->reqs)) {
770 2219
                req = VTAILQ_FIRST(&r->reqs);
771 2219
                CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
772 2219
                VTAILQ_REMOVE(&r->reqs, req, w_list);
773 2219
                DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
774 2219
                if (req->transport->reembark != NULL) {
775
                        // For ESI includes
776 41
                        req->transport->reembark(wrk, req);
777 41
                } else {
778
                        /*
779
                         * We ignore the queue limits which apply to new
780
                         * requests because if we fail to reschedule there
781
                         * may be vmod_privs to cleanup and we need a proper
782
                         * workerthread for that.
783
                         */
784 2178
                        AZ(Pool_Task(req->sp->pool, req->task, TASK_QUEUE_RUSH));
785
                }
786
        }
787 125304
}
788
789
/*---------------------------------------------------------------------
790
 * Purge an entire objhead
791
 */
792
793
unsigned
794 960
HSH_Purge(struct worker *wrk, struct objhead *oh, vtim_real ttl_now,
795
    vtim_dur ttl, vtim_dur grace, vtim_dur keep)
796
{
797
        struct objcore *oc, *oc_nows[2], **ocp;
798 960
        unsigned i, j, n, n_max, total = 0;
799
        int is_purge;
800
801 960
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
802 960
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
803
804 960
        is_purge = (ttl == 0 && grace == 0 && keep == 0);
805 960
        n_max = WS_ReserveLumps(wrk->aws, sizeof *ocp);
806 960
        if (n_max < 2) {
807
                /* No space on the workspace. Give it a stack buffer of 2
808
                 * elements, which is the minimum for the algorithm
809
                 * below. */
810 0
                ocp = oc_nows;
811 0
                n_max = 2;
812 0
        } else
813 960
                ocp = WS_Reservation(wrk->aws);
814 960
        AN(ocp);
815
816
        /* Note: This algorithm uses OC references in the list as
817
         * bookmarks, in order to know how far into the list we were when
818
         * releasing the mutex partway through and want to resume
819
         * again. This relies on the list not being reordered while we are
820
         * not holding the mutex. The only place where that happens is in
821
         * HSH_Unbusy(), where an OC_F_BUSY OC is moved first in the
822
         * list. This does not cause problems because we skip OC_F_BUSY
823
         * OCs. */
824
825 960
        Lck_Lock(&oh->mtx);
826 960
        oc = VTAILQ_FIRST(&oh->objcs);
827 960
        n = 0;
828 1000
        while (1) {
829 5680
                for (; n < n_max && oc != NULL; oc = VTAILQ_NEXT(oc, hsh_list))
830
                {
831 4680
                        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
832 4680
                        assert(oc->objhead == oh);
833 4680
                        if (oc->flags & OC_F_BUSY) {
834
                                /* We cannot purge busy objects here, because
835
                                 * their owners have special rights to them,
836
                                 * and may nuke them without concern for the
837
                                 * refcount, which by definition always must
838
                                 * be one, so they don't check. */
839 920
                                continue;
840
                        }
841 3760
                        if (oc->flags & OC_F_DYING)
842 0
                                continue;
843 3760
                        if (is_purge)
844 3360
                                oc->flags |= OC_F_DYING;
845 3760
                        oc->refcnt++;
846 3760
                        ocp[n++] = oc;
847 3760
                }
848
849 1000
                Lck_Unlock(&oh->mtx);
850
851 1000
                if (n == 0) {
852
                        /* No eligible objcores found. We are finished. */
853 160
                        break;
854
                }
855
856 840
                j = n;
857 840
                if (oc != NULL) {
858
                        /* There are more objects on the objhead that we
859
                         * have not yet looked at, but no more space on
860
                         * the objcore reference list. Do not process the
861
                         * last one, it will be used as the bookmark into
862
                         * the objcore list for the next iteration of the
863
                         * outer loop. */
864 40
                        j--;
865 40
                        assert(j >= 1); /* True because n_max >= 2 */
866 40
                }
867 4600
                for (i = 0; i < j; i++) {
868 3760
                        CHECK_OBJ_NOTNULL(ocp[i], OBJCORE_MAGIC);
869 3760
                        if (is_purge)
870 3360
                                EXP_Remove(ocp[i], NULL);
871
                        else
872 400
                                EXP_Reduce(ocp[i], ttl_now, ttl, grace, keep);
873 3760
                        (void)HSH_DerefObjCore(wrk, &ocp[i]);
874 3760
                        AZ(ocp[i]);
875 3760
                        total++;
876 3760
                }
877
878 840
                if (j == n) {
879
                        /* No bookmark set, that means we got to the end
880
                         * of the objcore list in the previous run and are
881
                         * finished. */
882 800
                        break;
883
                }
884
885 40
                Lck_Lock(&oh->mtx);
886
887
                /* Move the bookmark first and continue scanning the
888
                 * objcores */
889 40
                CHECK_OBJ_NOTNULL(ocp[j], OBJCORE_MAGIC);
890 40
                ocp[0] = ocp[j];
891 40
                n = 1;
892 40
                oc = VTAILQ_NEXT(ocp[0], hsh_list);
893 40
                CHECK_OBJ_ORNULL(oc, OBJCORE_MAGIC);
894
        }
895
896 960
        WS_Release(wrk->aws, 0);
897 960
        if (is_purge)
898 560
                Pool_PurgeStat(total);
899 960
        return (total);
900
}
901
902
/*---------------------------------------------------------------------
903
 * Fail an objcore
904
 */
905
906
void
907 2719
HSH_Fail(struct worker *wrk, struct objcore *oc)
908
{
909
        struct objhead *oh;
910
        struct rush rush;
911
912 2719
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
913 2719
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
914 2719
        CHECK_OBJ_NOTNULL(oc->boc, BOC_MAGIC);
915 2719
        oh = oc->objhead;
916 2719
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
917 2719
        INIT_OBJ(&rush, RUSH_MAGIC);
918
919
        /*
920
         * We either failed before the end of vcl_backend_response
921
         * and a cache miss has the busy bit, so that HSH_Lookup()
922
         * will not consider this oc, or an object hung off the oc
923
         * so that it can consider it.
924
         *
925
         * We can only fail an ongoing fetch in a backend context
926
         * so we can safely check the BOC state as it won't change
927
         * under our feet.
928
         */
929 2719
        if (oc->boc->state < BOS_STREAM)
930 2040
                assert(oc->flags & (OC_F_BUSY|OC_F_PRIVATE));
931
        else
932 679
                assert(oc->stobj->stevedore != NULL);
933
934 2719
        Lck_Lock(&oh->mtx);
935 2719
        oc->flags |= OC_F_FAILED;
936 2719
        if (oc->flags & OC_F_BUSY) {
937 1800
                oc->flags &= ~OC_F_BUSY;
938 1800
                hsh_rush1(wrk, oc, &rush);
939 1800
        }
940 2719
        Lck_Unlock(&oh->mtx);
941 2719
        hsh_rush2(wrk, &rush);
942 2719
}
943
944
/*---------------------------------------------------------------------
945
 * Mark a fetch we will not need as cancelled
946
 */
947
948
static void
949 14381
hsh_cancel(struct objcore *oc)
950
{
951
        struct objhead *oh;
952
953 14381
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
954 14381
        oh = oc->objhead;
955 14381
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
956
957 14381
        Lck_Lock(&oh->mtx);
958 14381
        oc->flags |= OC_F_CANCEL;
959 14381
        Lck_Unlock(&oh->mtx);
960 14381
}
961
962
/*---------------------------------------------------------------------
963
 * Cancel a fetch when the client does not need it any more
964
 */
965
966
void
967 154921
HSH_Cancel(struct worker *wrk, struct objcore *oc, struct boc *boc)
968
{
969 154921
        struct boc *bocref = NULL;
970
971 154921
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
972
973 154921
        if ((oc->flags & OC_F_TRANSIENT) == 0)
974 98322
                return;
975
976
        /*
977
         * NB: we use two distinct variables to only release the reference if
978
         * we had to acquire one. The caller-provided boc is optional.
979
         */
980 56599
        if (boc == NULL)
981 42330
                bocref = boc = HSH_RefBoc(oc);
982
983 56599
        CHECK_OBJ_ORNULL(boc, BOC_MAGIC);
984
985 56599
        if (oc->flags & OC_F_HFP)
986 880
                AN(oc->flags & OC_F_HFM);
987
988 56599
        if (boc != NULL) {
989 14380
                hsh_cancel(oc);
990 14380
                (void)ObjWaitState(oc, BOS_FINISHED);
991 14380
        }
992
993 56599
        if (bocref != NULL)
994 113
                HSH_DerefBoc(wrk, oc);
995
996 56599
        ObjSlim(wrk, oc);
997 154921
}
998
999
/*---------------------------------------------------------------------
1000
 * Withdraw an objcore that will not proceed with a fetch.
1001
 */
1002
1003
void
1004 1425
HSH_Withdraw(struct worker *wrk, struct objcore **ocp)
1005
{
1006
        struct objhead *oh;
1007
        struct objcore *oc;
1008
        struct rush rush;
1009
1010 1425
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1011 1425
        TAKE_OBJ_NOTNULL(oc, ocp, OBJCORE_MAGIC);
1012 1425
        INIT_OBJ(&rush, RUSH_MAGIC);
1013
1014 1425
        oh = oc->objhead;
1015 1425
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
1016
1017 1425
        Lck_Lock(&oh->mtx);
1018 1425
        AZ(oc->stobj->stevedore);
1019 1425
        AN(oc->flags & OC_F_BUSY);
1020 1425
        assert(oc->refcnt == 1);
1021 1425
        assert(oh->refcnt > 0);
1022 1425
        oc->flags &= ~OC_F_BUSY;
1023 1425
        oc->flags |= OC_F_WITHDRAWN;
1024 1425
        hsh_rush1(wrk, oc, &rush); /* grabs up to 1 oc ref */
1025 1425
        assert(hsh_deref_objcore_unlock(wrk, &oc) <= 1);
1026
1027 1425
        hsh_rush2(wrk, &rush);
1028 1425
}
1029
1030
/*---------------------------------------------------------------------
1031
 * Unbusy an objcore when the object is completely fetched.
1032
 */
1033
1034
void
1035 89878
HSH_Unbusy(struct worker *wrk, struct objcore *oc)
1036
{
1037
        struct objhead *oh;
1038
        struct rush rush;
1039
1040 89878
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1041 89878
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
1042 89878
        CHECK_OBJ_NOTNULL(oc->boc, BOC_MAGIC);
1043
1044 89878
        oh = oc->objhead;
1045 89878
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
1046
1047 89878
        AN(oc->stobj->stevedore);
1048 89878
        assert(oh->refcnt > 0);
1049 89878
        assert(oc->refcnt > 0);
1050
1051 89878
        if (oc->flags & OC_F_PRIVATE) {
1052 31598
                AZ(oc->flags & OC_F_BUSY);
1053 31598
                return;
1054
        }
1055
1056 58280
        AN(oc->flags & OC_F_BUSY);
1057 58280
        INIT_OBJ(&rush, RUSH_MAGIC);
1058
1059 58280
        BAN_NewObjCore(oc);
1060 58280
        AN(oc->ban);
1061
1062
        /* XXX: pretouch neighbors on oh->objcs to prevent page-on under mtx */
1063 58280
        Lck_Lock(&oh->mtx);
1064 58280
        assert(oh->refcnt > 0);
1065 58280
        assert(oc->refcnt > 0);
1066 58280
        EXP_RefNewObjcore(oc); /* Takes a ref for expiry */
1067
        /* XXX: strictly speaking, we should sort in Date: order. */
1068 58280
        VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
1069 58280
        VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
1070 58280
        oc->flags &= ~OC_F_BUSY;
1071 58280
        if (!VTAILQ_EMPTY(&oh->waitinglist)) {
1072 1358
                assert(oh->refcnt > 1);
1073 1358
                hsh_rush1(wrk, oc, &rush);
1074 1358
        }
1075 58280
        Lck_Unlock(&oh->mtx);
1076 58280
        EXP_Insert(wrk, oc);
1077 58280
        hsh_rush2(wrk, &rush);
1078 89878
}
1079
1080
/*====================================================================
1081
 * HSH_Kill()
1082
 *
1083
 * It's dead Jim, kick it...
1084
 */
1085
1086
void
1087 9559
HSH_Kill(struct objcore *oc)
1088
{
1089
1090 9559
        HSH_Replace(oc, NULL);
1091 9559
}
1092
1093
void
1094 12759
HSH_Replace(struct objcore *oc, const struct objcore *new_oc)
1095
{
1096
1097 12759
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
1098 12759
        CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
1099 12759
        if (new_oc != NULL) {
1100 3199
                CHECK_OBJ(new_oc, OBJCORE_MAGIC);
1101 3199
                assert(oc->objhead == new_oc->objhead);
1102 3199
        }
1103
1104 12759
        Lck_Lock(&oc->objhead->mtx);
1105 12759
        oc->flags |= OC_F_DYING;
1106 12759
        Lck_Unlock(&oc->objhead->mtx);
1107 12759
        EXP_Remove(oc, new_oc);
1108 12759
}
1109
1110
/*====================================================================
1111
 * HSH_Snipe()
1112
 *
1113
 * If objcore is idle, gain a ref and mark it dead.
1114
 */
1115
1116
int
1117 440
HSH_Snipe(const struct worker *wrk, struct objcore *oc)
1118
{
1119 440
        int retval = 0;
1120
1121 440
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1122 440
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
1123 440
        CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
1124
1125 440
        if (oc->refcnt == 1 && !Lck_Trylock(&oc->objhead->mtx)) {
1126 440
                if (oc->refcnt == 1 && !(oc->flags & OC_F_DYING)) {
1127 440
                        oc->flags |= OC_F_DYING;
1128 440
                        oc->refcnt++;
1129 440
                        retval = 1;
1130 440
                }
1131 440
                Lck_Unlock(&oc->objhead->mtx);
1132 440
        }
1133 440
        if (retval)
1134 440
                EXP_Remove(oc, NULL);
1135 440
        return (retval);
1136
}
1137
1138
1139
/*---------------------------------------------------------------------
1140
 * Gain a reference on an objcore
1141
 */
1142
1143
void
1144 99234
HSH_Ref(struct objcore *oc)
1145
{
1146
        struct objhead *oh;
1147
1148 99234
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
1149 99234
        oh = oc->objhead;
1150 99234
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
1151 99234
        Lck_Lock(&oh->mtx);
1152 99234
        assert(oc->refcnt > 0);
1153 99234
        oc->refcnt++;
1154 99234
        Lck_Unlock(&oh->mtx);
1155 99234
}
1156
1157
/*---------------------------------------------------------------------
1158
 * Gain a reference on the busyobj, if the objcore has one
1159
 */
1160
1161
struct boc *
1162 387480
HSH_RefBoc(const struct objcore *oc)
1163
{
1164
        struct objhead *oh;
1165
        struct boc *boc;
1166
1167 387480
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
1168 387480
        oh = oc->objhead;
1169 387480
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
1170 387480
        if (oc->boc == NULL)
1171 227278
                return (NULL);
1172 160202
        Lck_Lock(&oh->mtx);
1173 160202
        assert(oc->refcnt > 0);
1174 160202
        boc = oc->boc;
1175 160202
        CHECK_OBJ_ORNULL(boc, BOC_MAGIC);
1176 160202
        if (boc != NULL) {
1177 160152
                assert(boc->refcount > 0);
1178 160152
                if (boc->state < BOS_FINISHED)
1179 159231
                        boc->refcount++;
1180
                else
1181 921
                        boc = NULL;
1182 160152
        }
1183 160202
        Lck_Unlock(&oh->mtx);
1184 160202
        return (boc);
1185 387478
}
1186
1187
void
1188 277298
HSH_DerefBoc(struct worker *wrk, struct objcore *oc)
1189
{
1190
        struct boc *boc;
1191
        unsigned r;
1192
1193 277298
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1194 277298
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
1195 277298
        boc = oc->boc;
1196 277298
        CHECK_OBJ_NOTNULL(boc, BOC_MAGIC);
1197 277298
        Lck_Lock(&oc->objhead->mtx);
1198 277298
        assert(oc->refcnt > 0);
1199 277298
        assert(boc->refcount > 0);
1200 277298
        r = --boc->refcount;
1201 277298
        if (r == 0)
1202 118114
                oc->boc = NULL;
1203 277298
        Lck_Unlock(&oc->objhead->mtx);
1204 277298
        if (r == 0)
1205 118118
                ObjBocDone(wrk, oc, &boc);
1206 277298
}
1207
1208
/*--------------------------------------------------------------------
1209
 * Dereference objcore
1210
 *
1211
 * Returns zero if target was destroyed.
1212
 */
1213
1214
int
1215 288022
HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp)
1216
{
1217
        struct objcore *oc;
1218
        struct objhead *oh;
1219
1220 288022
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1221 288022
        TAKE_OBJ_NOTNULL(oc, ocp, OBJCORE_MAGIC);
1222 288022
        assert(oc->refcnt > 0);
1223
1224 288022
        oh = oc->objhead;
1225 288022
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
1226
1227 288022
        Lck_Lock(&oh->mtx);
1228 288022
        return (hsh_deref_objcore_unlock(wrk, &oc));
1229
}
1230
1231
static int
1232 289556
hsh_deref_objcore_unlock(struct worker *wrk, struct objcore **ocp)
1233
{
1234
        struct objcore *oc;
1235
        struct objhead *oh;
1236
        int r;
1237
1238 289556
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1239 289556
        TAKE_OBJ_NOTNULL(oc, ocp, OBJCORE_MAGIC);
1240 289556
        assert(oc->refcnt > 0);
1241
1242 289556
        oh = oc->objhead;
1243 289556
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
1244
1245 289556
        Lck_AssertHeld(&oh->mtx);
1246 289556
        assert(oh->refcnt > 0);
1247 289556
        r = --oc->refcnt;
1248 289556
        if (!r)
1249 76166
                VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
1250 289556
        Lck_Unlock(&oh->mtx);
1251 289556
        if (r != 0)
1252 213392
                return (r);
1253
1254 76164
        AZ(oc->flags & OC_F_BUSY);
1255 76164
        AZ(oc->exp_flags);
1256
1257 76164
        BAN_DestroyObj(oc);
1258 76164
        AZ(oc->ban);
1259
1260 76164
        if (oc->stobj->stevedore != NULL)
1261 72620
                ObjFreeObj(wrk, oc);
1262 76164
        ObjDestroy(wrk, &oc);
1263
1264
        /* Drop our ref on the objhead */
1265 76164
        assert(oh->refcnt > 0);
1266 76164
        (void)hsh_deref_objhead(wrk, &oh);
1267 76164
        return (0);
1268 289556
}
1269
1270
static int
1271 119600
hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh,
1272
    struct objcore *oc)
1273
{
1274
        struct objhead *oh;
1275
        struct rush rush;
1276
        int r;
1277
1278 119600
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1279 119600
        TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);
1280
1281 119600
        Lck_AssertHeld(&oh->mtx);
1282
1283 119600
        if (oh >= private_ohs && oh < private_ohs + vcountof(private_ohs)) {
1284 57398
                assert(VTAILQ_EMPTY(&oh->waitinglist));
1285 57398
                assert(oh->refcnt > 1);
1286 57398
                oh->refcnt--;
1287 57398
                Lck_Unlock(&oh->mtx);
1288 57398
                return (1);
1289
        }
1290
1291
        //lint --e{661}
1292
        //lint -specific(-e661)
1293
        //
1294
        // because of the static array, flexelint thinks that all ohs were from
1295
        // the static array :( the above suppression applies to the remainder of
1296
        // this function body and specific walks involving this function
1297
1298 62202
        INIT_OBJ(&rush, RUSH_MAGIC);
1299 62200
        if (!VTAILQ_EMPTY(&oh->waitinglist)) {
1300 301
                assert(oh->refcnt > 1);
1301 301
                hsh_rush1(wrk, oc, &rush);
1302 301
        }
1303
1304 62200
        if (oh->refcnt == 1)
1305 7360
                assert(VTAILQ_EMPTY(&oh->waitinglist));
1306
1307 62200
        assert(oh->refcnt > 0);
1308 62200
        r = hash->deref(wrk, oh); /* Unlocks oh->mtx */
1309 62200
        hsh_rush2(wrk, &rush);
1310 62200
        return (r);
1311 119598
}
1312
1313
static int
1314 76166
hsh_deref_objhead(struct worker *wrk, struct objhead **poh)
1315
{
1316
        struct objhead *oh;
1317
1318 76166
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1319 76166
        TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);
1320
1321 76166
        Lck_Lock(&oh->mtx);
1322 76166
        return (hsh_deref_objhead_unlock(wrk, &oh, NULL));
1323
}
1324
1325
void
1326 38341
HSH_Init(const struct hash_slinger *slinger)
1327
{
1328
1329 38341
        assert(DIGEST_LEN == VSHA256_LEN);      /* avoid #include pollution */
1330 38341
        hash = slinger;
1331 38341
        if (hash->start != NULL)
1332 38341
                hash->start();
1333 4945989
        for (struct objhead *oh = private_ohs;
1334 4945989
            oh < private_ohs + vcountof(private_ohs);
1335 4907648
            oh++) {
1336 4907648
                hsh_initobjhead(oh);
1337 4907648
                assert(oh->refcnt == 1);
1338 4907648
        }
1339 38341
}