| | varnish-cache/bin/varnishd/storage/storage_persistent.h |
| 0 |
|
/*- |
| 1 |
|
* Copyright (c) 2008-2011 Varnish Software AS |
| 2 |
|
* All rights reserved. |
| 3 |
|
* |
| 4 |
|
* Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
| 5 |
|
* |
| 6 |
|
* SPDX-License-Identifier: BSD-2-Clause |
| 7 |
|
* |
| 8 |
|
* Redistribution and use in source and binary forms, with or without |
| 9 |
|
* modification, are permitted provided that the following conditions |
| 10 |
|
* are met: |
| 11 |
|
* 1. Redistributions of source code must retain the above copyright |
| 12 |
|
* notice, this list of conditions and the following disclaimer. |
| 13 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
| 14 |
|
* notice, this list of conditions and the following disclaimer in the |
| 15 |
|
* documentation and/or other materials provided with the distribution. |
| 16 |
|
* |
| 17 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 18 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
| 21 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 23 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 24 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 25 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 26 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 27 |
|
* SUCH DAMAGE. |
| 28 |
|
* |
| 29 |
|
* Persistent storage method |
| 30 |
|
* |
| 31 |
|
* XXX: Before we start the client or maybe after it stops, we should give the |
| 32 |
|
* XXX: stevedores a chance to examine their storage for consistency. |
| 33 |
|
* |
| 34 |
|
* XXX: Do we ever free the LRU-lists ? |
| 35 |
|
*/ |
| 36 |
|
|
| 37 |
|
/* |
| 38 |
|
* |
| 39 |
|
* Overall layout: |
| 40 |
|
* |
| 41 |
|
* struct smp_ident; Identification and geometry |
| 42 |
|
* sha256[...] checksum of same |
| 43 |
|
* |
| 44 |
|
* struct smp_sign; |
| 45 |
|
* banspace_1; First ban-space |
| 46 |
|
* sha256[...] checksum of same |
| 47 |
|
* |
| 48 |
|
* struct smp_sign; |
| 49 |
|
* banspace_2; Second ban-space |
| 50 |
|
* sha256[...] checksum of same |
| 51 |
|
* |
| 52 |
|
* struct smp_sign; |
| 53 |
|
* struct smp_segment_1[N]; First Segment table |
| 54 |
|
* sha256[...] checksum of same |
| 55 |
|
* |
| 56 |
|
* struct smp_sign; |
| 57 |
|
* struct smp_segment_2[N]; Second Segment table |
| 58 |
|
* sha256[...] checksum of same |
| 59 |
|
* |
| 60 |
|
* N segments { |
| 61 |
|
* struct smp_sign; |
| 62 |
|
* struct smp_object[M] Objects in segment |
| 63 |
|
* sha256[...] checksum of same |
| 64 |
|
* objspace |
| 65 |
|
* } |
| 66 |
|
* |
| 67 |
|
*/ |
| 68 |
|
|
| 69 |
|
/*
 * Identification block.
 *
 * The identblock is located in the first sector of the storage space.
 * It is written once and not subsequently modified in normal operation.
 * It is immediately followed by a SHA256sum of the structure, as stored.
 */

struct smp_ident {
	/*
	 * Human readable ident, so people and programs can tell what
	 * the file or device contains.
	 */
	char			ident[32];

	uint32_t		byte_order;	/* 0x12345678 */

	uint32_t		size;		/* sizeof(struct smp_ident) */

	uint32_t		major_version;

	uint32_t		unique;

	uint32_t		align;		/* alignment in silo */

	uint32_t		granularity;	/* smallest ... in bytes */

	uint64_t		mediasize;	/* ... in bytes */

	/* Pointers to stuff; the names below are indices into the array */
	uint64_t		stuff[6];
#define SMP_BAN1_STUFF		0
#define SMP_BAN2_STUFF		1
#define SMP_SEG1_STUFF		2
#define SMP_SEG2_STUFF		3
#define SMP_SPC_STUFF		4
#define SMP_END_STUFF		5
};
| 104 |
|
|
| 105 |
|
/*
 * sizeof(struct smp_ident) is meant to be fixed and identical on all
 * platforms.  SMP_IDENT_SIZE records the expected value; the check
 * against it is an assert in smp_init().
 */
#define SMP_IDENT_SIZE		112

/* Text identifying a silo */
#define SMP_IDENT_STRING	"Varnish Persistent Storage Silo"
| 112 |
|
|
| 113 |
|
/*
 * Signature header; this is used to sign various bits on the disk.
 */

struct smp_sign {
	char			ident[8];
	uint32_t		unique;
	uint64_t		mapped;
	/*
	 * Length of the signed data only; struct smp_sign itself is
	 * not counted.  NB: Must be last.
	 */
	uint64_t		length;
};

/* One struct smp_sign plus VSHA256_LEN bytes */
#define SMP_SIGN_SPACE		(sizeof(struct smp_sign) + VSHA256_LEN)
| 127 |
|
|
| 128 |
|
/*
 * A segment pointer: where a segment and its object list are found.
 */

struct smp_segptr {
	uint64_t		offset;		/* rel to silo */
	uint64_t		length;		/* rel to offset */
	uint64_t		objlist;	/* rel to silo */
	uint32_t		lobjlist;	/* len of objlist */
};
| 138 |
|
|
| 139 |
|
/*
 * An object descriptor
 *
 * Encoding of the ttl field:
 *   positive:	ttl is obj.ttl, with obj.grace being NAN
 *   negative:	ttl is - (obj.ttl + obj.grace)
 */

struct smp_object {
	uint8_t			hash[32];	/* really: DIGEST_LEN */
	double			t_origin;
	float			ttl;
	float			grace;
	float			keep;
	uint32_t		__filler__;	/* -> align/8 on 32bit */
	double			ban;
	uint64_t		ptr;		/* rel to silo */
};
| 156 |
|
|
| 157 |
|
/* Assert that the calling thread is the one stored in (sc)->thread */
#define ASSERT_SILO_THREAD(sc)						\
	do {								\
		assert(pthread_equal(pthread_self(), (sc)->thread));	\
	} while (0)
| 159 |
|
|
| 160 |
|
/* |
| 161 |
|
* Context for a signature. |
| 162 |
|
* |
| 163 |
|
* A signature is a sequence of bytes in the silo, signed by a SHA256 hash |
| 164 |
|
* which follows the bytes. |
| 165 |
|
* |
| 166 |
|
* The context structure allows us to append to a signature without |
| 167 |
|
* recalculating the entire SHA256 hash. |
| 168 |
|
*/ |
| 169 |
|
|
| 170 |
|
struct smp_signctx { |
| 171 |
|
struct smp_sign *ss; |
| 172 |
|
struct VSHA256Context ctx; |
| 173 |
|
uint32_t unique; |
| 174 |
|
const char *id; |
| 175 |
|
}; |
| 176 |
|
|
| 177 |
|
/* |
| 178 |
|
* A space wrapped by a signature |
| 179 |
|
* |
| 180 |
|
* A signspace is a chunk of the silo that is wrapped by a |
| 181 |
|
* signature. It has attributes for size, so range checking can be |
| 182 |
|
* performed. |
| 183 |
|
* |
| 184 |
|
*/ |
| 185 |
|
|
| 186 |
|
struct smp_signspace { |
| 187 |
|
struct smp_signctx ctx; |
| 188 |
|
uint8_t *start; |
| 189 |
|
uint64_t size; |
| 190 |
|
}; |
| 191 |
|
|
| 192 |
|
struct smp_sc; |
| 193 |
|
|
| 194 |
|
/* XXX: name confusion with on-media version ? */ |
| 195 |
|
struct smp_seg { |
| 196 |
|
unsigned magic; |
| 197 |
|
#define SMP_SEG_MAGIC 0x45c61895 |
| 198 |
|
|
| 199 |
|
struct smp_sc *sc; |
| 200 |
|
VTAILQ_HEAD(,objcore) objcores; |
| 201 |
|
|
| 202 |
|
VTAILQ_ENTRY(smp_seg) list; /* on smp_sc.smp_segments */ |
| 203 |
|
|
| 204 |
|
struct smp_segptr p; |
| 205 |
|
|
| 206 |
|
unsigned flags; |
| 207 |
|
#define SMP_SEG_MUSTLOAD (1 << 0) |
| 208 |
|
#define SMP_SEG_LOADED (1 << 1) |
| 209 |
|
|
| 210 |
|
uint32_t nobj; /* Number of objects */ |
| 211 |
|
uint32_t nalloc; /* Allocations */ |
| 212 |
|
uint32_t nfixed; /* How many fixed objects */ |
| 213 |
|
|
| 214 |
|
/* Only for open segment */ |
| 215 |
|
struct smp_object *objs; /* objdesc array */ |
| 216 |
|
struct smp_signctx ctx[1]; |
| 217 |
|
}; |
| 218 |
|
|
| 219 |
|
VTAILQ_HEAD(smp_seghead, smp_seg); |
| 220 |
|
|
| 221 |
|
struct smp_sc { |
| 222 |
|
unsigned magic; |
| 223 |
|
#define SMP_SC_MAGIC 0x7b73af0a |
| 224 |
|
struct stevedore *parent; |
| 225 |
|
|
| 226 |
|
pthread_t bgthread; |
| 227 |
|
unsigned flags; |
| 228 |
|
#define SMP_SC_LOADED (1 << 0) |
| 229 |
|
#define SMP_SC_STOP (1 << 1) |
| 230 |
|
|
| 231 |
|
const struct stevedore *stevedore; |
| 232 |
|
int fd; |
| 233 |
|
const char *filename; |
| 234 |
|
uint64_t mediasize; |
| 235 |
|
uintptr_t align; |
| 236 |
|
uint32_t granularity; |
| 237 |
|
uint32_t unique; |
| 238 |
|
|
| 239 |
|
uint8_t *base; |
| 240 |
|
|
| 241 |
|
struct smp_ident *ident; |
| 242 |
|
|
| 243 |
|
struct smp_seghead segments; |
| 244 |
|
struct smp_seg *cur_seg; |
| 245 |
|
uint64_t next_bot; /* next alloc address bottom */ |
| 246 |
|
uint64_t next_top; /* next alloc address top */ |
| 247 |
|
|
| 248 |
|
uint64_t free_offset; |
| 249 |
|
|
| 250 |
|
pthread_t thread; |
| 251 |
|
|
| 252 |
|
VTAILQ_ENTRY(smp_sc) list; |
| 253 |
|
|
| 254 |
|
struct smp_signctx idn; |
| 255 |
|
struct smp_signspace ban1; |
| 256 |
|
struct smp_signspace ban2; |
| 257 |
|
struct smp_signspace seg1; |
| 258 |
|
struct smp_signspace seg2; |
| 259 |
|
|
| 260 |
|
struct lock mtx; |
| 261 |
|
|
| 262 |
|
/* Cleaner metrics */ |
| 263 |
|
|
| 264 |
|
unsigned min_nseg; |
| 265 |
|
unsigned aim_nseg; |
| 266 |
|
unsigned max_nseg; |
| 267 |
|
|
| 268 |
|
uint64_t min_segl; |
| 269 |
|
uint64_t aim_segl; |
| 270 |
|
uint64_t max_segl; |
| 271 |
|
|
| 272 |
|
uint64_t free_reserve; |
| 273 |
|
}; |
| 274 |
|
|
| 275 |
|
/*--------------------------------------------------------------------*/ |
| 276 |
|
|
| 277 |
|
/*
 * Rounding helpers.  All of them round with respect to (sc)->align and
 * leave the arithmetic to RUP2()/RDN2(), which are defined elsewhere.
 *
 * The sc argument is parenthesized in every expansion, so any pointer
 * expression (e.g. "silos + 1") can be passed, not only a plain name.
 */

/* Pointer round up */
#define PRNUP(sc, x)		((void*)RUP2((uintptr_t)(x), (sc)->align))

/* Integer round up/down & assert */
#define IRNDN(sc, x)		RDN2(x, (sc)->align)
#define IRNUP(sc, x)		RUP2(x, (sc)->align)
#define IASSERTALIGN(sc, x)	assert(IRNDN(sc, x) == (x))
| 284 |
|
|
| 285 |
|
/*--------------------------------------------------------------------*/ |
| 286 |
|
|
| 287 |
|
/*
 * Assert that ptr lies within [(sc)->base, (sc)->base + (sc)->mediasize).
 * NB: both arguments are expanded more than once.
 */
#define ASSERT_PTR_IN_SILO(sc, ptr)					\
	assert((const void*)((sc)->base) <= (const void*)(ptr) &&	\
	    (const void *)((sc)->base + (sc)->mediasize) >		\
	    (const void*)(ptr))
| 290 |
|
|
| 291 |
|
/*--------------------------------------------------------------------*/ |
| 292 |
|
|
| 293 |
|
/* Address right after the struct smp_sign header of a signctx */
#define SIGN_DATA(ctx)							\
	((void *)((ctx)->ss + 1))

/* SIGN_DATA() advanced by ss->length bytes */
#define SIGN_END(ctx)							\
	((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length))

/* The same accessors for a signspace, via its embedded ctx */
#define SIGNSPACE_DATA(spc)						\
	(SIGN_DATA(&(spc)->ctx))
#define SIGNSPACE_FRONT(spc)						\
	(SIGN_END(&(spc)->ctx))
#define SIGNSPACE_LEN(spc)						\
	((spc)->ctx.ss->length)

/* (spc)->size minus the length currently recorded in the signature */
#define SIGNSPACE_FREE(spc)						\
	((spc)->size - SIGNSPACE_LEN(spc))
| 300 |
|
|
| 301 |
|
/* storage_persistent_mgt.c */ |
| 302 |
|
|
| 303 |
|
void smp_mgt_init(struct stevedore *parent, int ac, char * const *av); |
| 304 |
|
|
| 305 |
|
/* storage_persistent_silo.c */ |
| 306 |
|
|
| 307 |
|
void smp_load_seg(struct worker *, const struct smp_sc *sc, struct smp_seg *sg); |
| 308 |
|
void smp_new_seg(struct smp_sc *sc); |
| 309 |
|
void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg); |
| 310 |
|
void smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx); |
| 311 |
|
void smp_save_segs(struct smp_sc *sc); |
| 312 |
|
sml_getobj_f smp_sml_getobj; |
| 313 |
|
void smp_oc_objfree(struct worker *, struct objcore *); |
| 314 |
|
obj_event_f smp_oc_event; |
| 315 |
|
|
| 316 |
|
/* storage_persistent_subr.c */ |
| 317 |
|
|
| 318 |
|
void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx, |
| 319 |
|
uint64_t off, const char *id); |
| 320 |
|
int smp_chk_sign(struct smp_signctx *ctx); |
| 321 |
|
void smp_reset_sign(struct smp_signctx *ctx); |
| 322 |
|
void smp_sync_sign(const struct smp_signctx *ctx); |
| 323 |
|
|
| 324 |
|
int smp_chk_signspace(struct smp_signspace *spc); |
| 325 |
|
void smp_append_signspace(struct smp_signspace *spc, uint32_t len); |
| 326 |
|
void smp_reset_signspace(struct smp_signspace *spc); |
| 327 |
|
void smp_copy_signspace(struct smp_signspace *dst, |
| 328 |
|
const struct smp_signspace *src); |
| 329 |
|
|
| 330 |
|
void smp_newsilo(struct smp_sc *sc); |
| 331 |
|
int smp_valid_silo(struct smp_sc *sc); |
| 332 |
|
|
| 333 |
|
/*-------------------------------------------------------------------- |
| 334 |
|
* Calculate payload of some stuff |
| 335 |
|
*/ |
| 336 |
|
|
| 337 |
|
static inline uint64_t |
| 338 |
53680 |
smp_stuff_len(const struct smp_sc *sc, unsigned stuff) |
| 339 |
|
{ |
| 340 |
|
uint64_t l; |
| 341 |
|
|
| 342 |
53680 |
assert(stuff < SMP_END_STUFF); |
| 343 |
53680 |
l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff]; |
| 344 |
53680 |
l -= SMP_SIGN_SPACE; |
| 345 |
53680 |
return (l); |
| 346 |
|
} |
| 347 |
|
|
| 348 |
|
static inline uint64_t |
| 349 |
5680 |
smp_segend(const struct smp_seg *sg) |
| 350 |
|
{ |
| 351 |
|
|
| 352 |
5680 |
return (sg->p.offset + sg->p.length); |
| 353 |
|
} |
| 354 |
|
|
| 355 |
|
static inline uint64_t |
| 356 |
3760 |
smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg) |
| 357 |
|
{ |
| 358 |
|
|
| 359 |
3760 |
IASSERTALIGN(sc, sc->next_bot); |
| 360 |
3760 |
assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE)); |
| 361 |
3760 |
assert(sc->next_bot >= sg->p.offset); |
| 362 |
3760 |
assert(sc->next_top < sg->p.offset + sg->p.length); |
| 363 |
3760 |
return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE)); |
| 364 |
|
} |