1 module libd.datastructures..string; 2 import libd.memory, libd.datastructures, libd.util.errorhandling; 3 4 version(X86_64){} 5 else static assert(false, "libd's string is only compatible on x86_64, at least for the foreseeable future."); 6 7 // For now, String will just use g_alloc because I feel having different strings with different allocators isn't actually worth it? 8 // If needed libd can provide a specialised allocator for strings. 9 struct String 10 { 11 /++ 12 So, this abuses a few things: 13 * Pointers only actually use 48-bits, with the upper 16-bits being sign extended. 14 * The 47th bit is pretty much always 0 in user mode, so the upper 16-bits are also 0. 15 * Little endian is an important metric for why the pointer is put last, and x86_64 is a little endian architecture. 16 17 For 'small' strings: 18 * Bytes 0-22 contain the small string. 19 * Byte 23 contains the null terminator (since I want libd's strings to always provide one without reallocation needed - cheaper integration with C libs). 20 * Byte 24 contains the 'small' length, which will always be non-0 for small strings. 21 22 For 'big' strings: 23 * Bytes 0-8 contain the length. 24 * Bytes 8-16 contain the capacity. 25 * Bytes 16-24 contain the allocated pointer. 26 * Because of little endian, and the fact the upper 16-bits of a pointer will be 0, this sets the 'small' length to 0 27 which we can use as a flag to determine between small and big strings. 28 29 Special case 'empty': 30 If the string is completely empty, then Bits 16-24 will be all 0, indicating both that there's no 'small' length, and also a null 'big' pointer. 31 + ++/ 32 private union Store 33 { 34 struct // smol 35 { 36 // D chars init to 0xFF (invalid utf-8 character), which we don't want here. 37 // Although, because it's in a union, I don't actually know how D inits this memory by default. Better safe than sorry. 38 char[22] smallString = '\0'; 39 char smallNullTerm = '\0'; 40 ubyte smallLength; 41 } 42 43 struct // big 44 { 45 size_t bigLength; 46 size_t bigCapacity; 47 char* bigPtr; 48 } 49 } 50 private Store _store; 51 static assert(typeof(this).sizeof == 24, "String isn't 24 bytes anymore :("); 52 53 private alias _alloc = g_alloc; 54 private alias Grow = DefaultGrowth; 55 56 @nogc nothrow: 57 58 @trusted 59 this(string str) 60 { 61 this = str; 62 } 63 64 @trusted // Bounds checking + always confirming pointer validity *should* make this safe. 65 this(bcstring str) 66 { 67 this = str; 68 } 69 70 this(const char* ptr) 71 { 72 import runtime.dynamicfuncs : strlen; 73 const length = strlen(ptr); 74 this.put(ptr[0..length]); 75 } 76 77 this(this) @trusted 78 { 79 if(!this.isCompletelyEmpty && !this.isSmall) 80 { 81 auto slice = this._alloc.makeArray!char(this._store.bigLength+1).slice; // We'll just allocate the length and not use Growth or capacity. 82 if(slice is null) 83 onOutOfMemoryError(slice.ptr); 84 slice[0..$-1] = this._store.bigPtr[0..this._store.bigLength]; 85 slice[$-1] = '\0'; 86 this._store.bigPtr = slice.ptr; 87 this._store.bigLength = slice.length-1; // Otherwise we include the null term in this value, which we don't do. 88 this._store.bigCapacity = slice.length-1; // ^^^ 89 } 90 } 91 92 @trusted 93 ~this() 94 { 95 this.disposeBigStringIfExists(); 96 this._store = Store.init; 97 } 98 99 @trusted 100 void putMany(Params...)(scope Params params) 101 { 102 foreach(param; params) 103 this.put(param); 104 } 105 106 @trusted // This is technically safe by itself due to all the checks, but `chars` might point to bad memory. Can't express that in D though. 107 void put(scope bcstring chars) 108 { 109 auto newLength = chars.length; 110 if(this.isSmall) 111 newLength += this._store.smallLength; 112 else 113 newLength += this._store.bigLength; 114 115 if(this.isSmall || this.isCompletelyEmpty) 116 { 117 if(newLength <= this._store.smallString.length) 118 { 119 const start = this._store.smallLength; 120 this._store.smallString[start..start + chars.length] = chars[0..$]; 121 this._store.smallLength += chars.length; 122 return; 123 } 124 125 this.moveToBigString(); 126 } 127 128 this.growBigStringIfNeeded(newLength+1); // +1 for null term. 129 const start = this._store.bigLength; 130 this._store.bigPtr[start..start+chars.length] = chars[0..$]; 131 this._store.bigLength += chars.length; 132 this._store.bigPtr[this._store.bigLength] = '\0'; 133 } 134 135 @trusted 136 void put()(scope const auto ref String str) 137 { 138 this.put(str.sliceUnsafe); 139 } 140 141 @trusted 142 void put(char ch) 143 { 144 char[] fakeArray = (&ch)[0..1]; 145 this.put(fakeArray); 146 } 147 148 void put(Range)(Range r) 149 if(!is(Range : bcstring)) 150 { 151 foreach(value; r) 152 this.put(value); 153 } 154 155 @trusted 156 bool opEquals(scope bcstring other) const 157 { 158 return __equals(this.sliceUnsafe, other); 159 } 160 161 @trusted 162 bool opEquals()(scope auto ref const String other) const 163 { 164 return this.sliceUnsafe == other.sliceUnsafe; 165 } 166 167 @safe 168 bool opEquals(typeof(null) _) const 169 { 170 return this.isCompletelyEmpty; 171 } 172 173 @trusted 174 void opAssign(bcstring str) 175 { 176 if(str is null) 177 this = null; 178 else if(str.length <= this._store.smallString.length) 179 this.setSmallString(str); 180 else 181 this.setBigString(str); 182 } 183 184 @trusted 185 void opAssign(typeof(null) _) 186 { 187 this.__xdtor(); 188 } 189 190 @safe 191 size_t opDollar() const 192 { 193 return this.length; 194 } 195 196 @trusted 197 bcstring opIndex() const 198 { 199 return this.sliceUnsafe; 200 } 201 202 @trusted 203 char opIndex(size_t index) const 204 { 205 assert(index < this.length, "Index is out of bounds."); 206 return this.sliceUnsafe[index]; 207 } 208 209 @trusted // Function is @safe, further usage by user is not. 210 bcstring opSlice(size_t start, size_t end) const 211 { 212 assert(end <= this.length, "End index is out of bounds."); 213 assert(start <= end, "Start index is greater than End index."); 214 return this.sliceUnsafe[start..end]; 215 } 216 217 @trusted // HEAVILY assumes that the allocated memory is still valid. Since at the moment we always use g_alloc, this should be guarenteed outside of bugs in this struct. 218 void opIndexAssign(char v, size_t index) 219 { 220 assert(index < this.length, "Index is out of bounds."); 221 cast()this.sliceUnsafe[index] = v; // cast away const is fine for internal functions like this. 222 } 223 224 @trusted 225 void opSliceAssign(char v, size_t start, size_t end) 226 { 227 auto slice = cast(char[])this[start..end]; 228 slice[] = v; 229 } 230 231 @trusted 232 void opSliceAssign(bcstring str, size_t start, size_t end) 233 { 234 auto slice = cast(char[])this[start..end]; 235 assert(end - start == str.length, "Mismatch between str.length, and (end - start)."); 236 slice[0..$] = str[0..$]; 237 } 238 239 @trusted 240 String opBinary(string op)(const scope auto ref String rhs) const 241 if(op == "~") 242 { 243 String ret = cast()this; // NRVO better come into play here. 244 ret.put(rhs); 245 return ret; 246 } 247 248 @trusted 249 String opBinary(string op)(scope bcstring rhs) const 250 if(op == "~") 251 { 252 String ret = cast()this; // NRVO better come into play here. 253 ret.put(rhs); 254 return ret; 255 } 256 257 @trusted 258 void opOpAssign(string op)(const scope auto ref String rhs) 259 if(op == "~") 260 { 261 this.put(rhs); 262 } 263 264 @trusted 265 void opOpAssign(string op)(scope bcstring rhs) 266 if(op == "~") 267 { 268 this.put(rhs); 269 } 270 271 alias range = rangeUnsafe; // Could probably make a safe/safer custom range in the future. 272 @property 273 bcstring rangeUnsafe() const 274 { 275 return this.sliceUnsafe; 276 } 277 278 @property @safe 279 size_t length() const 280 { 281 return (this.isSmall) ? this._store.smallLength : this._store.bigLength; 282 } 283 284 @property 285 void length(size_t newLen) 286 { 287 if(this.isSmall || this.isCompletelyEmpty) 288 { 289 if(newLen > this._store.smallString.length) 290 { 291 this.moveToBigString(); 292 this.length = newLen; // So we don't have to duplicate logic. 293 } 294 else 295 this._store.smallLength = cast(ubyte)newLen; 296 return; 297 } 298 299 // Lazy choice: Once we're a big string, we're always a big string. 300 // Will eventually *not* do this, but >x3 301 if(newLen > this._store.bigLength) 302 { 303 const start = this._store.bigLength; 304 this.growBigStringIfNeeded(newLen); 305 this._store.bigPtr[start..newLen] = char.init; 306 } 307 308 this._store.bigLength = newLen; 309 this._store.bigPtr[newLen] = '\0'; 310 } 311 312 @property 313 const(char)* ptrUnsafe() const return 314 { 315 return (this.isSmall) ? &this._store.smallString[0] : this._store.bigPtr; 316 } 317 318 @property 319 bcstring sliceUnsafe() const return 320 { 321 return (this.isSmall) ? this._store.smallString[0..this._store.smallLength] : this._store.bigPtr[0..this._store.bigLength]; 322 } 323 324 @trusted 325 private void setSmallString(scope bcstring chars) 326 { 327 assert(chars.length <= this._store.smallString.length); 328 this.disposeBigStringIfExists(); // Resets us to a "completely empty" state. 329 this._store.smallString[0..chars.length] = chars[0..$]; 330 this._store.smallLength = cast(ubyte)chars.length; 331 } 332 333 @trusted 334 private void setBigString(scope bcstring chars) 335 { 336 this.growBigStringIfNeeded(chars.length+1); // +1 for null term. 337 assert(this._store.smallLength == 0, "Nani?"); 338 this._store.bigLength = chars.length; 339 this._store.bigPtr[0..chars.length] = chars[0..$]; 340 this._store.bigPtr[chars.length] = '\0'; 341 assert(!this.isSmall, "Eh?"); 342 } 343 344 @trusted 345 private void moveToBigString() 346 { 347 if(this.isCompletelyEmpty || !this.isSmall) 348 return; 349 350 // Have to copy into a buffer first, otherwise setBigString will overwrite the string data before it ends up copying it. 351 char[22] buffer; 352 buffer[0..$] = this._store.smallString[0..$]; 353 const smallLength = this._store.smallLength; 354 this.setBigString(buffer[0..smallLength]); 355 } 356 357 @trusted 358 private void growBigStringIfNeeded(size_t newSize) 359 { 360 if(this.isCompletelyEmpty || this.isSmall) 361 { 362 this._store.bigCapacity = Grow.grow(newSize); 363 this._store.bigPtr = this._alloc.makeArray!char(this._store.bigCapacity).ptr; 364 if(this._store.bigPtr is null) 365 onOutOfMemoryError(null); 366 return; 367 } 368 369 if(newSize > this._store.bigCapacity) 370 { 371 const oldCapacity = this._store.bigCapacity; 372 this._store.bigCapacity = Grow.grow(newSize); 373 this._store.bigPtr = this._alloc.growArray!char( 374 this._store.bigCapacity, 375 this._store.bigPtr[0..oldCapacity].notNull!(_alloc.Tag) 376 ).ptr; 377 if(this._store.bigPtr is null) 378 onOutOfMemoryError(null); 379 } 380 } 381 382 @trusted 383 private void disposeBigStringIfExists() 384 { 385 if(!this.isCompletelyEmpty && !this.isSmall) 386 { 387 this._alloc.dispose(this._store.bigPtr); // set to null by .dispose 388 this._store.smallString[] = '\0'; 389 assert(this.isCompletelyEmpty, "?"); 390 } 391 } 392 393 @trusted 394 private bool isCompletelyEmpty() const 395 { 396 return this._store.bigPtr is null; 397 } 398 399 @safe 400 private bool isSmall() const 401 { 402 return this._store.smallLength > 0; 403 } 404 } 405 /// 406 @("String") 407 unittest 408 { 409 auto s = String("Hello"); 410 assert(s.isSmall); // .isSmall is a private function 411 assert(!s.isCompletelyEmpty); // ^^^ 412 assert(s.length == 5); 413 assert(s == "Hello"); 414 assert(s.ptrUnsafe[5] == '\0'); 415 416 auto s2 = s; 417 assert(s2.isSmall && !s2.isCompletelyEmpty); 418 assert(s2.length == 5); 419 assert(s2 == "Hello"); 420 s2.put(", world!"); 421 assert(s2.length == 13); 422 assert(s.length == 5); 423 assert(s2 == "Hello, world!"); 424 assert(s2.ptrUnsafe[13] == '\0'); 425 426 s = String("This is a big string that is bigger than 22 characters long!"); 427 assert(!s.isSmall); 428 assert(s.length == 60); 429 assert(s == "This is a big string that is bigger than 22 characters long!"); 430 assert(s.ptrUnsafe[60] == '\0'); 431 432 s2 = s; 433 assert(!s2.isSmall); 434 assert(s2.length == 60); 435 assert(s2._store.bigPtr !is s._store.bigPtr); 436 s.__xdtor(); 437 s2.put("This shouldn't crash because we copied things."); 438 assert(s2 == "This is a big string that is bigger than 22 characters long!This shouldn't crash because we copied things."); 439 assert(s2.ptrUnsafe[s2.length] == '\0'); 440 441 s2.length = 60; 442 assert(s2.length == 60); 443 assert(s2 == "This is a big string that is bigger than 22 characters long!"); 444 assert(s2.ptrUnsafe[60] == '\0'); 445 446 s2.length = 61; 447 assert(s2 == "This is a big string that is bigger than 22 characters long!"~char.init); 448 assert(s2.ptrUnsafe[61] == '\0'); 449 450 // Making sure we don't crash when using any of these things from a .init state. 451 s2.__xdtor(); 452 assert(!s2.isSmall && s2.isCompletelyEmpty); 453 assert(s2.ptrUnsafe is null); 454 assert(s2.sliceUnsafe is null); 455 assert(s2.length == 0); 456 s2.put("abc"); 457 assert(s2.isSmall); 458 459 assert(s2 == "abc"); 460 assert(s2 == String("abc")); 461 assert(s2 != null); 462 s2 = null; 463 assert(s2 == null); 464 465 s2 = "abc"; 466 assert(s2.isSmall); 467 assert(s2 == "abc"); 468 assert(s2[1] == 'b'); 469 assert(s2[0..2] == "ab"); 470 assert(s2[3..3].length == 0); 471 assert(s2[] == "abc"); 472 473 s2[1] = 'd'; 474 assert(s2 == "adc"); 475 s2[0..2] = 'b'; 476 assert(s2 == "bbc"); 477 s2[0..3] = "put"; 478 assert(s2 == "put"); 479 480 assert(s2 ~ "in" == "putin"); 481 assert(s2 ~ String("ty") == "putty"); 482 s2 ~= " it "; 483 assert(s2 == "put it "); 484 s2 ~= String("in mah belleh"); 485 assert(s2 == "put it in mah belleh"); 486 }