Comptime is cool, but you might have noticed that it has some quirks. One of these quirks is the property that each referenced pointer will make it into the final binary.
Also if you compute strings and they repeat, each string receives its own unique memory address.
Consider this example where we collect all capitalized words from Lorem Ipsum:
const std = @import("std");
const input = @embedFile("input.txt");
export const capitalized = blk: {
@setEvalBranchQuota(100_000);
var result: []const []const u8 = &.{};
var iter = std.mem.tokenize(u8, input, ",.\r\n ");
while (iter.next()) |word| {
if (std.ascii.isUpper(word[0])) {
result = result ++ [1][]const u8{word};
}
}
break :blk result;
};
The input.txt is several kilobytes large, and as we reference it via the tokenizer, it gets put as a whole into the final binary. This is not optimal, as we are only interested in the capitalized words only, the rest is garbage.
I figured a pretty easy way to intern strings such that they will only be exactly once in the final executable, no matter how many source memory regions we have. This is done by converting the string into an array, then return a reference to that array:
fn internString(comptime str: []const u8) []const u8 {
return internStringBuffer(str.len, str[0..str.len].*);
}
fn internStringBuffer(comptime len: comptime_int, comptime items: [len]u8) []const u8 {
comptime var storage: [len]u8 = items;
return &storage;
}
This uses the fact that comptime
calls are memoized, and as we pass only concrete values to internStringBuffer
, it will only get called once and for subsequent calls will always return the same memory reference.
So we can adjust our example like this:
const std = @import("std");
const input = @embedFile("input.txt");
export const capitalized = blk: {
@setEvalBranchQuota(100_000);
var result: []const []const u8 = &.{};
var iter = std.mem.tokenize(u8, input, ",.\r\n ");
while (iter.next()) |word| {
if (std.ascii.isUpper(word[0])) {
result = result ++ [1][]const u8{
internString(word),
};
}
}
break :blk result;
};
fn internString(comptime str: []const u8) []const u8 {
return internStringBuffer(str.len, str[0..str.len].*);
}
fn internStringBuffer(comptime len: comptime_int, comptime items: [len]u8) []const u8 {
comptime var storage: [len]u8 = items;
return &storage;
}
But did it work as expected? Let's check!
First, let's compile both examples into a shared object, then dump the .rodata
section (which is where our strings are put):
zig build-lib -dynamic -O ReleaseSmall -fno-compiler-rt bad.zig
zig build-lib -dynamic -O ReleaseSmall -fno-compiler-rt good.zig
objdump libbad.so -s -j .rodata > bad.dump
objdump libgood.so -s -j .rodata > good.dump
If we check the sizes of our shared objects, libgood.so
is roughly 4 kB smaller than libbad.so
, so it seems like we've made it work.
Checking both dumps, we can verify that it worked:
libbad.so: file format elf64-x86-64
Contents of section .rodata:
0c90 4c6f7265 6d206970 73756d20 646f6c6f Lorem ipsum dolo
0ca0 72207369 7420616d 65742c20 636f6e73 r sit amet, cons
0cb0 65637465 74756572 20616469 70697363 ectetuer adipisc
0cc0 696e6720 656c6974 2e204165 6e65616e ing elit. Aenean
0cd0 20636f6d 6d6f646f 206c6967 756c6120 commodo ligula
0ce0 65676574 20646f6c 6f722e20 41656e65 eget dolor. Aene
0cf0 616e206d 61737361 2e204375 6d20736f an massa. Cum so
0d00 63696973 206e6174 6f717565 2070656e ciis natoque pen
0d10 61746962 75732065 74206d61 676e6973 atibus et magnis
0d20 20646973 20706172 74757269 656e7420 dis parturient
0d30 6d6f6e74 65732c20 6e617363 65747572 montes, nascetur
0d40 20726964 6963756c 7573206d 75732e20 ridiculus mus.
0d50 446f6e65 63207175 616d2066 656c6973 Donec quam felis
0d60 2c20756c 74726963 69657320 6e65632c , ultricies nec,
0d70 2070656c 6c656e74 65737175 65206575 pellentesque eu
0d80 2c207072 65746975 6d207175 69732c20 , pretium quis,
0d90 73656d2e 204e756c 6c612063 6f6e7365 sem. Nulla conse
0da0 71756174 206d6173 73612071 75697320 quat massa quis
0db0 656e696d 2e20446f 6e656320 70656465 enim. Donec pede
0dc0 206a7573 746f2c20 6672696e 67696c6c justo, fringill
0dd0 61207665 6c2c2061 6c697175 6574206e a vel, aliquet n
<snip 280 lines>
1f50 20536564 206d6167 6e6100 Sed magna.
Okay, so this looks like we expected it to look like. The whole file is put into the final executable. Not good.
Let's check our improved version:
libgood.so: file format elf64-x86-64
Contents of section .rodata:
0c90 4c6f7265 6d41656e 65616e43 756d446f LoremAeneanCumDo
0ca0 6e65634e 756c6c61 496e4e75 6c6c616d necNullaInNullam
0cb0 496e7465 67657256 6976616d 7573416c IntegerVivamusAl
0cc0 69717561 6d506861 73656c6c 75735175 iquamPhasellusQu
0cd0 69737175 65457469 616d4375 72616269 isqueEtiamCurabi
0ce0 7475724e 616d5365 64467573 63655665 turNamSedFusceVe
0cf0 73746962 756c756d 43757261 653b5065 stibulumCurae;Pe
0d00 6c6c656e 74657371 75655375 7370656e llentesqueSuspen
0d10 64697373 65557450 726f696e 4d6f7262 disseUtProinMorb
0d20 69447569 73437261 734e756e 634d6165 iDuisCrasNuncMae
0d30 63656e61 73507261 6573656e 74 cenasPraesent
Yes. That's the whole .rodata
section. Looks like everything got cleanly deduplicated and there's no trace of the full Lorem Ipsum. Nice!
Now go, and use this pattern to make your Zig programs smaller, faster and better! Hush hush!
Latest comments (0)