compressScrabble method
Compresses the dictionary text (one word per line) into a string.
Implementation
/// Compresses [dictionary] (one word per line) into a string.
///
/// The lines of [dictionary] are processed in up to three passes:
///
/// 1. Each word is encoded as the length of the prefix it shares with the
///    previous word plus the remaining suffix (a word that is just the
///    previous word plus `s` gets a dedicated plural entry). When [useLookup]
///    is false the entries are written to the buffer in this pass; when it is
///    true the pass only counts entries and then builds the lookup table.
/// 2. Runs only when [useLookup] is true: writes the buffer, emitting a
///    table insert on the first use of a lookup entry and a table index on
///    every later use.
/// 3. Reads the buffer back and asserts that each decoded entry equals the
///    source line, then produces the return value with
///    `buffer.getCompressedBuffer`. The comparison is an `assert`, so it is
///    only checked when assertions are enabled.
///
/// When [statistics] is true, sizes and timings are written to stdout; with
/// [verbose] also true, every chosen lookup table entry is printed.
///
/// [quickSize] sets the quick lookup size passed to the buffer; a negative
/// value, or one larger than `lookupCharacters.length`, selects
/// `lookupCharacters.length`.
///
/// Returns the string produced by `buffer.getCompressedBuffer`.
String? compressScrabble(String dictionary,
    {bool statistics = false,
    bool verbose = false,
    bool useLookup = false,
    int quickSize = -1}) {
  // Lookup dimensions
  final quickLookupSize = quickSize < 0 || quickSize > lookupCharacters.length
      ? lookupCharacters.length
      : quickSize;
  // Indices below quickLookupSize are costed at 1 unit, the rest at 2 units
  // (see the cost computation below) - presumably one vs. two characters.
  final lookupSize = quickLookupSize >= lookupCharacters.length
      ? quickLookupSize
      : quickLookupSize +
          (lookupCharacters.length - quickLookupSize) *
              lookupCharacters2.length;
  // Buffer for compressed dictionary
  var buffer = Buffer(lookupCharacters, lookupCharacters2, quickLookupSize,
      wordCharacters, prefixCharacters, specialCharacters);
  // First pass computes lookup table, second pass uses it, third pass validates
  var lookupTable = <String, int>{};
  // Lookup entries whose table insert has already been written. Kept apart
  // from lookupTable because marking "inserted" by negating the index cannot
  // work for index 0 (-0 == 0).
  final inserted = <String>{};
  // Return value
  String? compressedBuffer;
  for (var pass = 1; pass <= 3; pass++) {
    if (pass == 2 && !useLookup) {
      // No need for second pass if no lookup table
      continue;
    }
    final stopwatch = Stopwatch()..start();
    var lines = LineSplitter().convert(dictionary);
    var last = '';
    var size = 0;
    var compressed = 0;
    // Pass 1 with useLookup: entry -> number of repeat occurrences
    var entries = <String, int>{};
    // Write buffer on pass 1 if not useLookup, else pass2
    var writing = !useLookup && pass == 1 || useLookup && pass == 2;
    // Write quickLookupSize so decoder knows it
    if (writing) {
      buffer.writeEntry(quickLookupSize.toString().padLeft(2, '0'));
    }
    for (var line in lines) {
      if (pass < 3) {
        // Compute length of common prefix with last word
        var prefixLen = 0;
        while (prefixLen < line.length &&
            prefixLen < last.length &&
            last[prefixLen] == line[prefixLen]) {
          prefixLen++;
        }
        // Limit on prefix length
        if (prefixLen >= prefixCharacters.length) {
          prefixLen = prefixCharacters.length - 1;
        }
        var length = 1 + line.length - prefixLen;
        if (prefixLen == last.length &&
            prefixLen == line.length - 1 &&
            line[line.length - 1] == 's') {
          // Optimize plurals
          length = 1;
          if (writing) {
            buffer.writePluralEntry();
          }
        } else {
          var suffix = line.substring(prefixLen);
          var entry = buffer.getEntry(prefixLen, suffix);
          if (useLookup && pass == 1) {
            // The count starts at 0, so it holds the number of occurrences
            // after the first one.
            entries.update(entry, (count) => count + 1, ifAbsent: () => 0);
          } else {
            // lookupTable is only populated when useLookup is true, so this
            // is always null otherwise.
            final index = lookupTable[entry];
            if (index == null) {
              buffer.writeEntry(entry);
            } else if (inserted.add(entry)) {
              // First reference so write entry followed by table insert
              buffer.writeEntry(entry);
              buffer.writeInsertEntry(index);
            } else {
              // Subsequent reference so write table index
              buffer.writeIndexEntry(index);
            }
          }
        }
        if (pass == 1) {
          size += line.length;
          compressed += length;
        }
        // Save last line for prefix computation
        last = line;
      } else {
        // Pass 3 validation
        var entry = buffer.readEntry();
        assert(entry == line, 'Buffer entry matches line');
      }
    }
    // Post-processing
    if (pass == 1) {
      var saving = 0;
      if (useLookup) {
        // Most valuable entries first
        var sortedKeys = entries.keys.toList(growable: false)
          ..sort((k1, k2) => -_lookupValue(entries, k1)
              .compareTo(_lookupValue(entries, k2)));
        var index = 0;
        for (var key in sortedKeys) {
          // Is it worth adding entry to lookup table?
          final count = entries[key]!;
          var value = _lookupValue(entries, key);
          var lookupIndex = (index < quickLookupSize ? 1 : 2);
          // One index reference per repeat, plus lookupIndex + 1 for the
          // table insert written on first use.
          var cost = lookupIndex * count + lookupIndex + 1;
          if (cost >= value) continue;
          // Add entry to lookup table
          lookupTable[key] = index;
          saving += value - cost;
          if (statistics && verbose) {
            stdout.writeln(
                '$index: suffix $key = $count, value = $value, cost = $cost, saving = $saving');
          }
          // Break if lookup table is full
          if (++index >= lookupSize) break;
        }
      }
      if (statistics) {
        stdout.writeln(
            'Dictionary size $size prefix compression $compressed lookup saving $saving');
        if (writing) {
          var length = buffer.getBuffer().length;
          stdout.writeln('Buffer size $length');
          stdout.writeln('Pass 1: buffer written in ${stopwatch.elapsed}');
        } else {
          stdout.writeln('Pass 1: dictionary loaded in ${stopwatch.elapsed}');
        }
      }
    } else if (pass == 2) {
      if (statistics) {
        var length = buffer.getBuffer().length;
        stdout.writeln('Compressed buffer size $length');
        stdout.writeln('Pass 2: buffer written in ${stopwatch.elapsed}');
      }
    } else if (pass == 3) {
      if (statistics) {
        stdout.writeln('Pass 3: buffer validated in ${stopwatch.elapsed}');
      }
      // The stopwatch is still running, so start() would do nothing; reset it
      // so the compression time below excludes the validation time.
      stopwatch.reset();
      compressedBuffer = buffer.getCompressedBuffer(buffer.getBuffer());
      if (statistics) {
        var length = compressedBuffer.length;
        stdout.writeln('GZIP buffer size $length');
        stdout.writeln('Pass 3: buffer compressed in ${stopwatch.elapsed}');
      }
    }
  }
  return compressedBuffer;
}