compressScrabble method

String? compressScrabble(
  String dictionary, {
  bool statistics = false,
  bool verbose = false,
  bool useLookup = false,
  int quickSize = -1,
})

Compresses a dictionary (a string containing one word per line) into a string.

Implementation

/// Compresses [dictionary], a string holding one word per line, into a string.
///
/// Every word is encoded relative to the previous one: the length of the
/// shared prefix (capped at `prefixCharacters.length - 1`) followed by the
/// remaining suffix. A word that is exactly the previous word plus a trailing
/// `s` is written as a dedicated plural entry instead.
///
/// The work is done in up to three passes over the lines:
///
/// 1. Without [useLookup] the buffer is written directly. With [useLookup]
///    this pass only counts how often each entry repeats and builds the
///    lookup table from the entries whose estimated saving exceeds their cost.
/// 2. Only with [useLookup]: writes the buffer. The first occurrence of a
///    table entry is written in full followed by a table-insert marker; every
///    later occurrence is written as a table index.
/// 3. Reads the buffer back and asserts (debug mode only) that each entry
///    matches the corresponding input line, then produces the final
///    compressed string via `Buffer.getCompressedBuffer`.
///
/// Parameters:
/// * [statistics] - whether to write size and timing figures to `stdout`.
/// * [verbose] - with [statistics] and [useLookup], also logs every entry
///   added to the lookup table.
/// * [useLookup] - whether to build and use the lookup table (enables pass 2).
/// * [quickSize] - number of lookup slots costed as a single unit rather than
///   two; a negative value or one larger than `lookupCharacters.length`
///   selects `lookupCharacters.length`.
///
/// Returns the compressed string. The return type is nullable for interface
/// compatibility, but pass 3 always runs and assigns the result.
String? compressScrabble(String dictionary,
    {bool statistics = false,
    bool verbose = false,
    bool useLookup = false,
    int quickSize = -1}) {
  // Lookup dimensions: slots below quickLookupSize are costed at 1, the
  // remaining slots (combined with lookupCharacters2) at 2.
  final quickLookupSize = quickSize < 0 || quickSize > lookupCharacters.length
      ? lookupCharacters.length
      : quickSize;
  final lookupSize = quickLookupSize >= lookupCharacters.length
      ? quickLookupSize
      : quickLookupSize +
          (lookupCharacters.length - quickLookupSize) *
              lookupCharacters2.length;

  // Buffer for compressed dictionary
  final buffer = Buffer(lookupCharacters, lookupCharacters2, quickLookupSize,
      wordCharacters, prefixCharacters, specialCharacters);

  // Entry -> lookup slot; filled at the end of pass 1, read during pass 2.
  final lookupTable = <String, int>{};

  // The input is identical for every pass, so split it only once.
  final lines = const LineSplitter().convert(dictionary);

  // Return value
  String? compressedBuffer;

  for (var pass = 1; pass <= 3; pass++) {
    if (pass == 2 && !useLookup) {
      // No need for second pass if no lookup table
      continue;
    }
    final stopwatch = Stopwatch()..start();

    var last = '';
    var size = 0;
    var compressed = 0;
    // Pass 1 with lookup: number of *repeat* occurrences of each entry
    // (the first occurrence counts as 0).
    final entries = <String, int>{};
    // Lookup entries already inserted into the table during this pass.
    // A separate set is required because a sign flip cannot mark slot 0
    // (-0 == 0), which previously caused slot 0 to be re-inserted every time.
    final inserted = <String>{};

    // Write buffer on pass 1 if not useLookup, else on pass 2
    final writing = useLookup ? pass == 2 : pass == 1;

    // Write quickLookupSize so decoder knows it
    if (writing) {
      buffer.writeEntry(quickLookupSize.toString().padLeft(2, '0'));
    }

    for (final line in lines) {
      if (pass < 3) {
        // Compute length of common prefix with last word
        var prefixLen = 0;
        while (prefixLen < line.length &&
            prefixLen < last.length &&
            last[prefixLen] == line[prefixLen]) {
          prefixLen++;
        }
        // Limit on prefix length
        if (prefixLen >= prefixCharacters.length) {
          prefixLen = prefixCharacters.length - 1;
        }
        var length = 1 + line.length - prefixLen;

        if (prefixLen == last.length &&
            prefixLen == line.length - 1 &&
            line.endsWith('s')) {
          // Optimize plurals: previous word + 's'
          length = 1;
          if (writing) {
            buffer.writePluralEntry();
          }
        } else {
          final entry = buffer.getEntry(prefixLen, line.substring(prefixLen));
          if (useLookup && pass == 1) {
            // Counting pass: tally repeats only, nothing is written.
            entries.update(entry, (count) => count + 1, ifAbsent: () => 0);
          } else {
            final index = useLookup ? lookupTable[entry] : null;
            if (index == null) {
              // Not a lookup entry
              buffer.writeEntry(entry);
            } else if (inserted.add(entry)) {
              // First reference so write entry followed by table insert
              buffer.writeEntry(entry);
              buffer.writeInsertEntry(index);
            } else {
              // Subsequent reference so write table index
              buffer.writeIndexEntry(index);
            }
          }
        }
        if (pass == 1) {
          size += line.length;
          compressed += length;
        }

        // Save last line for prefix computation
        last = line;
      } else {
        // Pass 3 validation (checked in debug mode only)
        final entry = buffer.readEntry();
        assert(entry == line,
            'Buffer entry "$entry" does not match line "$line"');
      }
    }

    // Post-processing
    if (pass == 1) {
      var saving = 0;
      if (useLookup) {
        // Most valuable entries first, so they receive the cheapest slots.
        final sortedKeys = entries.keys.toList(growable: false)
          ..sort((k1, k2) => _lookupValue(entries, k2)
              .compareTo(_lookupValue(entries, k1)));
        var index = 0;
        for (final key in sortedKeys) {
          // Is it worth adding entry to lookup table?
          final count = entries[key]!;
          final value = _lookupValue(entries, key);
          final indexCost = index < quickLookupSize ? 1 : 2;
          // One index per repeat, plus the insert marker on first use.
          final cost = indexCost * count + indexCost + 1;
          if (cost >= value) continue;

          // Add entry to lookup table
          lookupTable[key] = index;
          saving += value - cost;
          if (statistics && verbose) {
            stdout.writeln(
                '$index: suffix $key = $count, value = $value, cost = $cost, saving = $saving');
          }

          // Break if lookup table is full
          if (++index >= lookupSize) break;
        }
      }
      if (statistics) {
        stdout.writeln(
            'Dictionary size $size prefix compression $compressed lookup saving $saving');
        if (writing) {
          final length = buffer.getBuffer().length;
          stdout.writeln('Buffer size $length');
          stdout.writeln('Pass 1: buffer written in ${stopwatch.elapsed}');
        } else {
          stdout.writeln('Pass 1: dictionary loaded in ${stopwatch.elapsed}');
        }
      }
    } else if (pass == 2) {
      if (statistics) {
        final length = buffer.getBuffer().length;
        stdout.writeln('Compressed buffer size $length');
        stdout.writeln('Pass 2: buffer written in ${stopwatch.elapsed}');
      }
    } else if (pass == 3) {
      if (statistics) {
        stdout.writeln('Pass 3: buffer validated in ${stopwatch.elapsed}');
      }
      // Restart timing from zero so the compression figure excludes the
      // validation time (start() on a running stopwatch would be a no-op).
      stopwatch.reset();
      compressedBuffer = buffer.getCompressedBuffer(buffer.getBuffer());
      if (statistics) {
        final length = compressedBuffer.length;
        stdout.writeln('GZIP buffer size $length');
        stdout.writeln('Pass 3: buffer compressed in ${stopwatch.elapsed}');
      }
    }
  }
  return compressedBuffer;
}