Unverified commit 3d777c5f, authored by Zhenhan Gong and committed by GitHub

fix titan zstd dictionary compression bug (#202)

parent 5a80cd8c
......@@ -112,6 +112,10 @@ void BlobFileBuilder::FlushSampleRecords(OutContexts* out_ctx) {
WriteEncoderData(&ctx->new_blob_index.blob_handle);
out_ctx->emplace_back(std::move(cached_contexts_[ctx_idx]));
}
for (; ctx_idx < cached_contexts_.size(); ctx_idx++) {
assert(cached_contexts_[ctx_idx]->has_value);
out_ctx->emplace_back(std::move(cached_contexts_[ctx_idx]));
}
assert(sample_idx == sample_records_.size());
assert(ctx_idx == cached_contexts_.size());
sample_records_.clear();
......
......@@ -100,6 +100,8 @@ class BlobFileBuilder {
// Number of calls to Add() so far.
uint64_t NumEntries();
// Returns how many records are currently stored in sample_records_.
uint64_t NumSampleEntries() {
  return static_cast<uint64_t>(sample_records_.size());
}
// Returns a read-only reference to the builder's smallest_key_ member.
const std::string& GetSmallestKey() {
  return smallest_key_;
}
// Returns a read-only reference to the builder's largest_key_ member.
const std::string& GetLargestKey() {
  return largest_key_;
}
......
......@@ -290,7 +290,11 @@ void TitanTableBuilder::Abandon() {
}
// Reports the number of entries handled by this table builder.
//
// When builder_unbuffered() is true the count comes straight from the base
// table builder. Otherwise it is the blob builder's entry count plus the
// number of records the blob builder currently holds as samples.
uint64_t TitanTableBuilder::NumEntries() const {
  if (!builder_unbuffered()) {
    const uint64_t blob_entries = blob_builder_->NumEntries();
    const uint64_t sampled_entries = blob_builder_->NumSampleEntries();
    return blob_entries + sampled_entries;
  }
  return base_builder_->NumEntries();
}
uint64_t TitanTableBuilder::FileSize() const {
......
......@@ -429,6 +429,47 @@ TEST_F(TableBuilderTest, DictCompress) {
#endif
}
// Builds a table through TitanTableFactory with ZSTD blob compression and a
// non-zero max_dict_bytes, then checks the entry count reported by the builder
// and that Finish() succeeds. The body is compiled only when
// ZSTD_VERSION_NUMBER >= 10103.
TEST_F(TableBuilderTest, DictCompressOptions) {
#if ZSTD_VERSION_NUMBER >= 10103
  // Each compression option is set exactly once.
  CompressionOptions compression_opts;
  compression_opts.window_bits = -14;
  compression_opts.level = 32767;
  compression_opts.strategy = 0;
  compression_opts.max_dict_bytes = 4000;
  compression_opts.zstd_max_train_bytes = 0;
  compression_opts.enabled = true;
  cf_options_.blob_file_compression_options = compression_opts;
  cf_options_.blob_file_compression = kZSTD;

  // Recreate the factory so it is constructed from the options set above.
  table_factory_.reset(new TitanTableFactory(
      db_options_, cf_options_, db_impl_.get(), blob_manager_, &mutex_,
      blob_file_set_.get(), nullptr));

  std::unique_ptr<WritableFileWriter> base_file;
  NewBaseFileWriter(&base_file);
  std::unique_ptr<TableBuilder> table_builder;
  NewTableBuilder(base_file.get(), &table_builder);

  // Build a base table and a blob file.
  const int n = 100;
  for (char i = 0; i < n; i++) {
    std::string key(1, i);
    InternalKey ikey(key, 1, kTypeValue);
    std::string value;
    if (i % 2 == 0) {
      // Even keys carry a 1-byte value.
      value = std::string(1, i);
    } else {
      // Odd keys carry a value of kMinBlobSize bytes.
      value = std::string(kMinBlobSize, i);
    }
    table_builder->Add(ikey.Encode(), value);
  }

  // Half of the added records are expected to be reported before Finish().
  ASSERT_EQ(n / 2, table_builder->NumEntries());
  ASSERT_OK(table_builder->Finish());
#endif
}
TEST_F(TableBuilderTest, DictCompressDisorder) {
#if ZSTD_VERSION_NUMBER >= 10103
CompressionOptions compression_opts;
......
......@@ -380,6 +380,25 @@ TEST_F(TitanDBTest, Basic) {
}
}
// Opens a DB with ZSTD blob compression and a non-zero max_dict_bytes, writes
// 500 keys, flushes, and verifies the DB against the recorded key/value map.
// NOTE(review): TableBuilderTest.DictCompressOptions wraps its body in
// `#if ZSTD_VERSION_NUMBER >= 10103`; this test has no such guard. Confirm
// that kZSTD is always available wherever this test is built.
TEST_F(TitanDBTest, DictCompressOptions) {
  options_.min_blob_size = 1;
  options_.blob_file_compression = CompressionType::kZSTD;

  auto& dict_opts = options_.blob_file_compression_options;
  dict_opts.window_bits = -14;
  dict_opts.level = 32767;
  dict_opts.strategy = 0;
  dict_opts.max_dict_bytes = 6400;
  dict_opts.zstd_max_train_bytes = 0;

  const uint64_t kKeyCount = 500;
  std::map<std::string, std::string> data;

  Open();
  // Put() receives the map so VerifyDB() can compare against it afterwards.
  for (uint64_t id = 1; id <= kKeyCount; ++id) {
    Put(id, &data);
  }
  Flush();
  VerifyDB(data);
}
// Runs the fixture's TestTableFactory() helper as its own test case.
TEST_F(TitanDBTest, TableFactory) {
  TestTableFactory();
}
TEST_F(TitanDBTest, DbIter) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment