Unverified commit 8376c989, authored by Connor and committed by GitHub

Fix algorithm for counting runs (#104)

* fix algorithm for counting runs
Signed-off-by: Connor1996 <zbk602423539@gmail.com>
parent c6b93a63
...@@ -586,8 +586,9 @@ Status BlobGCJob::DeleteInputBlobFiles() { ...@@ -586,8 +586,9 @@ Status BlobGCJob::DeleteInputBlobFiles() {
for (const auto& file : blob_gc_->sampled_inputs()) { for (const auto& file : blob_gc_->sampled_inputs()) {
ROCKS_LOG_INFO(db_options_.info_log, ROCKS_LOG_INFO(db_options_.info_log,
"Titan add obsolete file [%" PRIu64 "] range [%s, %s]", "Titan add obsolete file [%" PRIu64 "] range [%s, %s]",
file->file_number(), file->smallest_key().c_str(), file->file_number(),
file->largest_key().c_str()); Slice(file->smallest_key()).ToString(true).c_str(),
Slice(file->largest_key()).ToString(true).c_str());
metrics_.gc_num_files++; metrics_.gc_num_files++;
RecordInHistogram(stats_, TitanStats::GC_INPUT_FILE_SIZE, RecordInHistogram(stats_, TitanStats::GC_INPUT_FILE_SIZE,
file->file_size()); file->file_size());
......
...@@ -608,32 +608,50 @@ TEST_F(BlobGCJobTest, LevelMergeGC) { ...@@ -608,32 +608,50 @@ TEST_F(BlobGCJobTest, LevelMergeGC) {
TEST_F(BlobGCJobTest, RangeMergeScheduler) { TEST_F(BlobGCJobTest, RangeMergeScheduler) {
NewDB(); NewDB();
int max_sorted_run = 1; auto init_files =
std::vector<std::shared_ptr<BlobFileMeta>> files; [&](std::vector<std::vector<std::pair<std::string, std::string>>>
auto add_file = [&](int file_num, const std::string& smallest, file_runs) {
const std::string& largest) { std::vector<std::shared_ptr<BlobFileMeta>> files;
auto file = int file_num = 0;
std::make_shared<BlobFileMeta>(file_num, 0, 0, 0, smallest, largest); for (auto& run : file_runs) {
file->FileStateTransit(BlobFileMeta::FileEvent::kReset); for (auto& range : run) {
files.emplace_back(file); auto file = std::make_shared<BlobFileMeta>(
}; file_num++, 0, 0, 0, range.first, range.second);
file->FileStateTransit(BlobFileMeta::FileEvent::kReset);
files.emplace_back(file);
}
}
return files;
};
// one sorted run, no file will be marked // max_sorted_run = 1
// run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l] // run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l]
for (size_t i = 0; i <= 5; i++) { // no file will be marked
add_file(i, std::string(1, 'a' + i * 2), std::string(1, 'a' + i * 2 + 1)); auto file_runs =
} std::vector<std::vector<std::pair<std::string, std::string>>>{
ScheduleRangeMerge(files, max_sorted_run); {{"a", "b"},
{"c", "d"},
{"e", "f"},
{"g", "h"},
{"i", "j"},
{"k", "l"}},
};
auto files = init_files(file_runs);
ScheduleRangeMerge(files, 1);
for (const auto& file : files) { for (const auto& file : files) {
ASSERT_EQ(file->file_state(), BlobFileMeta::FileState::kNormal); ASSERT_EQ(file->file_state(), BlobFileMeta::FileState::kNormal);
} }
// max_sorted_run = 1
// run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l] // run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l]
// run 2: [e, f] [g, h] // run 2: [e, f] [g, h]
// files overlaped with [e, h] will be marked // files overlapped with [e, h] will be marked
add_file(6, "e", "f"); file_runs = std::vector<std::vector<std::pair<std::string, std::string>>>{
add_file(7, "g", "h"); {{"a", "b"}, {"c", "d"}, {"e", "f"}, {"g", "h"}, {"i", "j"}, {"k", "l"}},
ScheduleRangeMerge(files, max_sorted_run); {{"e", "f"}, {"g", "h"}},
};
files = init_files(file_runs);
ScheduleRangeMerge(files, 1);
for (size_t i = 0; i < files.size(); i++) { for (size_t i = 0; i < files.size(); i++) {
if (i == 2 || i == 3 || i == 6 || i == 7) { if (i == 2 || i == 3 || i == 6 || i == 7) {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kToMerge); ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kToMerge);
...@@ -643,12 +661,16 @@ TEST_F(BlobGCJobTest, RangeMergeScheduler) { ...@@ -643,12 +661,16 @@ TEST_F(BlobGCJobTest, RangeMergeScheduler) {
} }
} }
// max_sorted_run = 1
// run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l] // run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l]
// run 2: [a, b] [e, f] [g, h] [l, m] // run 2: [a, b] [e, f] [g, h] [l, m]
// files overlaped with [a, b] and [e, h] will be marked // files overlapped with [a, b] and [e, h] will be marked
add_file(8, "a", "b"); file_runs = std::vector<std::vector<std::pair<std::string, std::string>>>{
add_file(9, "l", "m"); {{"a", "b"}, {"c", "d"}, {"e", "f"}, {"g", "h"}, {"i", "j"}, {"k", "l"}},
ScheduleRangeMerge(files, max_sorted_run); {{"a", "b"}, {"e", "f"}, {"g", "h"}, {"l", "m"}},
};
files = init_files(file_runs);
ScheduleRangeMerge(files, 1);
for (size_t i = 0; i < files.size(); i++) { for (size_t i = 0; i < files.size(); i++) {
if (i == 1 || i == 4 || i == 5 || i == 9) { if (i == 1 || i == 4 || i == 5 || i == 9) {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kNormal); ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kNormal);
...@@ -658,13 +680,93 @@ TEST_F(BlobGCJobTest, RangeMergeScheduler) { ...@@ -658,13 +680,93 @@ TEST_F(BlobGCJobTest, RangeMergeScheduler) {
} }
} }
max_sorted_run = 2; // max_sorted_run = 2
// run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l]
// run 2: [c, l]
// no file will be marked
file_runs = std::vector<std::vector<std::pair<std::string, std::string>>>{
{{"a", "b"}, {"c", "d"}, {"e", "f"}, {"g", "h"}, {"i", "j"}, {"k", "l"}},
{{"c", "l"}},
};
files = init_files(file_runs);
ScheduleRangeMerge(files, 2);
for (const auto& file : files) {
ASSERT_EQ(file->file_state(), BlobFileMeta::FileState::kNormal);
}
// max_sorted_run = 2
// run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l]
// run 2: [c, d]
// run 3: [c, d]
// files overlapped with [c, d] will be marked.
file_runs = std::vector<std::vector<std::pair<std::string, std::string>>>{
{{"a", "b"}, {"c", "d"}, {"e", "f"}, {"g", "h"}, {"i", "j"}, {"k", "l"}},
{{"c", "d"}},
{{"c", "d"}},
};
files = init_files(file_runs);
ScheduleRangeMerge(files, 2);
for (size_t i = 0; i < files.size(); i++) {
if (i == 1 || i == 6 || i == 7) {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kToMerge);
files[i]->FileStateTransit(BlobFileMeta::FileEvent::kReset);
} else {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kNormal);
}
}
// max_sorted_run = 2
// run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l]
// run 2: [b1, d]
// run 3: [a, d]
// files overlapped with [c, d] will be marked.
file_runs = std::vector<std::vector<std::pair<std::string, std::string>>>{
{{"a", "b"}, {"c", "d"}, {"e", "f"}, {"g", "h"}, {"i", "j"}, {"k", "l"}},
{{"b1", "d"}},
{{"a", "d"}},
};
files = init_files(file_runs);
ScheduleRangeMerge(files, 2);
for (size_t i = 0; i < files.size(); i++) {
if (i == 1 || i == 6 || i == 7) {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kToMerge);
files[i]->FileStateTransit(BlobFileMeta::FileEvent::kReset);
} else {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kNormal);
}
}
// max_sorted_run = 2;
// run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l]
// run 2: [a, b] [e, f] [g, h] [l, m]
// run 3: [e, g1]
// files overlapped with [e, g] will be marked.
file_runs = std::vector<std::vector<std::pair<std::string, std::string>>>{
{{"a", "b"}, {"c", "d"}, {"e", "f"}, {"g", "h"}, {"i", "j"}, {"k", "l"}},
{{"a", "b"}, {"e", "f"}, {"g", "h"}, {"l", "m"}},
{{"e", "g1"}}};
files = init_files(file_runs);
ScheduleRangeMerge(files, 2);
for (size_t i = 0; i < files.size(); i++) {
if (i == 2 || i == 3 || i == 7 || i == 8 || i == 10) {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kToMerge);
files[i]->FileStateTransit(BlobFileMeta::FileEvent::kReset);
} else {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kNormal);
}
}
// max_sorted_run = 2;
// run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l] // run 1: [a, b] [c, d] [e, f] [g, h] [i, j] [k, l]
// run 2: [a, b] [e, f] [g, h] [l, m] // run 2: [a, b] [e, f] [g, h] [l, m]
// run 3: [a, l1] // run 3: [a, l1]
// files overlaped with [a, b] and [e, h] will be marked. // files overlapped with [a, b] and [e, h] will be marked.
add_file(10, "a", "l1"); file_runs = std::vector<std::vector<std::pair<std::string, std::string>>>{
ScheduleRangeMerge(files, max_sorted_run); {{"a", "b"}, {"c", "d"}, {"e", "f"}, {"g", "h"}, {"i", "j"}, {"k", "l"}},
{{"a", "b"}, {"e", "f"}, {"g", "h"}, {"l", "m"}},
{{"a", "l1"}}};
files = init_files(file_runs);
ScheduleRangeMerge(files, 2);
for (size_t i = 0; i < files.size(); i++) { for (size_t i = 0; i < files.size(); i++) {
if (i == 1 || i == 4 || i == 5 || i == 9) { if (i == 1 || i == 4 || i == 5 || i == 9) {
ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kNormal); ASSERT_EQ(files[i]->file_state(), BlobFileMeta::FileState::kNormal);
......
...@@ -70,8 +70,8 @@ class TitanDBImpl::FileManager : public BlobFileManager { ...@@ -70,8 +70,8 @@ class TitanDBImpl::FileManager : public BlobFileManager {
ROCKS_LOG_INFO(db_->db_options_.info_log, ROCKS_LOG_INFO(db_->db_options_.info_log,
"Titan adding blob file [%" PRIu64 "] range [%s, %s]", "Titan adding blob file [%" PRIu64 "] range [%s, %s]",
file.first->file_number(), file.first->file_number(),
file.first->smallest_key().c_str(), Slice(file.first->smallest_key()).ToString(true).c_str(),
file.first->largest_key().c_str()); Slice(file.first->largest_key()).ToString(true).c_str());
edit.AddBlobFile(file.first); edit.AddBlobFile(file.first);
} }
...@@ -878,20 +878,17 @@ void TitanDBImpl::MarkFileIfNeedMerge( ...@@ -878,20 +878,17 @@ void TitanDBImpl::MarkFileIfNeedMerge(
}; };
std::sort(blob_ends.begin(), blob_ends.end(), blob_ends_cmp); std::sort(blob_ends.begin(), blob_ends.end(), blob_ends_cmp);
int cur_add = 0; std::unordered_set<BlobFileMeta*> set;
int cur_remove = 0; for (auto& end : blob_ends) {
int size = blob_ends.size(); if (end.second) {
std::unordered_map<BlobFileMeta*, int> tmp; set.insert(end.first);
for (int i = 0; i < size; i++) { if (set.size() > static_cast<size_t>(max_sorted_runs)) {
if (blob_ends[i].second) { for (auto file : set) {
++cur_add; file->FileStateTransit(BlobFileMeta::FileEvent::kNeedMerge);
tmp[blob_ends[i].first] = cur_remove; }
} else {
++cur_remove;
auto record = tmp.find(blob_ends[i].first);
if (cur_add - record->second > max_sorted_runs) {
record->first->FileStateTransit(BlobFileMeta::FileEvent::kNeedMerge);
} }
} else {
set.erase(end.first);
} }
} }
} }
......
...@@ -760,6 +760,10 @@ DEFINE_uint64(blob_db_min_blob_size, 0, ...@@ -760,6 +760,10 @@ DEFINE_uint64(blob_db_min_blob_size, 0,
// Titan Options // Titan Options
DEFINE_bool(use_titan, true, "Open a Titan instance."); DEFINE_bool(use_titan, true, "Open a Titan instance.");
DEFINE_bool(titan_level_merge, false, "Enable Titan level merge.");
DEFINE_bool(titan_range_merge, false, "Enable Titan range merge.");
DEFINE_uint64(titan_min_blob_size, 0, DEFINE_uint64(titan_min_blob_size, 0,
"Smallest blob to store in a file. Blobs smaller than this " "Smallest blob to store in a file. Blobs smaller than this "
"will be inlined with the key in the LSM tree."); "will be inlined with the key in the LSM tree.");
...@@ -3825,6 +3829,8 @@ class Benchmark { ...@@ -3825,6 +3829,8 @@ class Benchmark {
options.listeners.emplace_back(listener_); options.listeners.emplace_back(listener_);
opts->min_blob_size = FLAGS_titan_min_blob_size; opts->min_blob_size = FLAGS_titan_min_blob_size;
opts->level_merge = FLAGS_titan_level_merge;
opts->range_merge = FLAGS_titan_range_merge;
opts->disable_background_gc = FLAGS_titan_disable_background_gc; opts->disable_background_gc = FLAGS_titan_disable_background_gc;
opts->max_background_gc = FLAGS_titan_max_background_gc; opts->max_background_gc = FLAGS_titan_max_background_gc;
opts->min_gc_batch_size = 128 << 20; opts->min_gc_batch_size = 128 << 20;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment