Unverified commit f01a1b22, authored by zhangjinpeng1987 and committed by GitHub

delete range bug fix

parent c141298a
@@ -1040,6 +1040,7 @@ Status CompactionJob::FinishCompactionOutputFile(
   auto meta = &sub_compact->current_output()->meta;
   if (s.ok()) {
     Slice lower_bound_guard, upper_bound_guard;
+    std::string smallest_user_key;
     const Slice *lower_bound, *upper_bound;
     if (sub_compact->outputs.size() == 1) {
       // For the first output table, include range tombstones before the min key
@@ -1049,7 +1050,8 @@ Status CompactionJob::FinishCompactionOutputFile(
       // For subsequent output tables, only include range tombstones from min
       // key onwards since the previous file was extended to contain range
       // tombstones falling before min key.
-      lower_bound_guard = meta->smallest.user_key();
+      smallest_user_key = meta->smallest.user_key().ToString(false /*hex*/);
+      lower_bound_guard = Slice(smallest_user_key);
       lower_bound = &lower_bound_guard;
     } else {
       lower_bound = nullptr;
...
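The change above swaps a Slice view of `meta->smallest` for an owned `std::string` copy. A plausible reading (the commit message does not say) is that `meta->smallest` can be rewritten later in `FinishCompactionOutputFile` when range tombstones extend the file's boundaries, so a Slice pointing into it could change or dangle while still being used as the lower bound. A minimal, hypothetical sketch of the owning-copy pattern, not RocksDB internals:

```cpp
#include <cassert>
#include <string>
#include "rocksdb/slice.h"

int main() {
  std::string smallest = "key000001";  // stands in for meta->smallest's user key

  // Non-owning view: the Slice just points at smallest's buffer, so any later
  // update to smallest changes (or invalidates) what the bound refers to.
  rocksdb::Slice unstable_bound(smallest);
  (void)unstable_bound;

  // Owning copy: snapshot the bytes first, then view the copy. The bound is
  // unaffected by later changes to the original key.
  std::string copy = smallest;
  rocksdb::Slice stable_bound(copy);

  smallest = "key000000";  // simulate the key being extended/rewritten later

  assert(stable_bound == rocksdb::Slice("key000001"));
  // unstable_bound now reflects whatever smallest's buffer currently holds.
  return 0;
}
```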
@@ -1439,6 +1439,60 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
   ASSERT_GT(old_num_files, new_num_files);
 }
 
+TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
+  // regression test for #2833: groups of files whose user-keys overlap at the
+  // endpoints could be split by `DeleteFilesInRange`. This caused old data to
+  // reappear, either because a new version of the key was removed, or a range
+  // deletion was partially dropped. It could also cause the non-overlapping
+  // invariant to be violated if the files dropped by DeleteFilesInRange were
+  // a subset of files that a range deletion spans.
+  const int kNumL0Files = 2;
+  const int kValSize = 8 << 10;  // 8KB
+  Options options = CurrentOptions();
+  options.level0_file_num_compaction_trigger = kNumL0Files;
+  options.target_file_size_base = 1 << 10;  // 1KB
+  DestroyAndReopen(options);
+
+  // The snapshot prevents key 1 from having its old version dropped. The low
+  // `target_file_size_base` ensures two keys will be in each output file.
+  const Snapshot* snapshot = nullptr;
+  Random rnd(301);
+  // The value indicates which flush the key belonged to, which is enough
+  // for us to determine the keys' relative ages. After L0 flushes finish,
+  // files look like:
+  //
+  // File 0: 0 -> vals[0], 1 -> vals[0]
+  // File 1: 1 -> vals[1], 2 -> vals[1]
+  //
+  // Then L0->L1 compaction happens, which outputs keys as follows:
+  //
+  // File 0: 0 -> vals[0], 1 -> vals[1]
+  // File 1: 1 -> vals[0], 2 -> vals[1]
+  //
+  // DeleteFilesInRange shouldn't be allowed to drop just file 0, as that
+  // would cause `1 -> vals[0]` (an older key) to reappear.
+  std::string vals[kNumL0Files];
+  for (int i = 0; i < kNumL0Files; ++i) {
+    vals[i] = RandomString(&rnd, kValSize);
+    Put(Key(i), vals[i]);
+    Put(Key(i + 1), vals[i]);
+    Flush();
+    if (i == 0) {
+      snapshot = db_->GetSnapshot();
+    }
+  }
+  dbfull()->TEST_WaitForCompact();
+
+  // Verify `DeleteFilesInRange` can't drop only file 0 which would cause
+  // "1 -> vals[0]" to reappear.
+  std::string begin_str = Key(0), end_str = Key(1);
+  Slice begin = begin_str, end = end_str;
+  ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end));
+  ASSERT_EQ(vals[1], Get(Key(1)));
+
+  db_->ReleaseSnapshot(snapshot);
+}
+
 TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) {
   int32_t trivial_move = 0;
   int32_t non_trivial_move = 0;
...
@@ -2062,25 +2062,19 @@ Status DBImpl::DeleteFilesInRange(ColumnFamilyHandle* column_family,
       end_key = &end_storage;
     }
 
-    vstorage->GetOverlappingInputs(i, begin_key, end_key, &level_files, -1,
-                                   nullptr, false);
+    vstorage->GetCleanInputsWithinInterval(i, begin_key, end_key,
+                                           &level_files, -1 /* hint_index */,
+                                           nullptr /* file_index */);
     FileMetaData* level_file;
     for (uint32_t j = 0; j < level_files.size(); j++) {
       level_file = level_files[j];
-      if (((begin == nullptr) ||
-           (cfd->internal_comparator().user_comparator()->Compare(
-                level_file->smallest.user_key(), *begin) >= 0)) &&
-          ((end == nullptr) ||
-           (cfd->internal_comparator().user_comparator()->Compare(
-                level_file->largest.user_key(), *end) <= 0))) {
-        if (level_file->being_compacted) {
-          continue;
-        }
-        edit.SetColumnFamily(cfd->GetID());
-        edit.DeleteFile(i, level_file->fd.GetNumber());
-        deleted_files.push_back(level_file);
-        level_file->being_compacted = true;
+      if (level_file->being_compacted) {
+        continue;
       }
+      edit.SetColumnFamily(cfd->GetID());
+      edit.DeleteFile(i, level_file->fd.GetNumber());
+      deleted_files.push_back(level_file);
+      level_file->being_compacted = true;
     }
   }
   if (edit.GetDeletedFiles().empty()) {
...
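For intuition about what `GetCleanInputsWithinInterval` is expected to return at this call site, the sketch below expresses the rule under simplified assumptions (a toy `ToyFile` record, the built-in `BytewiseComparator`, and a single sorted L1+ level); it is an illustration, not the actual RocksDB implementation: keep only the contiguous run of files entirely inside `[begin, end]`, then shrink that run while a boundary file shares its endpoint user key with a neighbor that stays behind.

```cpp
#include <utility>
#include <vector>
#include "rocksdb/comparator.h"
#include "rocksdb/slice.h"

// Toy stand-in for FileMetaData, for illustration only.
struct ToyFile {
  rocksdb::Slice smallest_user_key;
  rocksdb::Slice largest_user_key;
};

// Returns the inclusive index range [first, last] of files that may be
// dropped; first > last means "drop nothing". `files` is the sorted file
// list of one non-L0 level.
std::pair<int, int> CleanRange(const std::vector<ToyFile>& files,
                               const rocksdb::Slice& begin,
                               const rocksdb::Slice& end) {
  const rocksdb::Comparator* ucmp = rocksdb::BytewiseComparator();
  const int n = static_cast<int>(files.size());

  // Step 1: contiguous run of files fully contained in [begin, end].
  int first = n, last = -1;
  for (int i = 0; i < n; ++i) {
    if (ucmp->Compare(files[i].smallest_user_key, begin) >= 0 &&
        ucmp->Compare(files[i].largest_user_key, end) <= 0) {
      if (first == n) first = i;
      last = i;
    }
  }

  // Step 2: shrink the run while a boundary file shares its endpoint user key
  // with a neighbor that stays behind; dropping only one side of such a key
  // could resurrect an older version of it.
  while (first <= last && first > 0 &&
         ucmp->Compare(files[first - 1].largest_user_key,
                       files[first].smallest_user_key) == 0) {
    ++first;
  }
  while (first <= last && last + 1 < n &&
         ucmp->Compare(files[last].largest_user_key,
                       files[last + 1].smallest_user_key) == 0) {
    --last;
  }
  return {first, last};
}
```

In the regression test above, the first L1 file ends on the same user key that the second file begins with, so the shrink step rejects it even though it lies entirely inside the requested range.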
@@ -1777,27 +1777,33 @@ void VersionStorageInfo::GetOverlappingInputs(
 void VersionStorageInfo::GetCleanInputsWithinInterval(
     int level, const InternalKey* begin, const InternalKey* end,
     std::vector<FileMetaData*>* inputs, int hint_index, int* file_index) const {
-  if (level >= num_non_empty_levels_) {
+  inputs->clear();
+  if (file_index) {
+    *file_index = -1;
+  }
+  if (level >= num_non_empty_levels_ || level == 0 ||
+      level_files_brief_[level].num_files == 0) {
     // this level is empty, no inputs within range
+    // also don't support clean input interval within L0
     return;
   }
 
-  inputs->clear();
   Slice user_begin, user_end;
-  if (begin != nullptr) {
+  const auto& level_files = level_files_brief_[level];
+  if (begin == nullptr) {
+    user_begin = ExtractUserKey(level_files.files[0].smallest_key);
+  } else {
     user_begin = begin->user_key();
   }
-  if (end != nullptr) {
+  if (end == nullptr) {
+    user_end = ExtractUserKey(
+        level_files.files[level_files.num_files - 1].largest_key);
+  } else {
     user_end = end->user_key();
   }
-  if (file_index) {
-    *file_index = -1;
-  }
-  if (begin != nullptr && end != nullptr && level > 0) {
-    GetOverlappingInputsRangeBinarySearch(level, user_begin, user_end, inputs,
-                                          hint_index, file_index,
-                                          true /* within_interval */);
-  }
+  GetOverlappingInputsRangeBinarySearch(level, user_begin, user_end, inputs,
+                                        hint_index, file_index,
+                                        true /* within_interval */);
 }
 
 // Store in "*inputs" all files in "level" that overlap [begin,end]
@@ -1860,8 +1866,8 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
   } else {
     ExtendFileRangeOverlappingInterval(level, user_begin, user_end, mid,
                                        &start_index, &end_index);
+    assert(end_index >= start_index);
   }
-  assert(end_index >= start_index);
   // insert overlapping files into vector
   for (int i = start_index; i <= end_index; i++) {
     inputs->push_back(files_[level][i]);
...
@@ -325,7 +325,8 @@ void CancelAllBackgroundWork(DB* db, bool wait = false);
 
 // Delete files which are entirely in the given range
 // Could leave some keys in the range which are in files which are not
-// entirely in the range.
+// entirely in the range. Also leaves L0 files regardless of whether they're
+// in the range.
 // Snapshots before the delete might not see the data in the given range.
 Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
                           const Slice* begin, const Slice* end);
...
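For reference, a minimal usage sketch of the public `DeleteFilesInRange` API whose header comment is amended above; the database path and key range are placeholders, and the declaration is assumed to live in `rocksdb/convenience.h` alongside `CancelAllBackgroundWork`:

```cpp
#include <cassert>
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"

int main() {
  rocksdb::DB* db = nullptr;
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/delete_files_in_range_demo", &db);
  assert(s.ok());

  // Ask RocksDB to drop whole SST files lying entirely inside ["a", "m"].
  // Files that only partially overlap the range are kept, and after this
  // change L0 files are always kept, so some keys in the range may survive.
  rocksdb::Slice begin("a");
  rocksdb::Slice end("m");
  s = rocksdb::DeleteFilesInRange(db, db->DefaultColumnFamily(), &begin, &end);
  assert(s.ok());

  delete db;
  return 0;
}
```

Because files only partially inside the range (and now all L0 files) are kept, callers that need every key in the range gone still have to follow up with a range deletion or a compaction.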