Adjust daily benchmark script (#84)

Summary: Adjust the daily benchmark script: * Default params are now closer to what we use for TiKV. * Change bulkload to accept the `NUM_THREADS` param; bulkload also disables Titan background GC. Test Plan: local test. Signed-off-by: Yi Wu <yiwu@pingcap.com>

Adjust daily benchmark script (#84)
Summary: Adjust the daily benchmark script: * Default params are now closer to what we use for TiKV. * Change bulkload to accept the `NUM_THREADS` param; bulkload also disables Titan background GC. Test Plan: local test. Signed-off-by: Yi Wu <yiwu@pingcap.com>
280b59d6 · yiwu-arbug · GitHub · 974e5c6a · 280b59d6 · 280b59d6
Unverified Commit 280b59d6 authored Sep 27, 2019 by yiwu-arbug Committed by GitHub Sep 27, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 56 additions and 85 deletions

benchmark.sh tools/benchmark.sh +38 -83

db_bench_tool.cc tools/db_bench_tool.cc +18 -2

No files found.
--- a/tools/benchmark.sh
+++ b/tools/benchmark.sh
@@ -40,18 +40,10 @@ if [ ! -d $output_dir ]; then
  mkdir -p $output_dir
 fi
-# all multithreaded tests run with sync=1 unless
+num_threads=${NUM_THREADS:-1}
-# $DB_BENCH_NO_SYNC is defined
-syncval="1"
-if [ ! -z $DB_BENCH_NO_SYNC ]; then
-  echo "Turning sync off for all multithreaded tests"
-  syncval="0";
-fi
-num_threads=${NUM_THREADS:-16}
 mb_written_per_sec=${MB_WRITE_PER_SEC:-0}
 # Only for tests that do range scans
-num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10}
+num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-100}
 cache_size=${CACHE_SIZE:-$((1 * G))}
 compression_max_dict_bytes=${COMPRESSION_MAX_DICT_BYTES:-0}
 compression_type=${COMPRESSION_TYPE:-snappy}
@@ -59,70 +51,48 @@ duration=${DURATION:-0}
 num_keys=${NUM_KEYS:-$((1 * G))}
 key_size=${KEY_SIZE:-20}
-value_size=${VALUE_SIZE:-400}
+value_size=${VALUE_SIZE:-100}
-block_size=${BLOCK_SIZE:-8192}
 const_params="
  --db=$DB_DIR \
  --wal_dir=$WAL_DIR \
  \
  --num=$num_keys \
-  --num_levels=6 \
  --key_size=$key_size \
  --value_size=$value_size \
-  --block_size=$block_size \
-  --cache_size=$cache_size \
-  --cache_numshardbits=6 \
-  --compression_max_dict_bytes=$compression_max_dict_bytes \
  --compression_ratio=0.5 \
  --compression_type=$compression_type \
-  --level_compaction_dynamic_level_bytes=true \
+  \
-  --bytes_per_sync=$((8 * M)) \
+  --block_size=$((64 * K)) \
-  --cache_index_and_filter_blocks=0 \
+  --cache_size=$cache_size \
+  --cache_numshardbits=6 \
+  --cache_index_and_filter_blocks=1 \
  --pin_l0_filter_and_index_blocks_in_cache=1 \
-  --benchmark_write_rate_limit=$(( 1024 * 1024 * $mb_written_per_sec )) \
+  --bloom_bits=10 \
  \
-  --hard_rate_limit=3 \
+  --num_levels=7 \
-  --rate_limit_delay_max_milliseconds=1000000 \
  --write_buffer_size=$((128 * M)) \
-  --target_file_size_base=$((128 * M)) \
+  --level_compaction_dynamic_level_bytes=true \
-  --max_bytes_for_level_base=$((1 * G)) \
+  --max_write_buffer_number=5 \
+  --target_file_size_base=$((8 * M)) \
+  --max_bytes_for_level_base=$((512 * M)) \
+  --max_bytes_for_level_multiplier=10 \
  \
+  --max_background_jobs=6 \
+  --titan_max_background_gc=6
+  --open_files=40960 \
+  --statistics=1 \
  --verify_checksum=1 \
-  --delete_obsolete_files_period_micros=$((60 * M)) \
-  --max_bytes_for_level_multiplier=8 \
  \
-  --statistics=0 \
+  --bytes_per_sync=$((1 * M)) \
-  --stats_per_interval=1 \
+  --wal_bytes_per_sync=$((512 * K)) \
-  --stats_interval_seconds=60 \
-  --histogram=1 \
  \
-  --memtablerep=skip_list \
-  --bloom_bits=10 \
-  --open_files=-1 \
  --use_titan=$TITAN"
-l0_config="
-  --level0_file_num_compaction_trigger=4 \
-  --level0_slowdown_writes_trigger=12 \
-  --level0_stop_writes_trigger=20"
 if [ $duration -gt 0 ]; then
  const_params="$const_params --duration=$duration"
 fi
-params_w="$const_params \
-          $l0_config \
-          --max_background_jobs=20 \
-          --max_write_buffer_number=8"
-params_bulkload="$const_params \
-                 --max_background_jobs=20 \
-                 --max_write_buffer_number=8 \
-                 --level0_file_num_compaction_trigger=$((10 * M)) \
-                 --level0_slowdown_writes_trigger=$((10 * M)) \
-                 --level0_stop_writes_trigger=$((10 * M))"
 #
 # Tune values for level and universal compaction.
 # For universal compaction, these level0_* options mean total sorted of runs in
@@ -178,13 +148,13 @@ function run_bulkload {
  # TITAN: The implementation of memtable is changed from vector to default skiplist. Because GC in titan need to get in memtable. Vector will cause poor performance.
  echo "Bulk loading $num_keys random keys"
  cmd="./titandb_bench --benchmarks=fillrandom \
+       $const_params \
       --use_existing_db=0 \
       --disable_auto_compactions=1 \
-       --sync=0 \
+       --disable_wal=true
-       $params_bulkload \
+       --sync=false \
-       --threads=1 \
+       --titan_disable_background_gc=true \
-       --allow_concurrent_memtable_write=false \
+       --threads=${num_threads} \
-       --disable_wal=1 \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/benchmark_bulkload_fillrandom.log"
  echo $cmd | tee $output_dir/benchmark_bulkload_fillrandom.log
@@ -195,7 +165,7 @@ function run_bulkload {
       --use_existing_db=1 \
       --disable_auto_compactions=1 \
       --sync=0 \
-       $params_w \
+       $const_params \
       --threads=1 \
       2>&1 | tee -a $output_dir/benchmark_bulkload_compact.log"
  echo $cmd | tee $output_dir/benchmark_bulkload_compact.log
@@ -298,14 +268,6 @@ function run_univ_compaction {
 }
 function run_fillseq {
-  # This runs with a vector memtable. WAL can be either disabled or enabled
-  # depending on the input parameter (1 for disabled, 0 for enabled). The main
-  # benefit behind disabling WAL is to make loading faster. It is still crash
-  # safe and the client can discover where to restart a load after a crash. I
-  # think this is a good way to load.
-  # TITAN: The implementation of memtable is changed from vector to default skiplist. Because GC in titan need to get in memtable. Vector will cause poor performance.
  # Make sure that we'll have unique names for all the files so that data won't
  # be overwritten.
  if [ $1 == 1 ]; then
@@ -319,12 +281,9 @@ function run_fillseq {
  echo "Loading $num_keys keys sequentially"
  cmd="./titandb_bench --benchmarks=fillseq \
       --use_existing_db=0 \
-       --sync=0 \
+       $const_params \
-       $params_w \
-       --min_level_to_compress=0 \
-       --threads=1 \
-       --allow_concurrent_memtable_write=false \
       --disable_wal=$1 \
+       --threads=${num_threads} \
       --seed=$( date +%s ) \
       2>&1 | tee -a $log_file_name"
  echo $cmd | tee $log_file_name
@@ -337,18 +296,17 @@ function run_fillseq {
 function run_change {
  operation=$1
  echo "Do $num_keys random $operation"
-  out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
+  out_name="benchmark_${operation}.t${num_threads}.log"
  cmd="./titandb_bench --benchmarks=$operation \
       --use_existing_db=1 \
-       --sync=$syncval \
+       $const_params \
-       $params_w \
       --threads=$num_threads \
       --merge_operator=\"put\" \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  echo $cmd | tee $output_dir/${out_name}
  eval $cmd
-  summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation
+  summarize_result $output_dir/${out_name} ${operation}.t${num_threads} $operation
 }
 function run_filluniquerandom {
@@ -356,7 +314,7 @@ function run_filluniquerandom {
  cmd="./titandb_bench --benchmarks=filluniquerandom \
       --use_existing_db=0 \
       --sync=0 \
-       $params_w \
+       $const_params \
       --threads=1 \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/benchmark_filluniquerandom.log"
@@ -370,7 +328,7 @@ function run_readrandom {
  out_name="benchmark_readrandom.t${num_threads}.log"
  cmd="./titandb_bench --benchmarks=readrandom \
       --use_existing_db=1 \
-       $params_w \
+       $const_params \
       --threads=$num_threads \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
@@ -385,8 +343,7 @@ function run_readwhile {
  out_name="benchmark_readwhile${operation}.t${num_threads}.log"
  cmd="./titandb_bench --benchmarks=readwhile${operation} \
       --use_existing_db=1 \
-       --sync=$syncval \
+       $const_params \
-       $params_w \
       --threads=$num_threads \
       --merge_operator=\"put\" \
       --seed=$( date +%s ) \
@@ -404,8 +361,7 @@ function run_rangewhile {
  echo "Range scan $num_keys random keys while ${operation} for reverse_iter=${reverse_arg}"
  cmd="./titandb_bench --benchmarks=seekrandomwhile${operation} \
       --use_existing_db=1 \
-       --sync=$syncval \
+       $const_params \
-       $params_w \
       --threads=$num_threads \
       --merge_operator=\"put\" \
       --seek_nexts=$num_nexts_per_seek \
@@ -424,7 +380,7 @@ function run_range {
  echo "Range scan $num_keys random keys for reverse_iter=${reverse_arg}"
  cmd="./titandb_bench --benchmarks=seekrandom \
       --use_existing_db=1 \
-       $params_w \
+       $const_params \
       --threads=$num_threads \
       --seek_nexts=$num_nexts_per_seek \
       --reverse_iterator=$reverse_arg \
@@ -518,4 +474,4 @@ for job in ${jobs[@]}; do
  echo -e "ops/sec\tmb/sec\tSize-GB\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tp99.9\tp99.99\tUptime\tStall-time\tStall%\tTest"
  tail -1 $output_dir/report.txt
 done
\ No newline at end of file
--- a/tools/db_bench_tool.cc
+++ b/tools/db_bench_tool.cc
@@ -760,10 +760,21 @@ DEFINE_uint64(blob_db_min_blob_size, 0,
 // Titan Options
 DEFINE_bool(use_titan, true, "Open a Titan instance.");
-DEFINE_uint64(titan_db_min_blob_size, 0,
+DEFINE_uint64(titan_min_blob_size, 0,
              "Smallest blob to store in a file. Blobs smaller than this "
              "will be inlined with the key in the LSM tree.");
+DEFINE_bool(titan_disable_background_gc,
+            rocksdb::titandb::TitanOptions().disable_background_gc,
+            "Disable Titan background GC");
+DEFINE_int32(titan_max_background_gc,
+             rocksdb::titandb::TitanOptions().max_background_gc,
+             "Titan max background GC threads.");
+DEFINE_int64(titan_blob_cache_size, 0,
+             "Size of Titan blob cache. Disabled by default.");
 DEFINE_uint64(blob_db_bytes_per_sync, 0, "Bytes to sync blob file at.");
 DEFINE_uint64(blob_db_file_size, 256 * 1024 * 1024,
@@ -3813,9 +3824,14 @@ class Benchmark {
    }
    options.listeners.emplace_back(listener_);
-    opts->min_blob_size = FLAGS_titan_db_min_blob_size;
+    opts->min_blob_size = FLAGS_titan_min_blob_size;
+    opts->disable_background_gc = FLAGS_titan_disable_background_gc;
+    opts->max_background_gc = FLAGS_titan_max_background_gc;
    opts->min_gc_batch_size = 128 << 20;
    opts->blob_file_compression = FLAGS_compression_type_e;
+    if (FLAGS_titan_blob_cache_size > 0) {
+      opts->blob_cache = NewLRUCache(FLAGS_titan_blob_cache_size);
+    }
    if (FLAGS_num_multi_db <= 1) {
      OpenDb(options, FLAGS_db, &db_);
    } else {