|
@@ -11,6 +11,7 @@ CURRENT_MONTH="${CURRENT_MONTH:-$(date +%Y%m)}"
|
|
|
DRY_RUN=1
|
|
DRY_RUN=1
|
|
|
SKIP_PM2=0
|
|
SKIP_PM2=0
|
|
|
FORCE=0
|
|
FORCE=0
|
|
|
|
|
+RECONCILE_ONLY=0
|
|
|
|
|
|
|
|
usage() {
|
|
usage() {
|
|
|
cat <<'EOF'
|
|
cat <<'EOF'
|
|
@@ -21,6 +22,7 @@ Options:
|
|
|
--execute Actually run the migration. Without this flag, the script is a dry-run.
|
|
--execute Actually run the migration. Without this flag, the script is a dry-run.
|
|
|
--skip-pm2 Do not stop/start PM2 ingestor-service during cutover.
|
|
--skip-pm2 Do not stop/start PM2 ingestor-service during cutover.
|
|
|
--force Allow destination/backup tables to already exist.
|
|
--force Allow destination/backup tables to already exist.
|
|
|
|
|
+ --reconcile-only Only reconcile missing rows from source into destination, without create/rename.
|
|
|
--current-month Override the current month used for cutover backfill, format YYYYMM.
|
|
--current-month Override the current month used for cutover backfill, format YYYYMM.
|
|
|
--help Show this help message.
|
|
--help Show this help message.
|
|
|
|
|
|
|
@@ -64,6 +66,10 @@ while [[ $# -gt 0 ]]; do
|
|
|
FORCE=1
|
|
FORCE=1
|
|
|
shift
|
|
shift
|
|
|
;;
|
|
;;
|
|
|
|
|
+ --reconcile-only)
|
|
|
|
|
+ RECONCILE_ONLY=1
|
|
|
|
|
+ shift
|
|
|
|
|
+ ;;
|
|
|
--current-month)
|
|
--current-month)
|
|
|
CURRENT_MONTH="$2"
|
|
CURRENT_MONTH="$2"
|
|
|
shift 2
|
|
shift 2
|
|
@@ -212,13 +218,17 @@ dst_exists="$(run_sql "EXISTS TABLE ${DB_NAME}.${DST_TABLE} FORMAT TabSeparatedR
|
|
|
backup_exists="$(run_sql "EXISTS TABLE ${DB_NAME}.${BACKUP_TABLE} FORMAT TabSeparatedRaw")"
|
|
backup_exists="$(run_sql "EXISTS TABLE ${DB_NAME}.${BACKUP_TABLE} FORMAT TabSeparatedRaw")"
|
|
|
|
|
|
|
|
if [[ $FORCE -eq 0 && "$dst_exists" == "1" ]]; then
|
|
if [[ $FORCE -eq 0 && "$dst_exists" == "1" ]]; then
|
|
|
- echo "Destination table ${DB_NAME}.${DST_TABLE} already exists. Use --force only if you know it is safe." >&2
|
|
|
|
|
- exit 1
|
|
|
|
|
|
|
+ if [[ $RECONCILE_ONLY -eq 0 ]]; then
|
|
|
|
|
+ echo "Destination table ${DB_NAME}.${DST_TABLE} already exists. Use --force only if you know it is safe." >&2
|
|
|
|
|
+ exit 1
|
|
|
|
|
+ fi
|
|
|
fi
|
|
fi
|
|
|
|
|
|
|
|
if [[ $FORCE -eq 0 && "$backup_exists" == "1" ]]; then
|
|
if [[ $FORCE -eq 0 && "$backup_exists" == "1" ]]; then
|
|
|
- echo "Backup table ${DB_NAME}.${BACKUP_TABLE} already exists. Use --force only if you know it is safe." >&2
|
|
|
|
|
- exit 1
|
|
|
|
|
|
|
+ if [[ $RECONCILE_ONLY -eq 0 ]]; then
|
|
|
|
|
+ echo "Backup table ${DB_NAME}.${BACKUP_TABLE} already exists. Use --force only if you know it is safe." >&2
|
|
|
|
|
+ exit 1
|
|
|
|
|
+ fi
|
|
|
fi
|
|
fi
|
|
|
|
|
|
|
|
month_list="$(run_sql "SELECT DISTINCT toYYYYMM(time) AS ym FROM ${DB_NAME}.${SRC_TABLE} ORDER BY ym FORMAT TSV")"
|
|
month_list="$(run_sql "SELECT DISTINCT toYYYYMM(time) AS ym FROM ${DB_NAME}.${SRC_TABLE} ORDER BY ym FORMAT TSV")"
|
|
@@ -277,6 +287,43 @@ if [[ "$CLICKHOUSE_EXEC_MODE" == "docker" ]]; then
|
|
|
printf 'ClickHouse container: %s\n' "$CLICKHOUSE_CONTAINER"
|
|
printf 'ClickHouse container: %s\n' "$CLICKHOUSE_CONTAINER"
|
|
|
fi
|
|
fi
|
|
|
printf 'Mode: %s\n' "$([[ $DRY_RUN -eq 0 ]] && echo execute || echo dry-run)"
|
|
printf 'Mode: %s\n' "$([[ $DRY_RUN -eq 0 ]] && echo execute || echo dry-run)"
|
|
|
|
|
+printf 'Reconcile only: %s\n' "$([[ $RECONCILE_ONLY -eq 1 ]] && echo yes || echo no)"
|
|
|
|
|
+
|
|
|
|
|
+diff_sql="SELECT src.ym, src.rows AS source_rows, ifNull(dst.rows, 0) AS destination_rows, src.rows - ifNull(dst.rows, 0) AS missing_rows FROM (SELECT toYYYYMM(time) AS ym, count() AS rows FROM ${DB_NAME}.${SRC_TABLE} GROUP BY ym) AS src LEFT JOIN (SELECT toYYYYMM(time) AS ym, count() AS rows FROM ${DB_NAME}.${DST_TABLE} GROUP BY ym) AS dst USING (ym) ORDER BY ym FORMAT PrettyCompact"
|
|
|
|
|
+
|
|
|
|
|
+reconcile_current_month_sql="INSERT INTO ${DB_NAME}.${DST_TABLE} SELECT src.* FROM ${DB_NAME}.${SRC_TABLE} AS src LEFT JOIN ${DB_NAME}.${DST_TABLE} AS dst ON src.log_id = dst.log_id WHERE src.time >= toDateTime('${current_month_start}') AND dst.log_id IS NULL"
|
|
|
|
|
+
|
|
|
|
|
+print_sql_block "Per-month diff" "$diff_sql"
|
|
|
|
|
+
|
|
|
|
|
+if [[ $RECONCILE_ONLY -eq 1 ]]; then
|
|
|
|
|
+ print_sql_block "Reconcile current month missing rows" "$reconcile_current_month_sql"
|
|
|
|
|
+
|
|
|
|
|
+ if [[ $DRY_RUN -eq 1 ]]; then
|
|
|
|
|
+ printf '\nDry-run only. Re-run with --execute --reconcile-only to perform reconciliation.\n'
|
|
|
|
|
+ exit 0
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ if [[ $SKIP_PM2 -eq 0 ]]; then
|
|
|
|
|
+ run_cmd_step "Stop PM2 ingestor" pm2 stop "${INGESTOR_PM2_NAME}"
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ printf '\nPer-month diff before reconcile:\n'
|
|
|
|
|
+ run_sql "$diff_sql"
|
|
|
|
|
+
|
|
|
|
|
+ run_sql_step "Reconcile current month missing rows" "$reconcile_current_month_sql"
|
|
|
|
|
+
|
|
|
|
|
+ old_rows="$(run_sql "SELECT count() FROM ${DB_NAME}.${SRC_TABLE} FORMAT TabSeparatedRaw")"
|
|
|
|
|
+ new_rows="$(run_sql "SELECT count() FROM ${DB_NAME}.${DST_TABLE} FORMAT TabSeparatedRaw")"
|
|
|
|
|
+ printf '\nRow count check after reconcile: source=%s destination=%s\n' "$old_rows" "$new_rows"
|
|
|
|
|
+ printf '\nPer-month diff after reconcile:\n'
|
|
|
|
|
+ run_sql "$diff_sql"
|
|
|
|
|
+
|
|
|
|
|
+ if [[ $SKIP_PM2 -eq 0 ]]; then
|
|
|
|
|
+ run_cmd_step "Start PM2 ingestor" pm2 start "${INGESTOR_PM2_NAME}"
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ exit 0
|
|
|
|
|
+fi
|
|
|
|
|
|
|
|
print_sql_block "Create destination table" "$create_sql"
|
|
print_sql_block "Create destination table" "$create_sql"
|
|
|
|
|
|
|
@@ -287,7 +334,7 @@ for ym in "${historical_months[@]}"; do
|
|
|
done
|
|
done
|
|
|
|
|
|
|
|
current_month_start="${CURRENT_MONTH:0:4}-${CURRENT_MONTH:4:2}-01 00:00:00"
|
|
current_month_start="${CURRENT_MONTH:0:4}-${CURRENT_MONTH:4:2}-01 00:00:00"
|
|
|
-current_month_sql="INSERT INTO ${DB_NAME}.${DST_TABLE} SELECT * FROM ${DB_NAME}.${SRC_TABLE} WHERE time >= toDateTime('${current_month_start}') AND time < now()"
|
|
|
|
|
|
|
+current_month_sql="INSERT INTO ${DB_NAME}.${DST_TABLE} SELECT * FROM ${DB_NAME}.${SRC_TABLE} WHERE time >= toDateTime('${current_month_start}')"
|
|
|
print_sql_block "Cutover current month backfill" "$current_month_sql"
|
|
print_sql_block "Cutover current month backfill" "$current_month_sql"
|
|
|
|
|
|
|
|
rename_sql="RENAME TABLE ${DB_NAME}.${SRC_TABLE} TO ${DB_NAME}.${BACKUP_TABLE}, ${DB_NAME}.${DST_TABLE} TO ${DB_NAME}.${SRC_TABLE}"
|
|
rename_sql="RENAME TABLE ${DB_NAME}.${SRC_TABLE} TO ${DB_NAME}.${BACKUP_TABLE}, ${DB_NAME}.${DST_TABLE} TO ${DB_NAME}.${SRC_TABLE}"
|
|
@@ -321,6 +368,8 @@ printf '\nRow count check: source=%s destination=%s\n' "$old_rows" "$new_rows"
|
|
|
|
|
|
|
|
if [[ "$old_rows" != "$new_rows" ]]; then
|
|
if [[ "$old_rows" != "$new_rows" ]]; then
|
|
|
echo "Row count mismatch detected. Migration aborted before rename." >&2
|
|
echo "Row count mismatch detected. Migration aborted before rename." >&2
|
|
|
|
|
+ printf '\nPer-month diff:\n'
|
|
|
|
|
+ run_sql "$diff_sql"
|
|
|
if [[ $SKIP_PM2 -eq 0 ]]; then
|
|
if [[ $SKIP_PM2 -eq 0 ]]; then
|
|
|
pm2 start "$INGESTOR_PM2_NAME"
|
|
pm2 start "$INGESTOR_PM2_NAME"
|
|
|
fi
|
|
fi
|