|
@@ -94,6 +94,7 @@ async function flushClickHouseBuffer(buffer: IEventLog[], tableName: string, pro
|
|
|
// --- Historical Data Ingestion Logic ---
|
|
// --- Historical Data Ingestion Logic ---
|
|
|
// Now accepts only the log files directory as argument
|
|
// Now accepts only the log files directory as argument
|
|
|
async function ingestHistoricalData(logFilesDir: string) {
|
|
async function ingestHistoricalData(logFilesDir: string) {
|
|
|
|
|
+ const overallStartTime = dayjs();
|
|
|
console.log(`Starting historical ingestion from local files in: ${logFilesDir}`);
|
|
console.log(`Starting historical ingestion from local files in: ${logFilesDir}`);
|
|
|
|
|
|
|
|
let totalProcessedEvents = 0;
|
|
let totalProcessedEvents = 0;
|
|
@@ -116,10 +117,20 @@ async function ingestHistoricalData(logFilesDir: string) {
|
|
|
return;
|
|
return;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- for (const expectedFilename of allFiles) {
|
|
|
|
|
|
|
+ const totalFiles = allFiles.length;
|
|
|
|
|
+ if (totalFiles === 0) {
|
|
|
|
|
+ console.log("No .log.gz files found in the specified directory. Exiting.");
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ console.log(`Found ${totalFiles} .log.gz files to process.`);
|
|
|
|
|
+
|
|
|
|
|
+ for (let i = 0; i < totalFiles; i++) {
|
|
|
|
|
+ const expectedFilename = allFiles[i];
|
|
|
const filePath = path.join(logFilesDir, expectedFilename);
|
|
const filePath = path.join(logFilesDir, expectedFilename);
|
|
|
|
|
+ const fileStartTime = dayjs();
|
|
|
|
|
+
|
|
|
|
|
+ console.log(`\n--- Processing file ${i + 1}/${totalFiles}: ${expectedFilename} ---`);
|
|
|
|
|
|
|
|
- console.log(`\n--- Processing file: ${expectedFilename} ---`);
|
|
|
|
|
let processedEventsCount = 0;
|
|
let processedEventsCount = 0;
|
|
|
let upsertedUsersCount = 0;
|
|
let upsertedUsersCount = 0;
|
|
|
|
|
|
|
@@ -161,6 +172,12 @@ async function ingestHistoricalData(logFilesDir: string) {
|
|
|
continue; // Skip if not an allowed event type
|
|
continue; // Skip if not an allowed event type
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ // Skip events with invalid duration
|
|
|
|
|
+ if (eventLog.duration > 10000 || eventLog.duration < 0) {
|
|
|
|
|
+ console.warn(`Skipping event with invalid duration: ${eventLog.duration}. Event: ${JSON.stringify(eventLog)}`);
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
const uid = projectId === 1 ? eventLog.uid : eventLog.user_id;
|
|
const uid = projectId === 1 ? eventLog.uid : eventLog.user_id;
|
|
|
if (!uid) {
|
|
if (!uid) {
|
|
|
console.warn(`Skipping event with missing UID in '${expectedFilename}': ${JSON.stringify(eventLog)}`);
|
|
console.warn(`Skipping event with missing UID in '${expectedFilename}': ${JSON.stringify(eventLog)}`);
|
|
@@ -395,15 +412,18 @@ async function ingestHistoricalData(logFilesDir: string) {
|
|
|
|
|
|
|
|
totalProcessedEvents += processedEventsCount;
|
|
totalProcessedEvents += processedEventsCount;
|
|
|
totalUpsertedUsers += upsertedUsersCount;
|
|
totalUpsertedUsers += upsertedUsersCount;
|
|
|
- console.log(`File '${expectedFilename}' processed: ${processedEventsCount} events, ${upsertedUsersCount} users upserted.`);
|
|
|
|
|
|
|
+ const fileDuration = dayjs.duration(dayjs().diff(fileStartTime)).asSeconds();
|
|
|
|
|
+ console.log(`File '${expectedFilename}' processed: ${processedEventsCount} events, ${upsertedUsersCount} users upserted. Time taken: ${fileDuration.toFixed(2)} seconds.`);
|
|
|
} catch (error) {
|
|
} catch (error) {
|
|
|
console.error(`Error processing file '${expectedFilename}':`, error);
|
|
console.error(`Error processing file '${expectedFilename}':`, error);
|
|
|
// If an error occurs during file processing, we still want to move to the next file
|
|
// If an error occurs during file processing, we still want to move to the next file
|
|
|
}
|
|
}
|
|
|
} // End of for...of allFiles loop
|
|
} // End of for...of allFiles loop
|
|
|
|
|
+ const overallDuration = dayjs.duration(dayjs().diff(overallStartTime)).asSeconds();
|
|
|
console.log(`\n--- Historical data ingestion complete ---`);
|
|
console.log(`\n--- Historical data ingestion complete ---`);
|
|
|
console.log(`Total processed events: ${totalProcessedEvents}`);
|
|
console.log(`Total processed events: ${totalProcessedEvents}`);
|
|
|
console.log(`Total upserted users: ${totalUpsertedUsers}`);
|
|
console.log(`Total upserted users: ${totalUpsertedUsers}`);
|
|
|
|
|
+ console.log(`Total time taken: ${overallDuration.toFixed(2)} seconds.`);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// --- Main execution ---
|
|
// --- Main execution ---
|