diff --git a/ETL/L1A.py b/ETL/L1A.py index bcc6d6d..cd488bb 100644 --- a/ETL/L1A.py +++ b/ETL/L1A.py @@ -1,4 +1,16 @@ +""" +L1A Data Ingestion Script + +This script reads raw JSON files from the 'output_arena' directory and ingests them into the SQLite database. +It supports incremental updates by default, skipping files that have already been processed. + +Usage: + python ETL/L1A.py # Standard incremental run + python ETL/L1A.py --force # Force re-process all files (overwrite existing data) +""" + import os + import json import sqlite3 import glob diff --git a/ETL/README.md b/ETL/README.md index c6df99c..77d085f 100644 --- a/ETL/README.md +++ b/ETL/README.md @@ -1,7 +1,23 @@ -L1A output_arena/iframe_network.json -> L1A.sqlite(Primary Key: match_id) +# ETL Pipeline Documentation + +## 1. L1A (Raw Data Ingestion) +**Status**: ✅ Supports Incremental Update + +This script ingests raw JSON files from `output_arena/` into `database/L1A/L1A.sqlite`. + +### Usage +```bash +# Standard Run (Incremental) +# Only processes new files that are not yet in the database. +python ETL/L1A.py + +# Force Refresh +# Reprocesses ALL files, overwriting existing records. +python ETL/L1A.py --force +``` L1B demoparser2 -> L1B.sqlite L2 L1A.sqlite (+L1b.sqlite) -> L2.sqlite -L3 Deep Dive. \ No newline at end of file +L3 Deep Dive \ No newline at end of file