From e92979a58cd2963dac6ee25e3af3c6aad6125035 Mon Sep 17 00:00:00 2001
From: xunyulin230420 <您的邮箱766024@qq.com>
Date: Sun, 25 Jan 2026 01:06:16 +0800
Subject: [PATCH] =?UTF-8?q?docs:=20=E6=9B=B4=E6=96=B0=20L1A=20=E5=A2=9E?=
 =?UTF-8?q?=E9=87=8F=E5=8A=9F=E8=83=BD=E8=AF=B4=E6=98=8E=E5=8F=8A=20ETL=20?=
 =?UTF-8?q?=E6=96=87=E6=A1=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ETL/L1A.py    | 12 ++++++++++++
 ETL/README.md | 20 ++++++++++++++++++--
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/ETL/L1A.py b/ETL/L1A.py
index bcc6d6d..cd488bb 100644
--- a/ETL/L1A.py
+++ b/ETL/L1A.py
@@ -1,4 +1,16 @@
+"""
+L1A Data Ingestion Script
+
+This script reads raw JSON files from the 'output_arena' directory and ingests them into the SQLite database.
+It supports incremental updates by default, skipping files that have already been processed.
+
+Usage:
+    python ETL/L1A.py          # Standard incremental run
+    python ETL/L1A.py --force  # Force re-process all files (overwrite existing data)
+"""
+
 import os
+
 import json
 import sqlite3
 import glob
diff --git a/ETL/README.md b/ETL/README.md
index c6df99c..77d085f 100644
--- a/ETL/README.md
+++ b/ETL/README.md
@@ -1,7 +1,23 @@
-L1A output_arena/iframe_network.json -> L1A.sqlite(Primary Key: match_id)
+# ETL Pipeline Documentation
+
+## 1. L1A (Raw Data Ingestion)
+**Status**: ✅ Supports Incremental Update
+
+This script ingests raw JSON files from `output_arena/` into `database/L1A/L1A.sqlite`.
+
+### Usage
+```bash
+# Standard Run (Incremental)
+# Only processes new files that are not yet in the database.
+python ETL/L1A.py
+
+# Force Refresh
+# Reprocesses ALL files, overwriting existing records.
+python ETL/L1A.py --force
+```
 
 L1B demoparser2 -> L1B.sqlite
 
 L2 L1A.sqlite (+L1b.sqlite) -> L2.sqlite
 
-L3 Deep Dive.
\ No newline at end of file
+L3 Deep Dive
\ No newline at end of file