
Commit 522fbfc

Merge pull request #135 from guardian/migration-script
Migration script
2 parents 1331401 + 5bb25be commit 522fbfc

File tree

14 files changed: +1331 −0 lines changed

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
data
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
3.12

scripts/2025-11-18-migration-to-recipe-v3/README.md

Lines changed: 167 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
import json
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class Config:
    capi_key: str
    capi_url: str
    index_url: str
    templatiser_url: str
    templatiser_token: str
    integration_read_url: str
    integration_write_url: str
    ca_bundle_path: str | None


def load_config(environment: str) -> Config:
    filepath = Path.home() / '.gu' / f"feast-migration-v3-config.{environment}.json"
    with open(filepath, 'r') as f:
        data = json.load(f)
    return Config(
        capi_key=data['capi_key'],
        capi_url=data['capi_url'],
        index_url=data['index_url'],
        templatiser_url=data['templatiser_url'],
        templatiser_token=data['templatiser_token'],
        integration_read_url=data['integration_read_url'],
        integration_write_url=data['integration_write_url'],
        ca_bundle_path=data.get('ca_bundle_path'),
    )
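For context (not part of the commit): load_config reads a per-environment JSON file from ~/.gu/, so a minimal usage sketch looks like the snippet below. Only the module name (config, as imported by the stage-2 script), the function signature, and the JSON keys are taken from the diff; the CODE environment value comes from the argument parser further down, and everything else is illustrative.

# Illustrative sketch only. Assumes ~/.gu/feast-migration-v3-config.CODE.json exists
# and contains the keys read above (capi_key, capi_url, index_url, templatiser_url,
# templatiser_token, integration_read_url, integration_write_url and, optionally,
# ca_bundle_path).
from config import load_config

config = load_config(environment="CODE")
print(config.capi_url)        # endpoint taken from the JSON file
print(config.ca_bundle_path)  # None when 'ca_bundle_path' is absent from the JSON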
Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
import dataclasses
import os
from csv import DictReader, DictWriter
from dataclasses import dataclass
from enum import Enum


class Stage1ReportStatus(Enum):
    SUCCESS = "SUCCESS"
    ACCEPTED_BY_LLM = "ACCEPTED_BY_LLM"
    REVIEW_NEEDED = "REVIEW_NEEDED"
    ERROR = "ERROR"
    INCOMPLETE = "INCOMPLETE"  # the LLM could not get it to match after 5 attempts


@dataclass(frozen=True)
class Stage1Report:
    recipe_id: str
    capi_id: str
    composer_id: str | None
    filename: str
    status: Stage1ReportStatus
    reason: str | None
    diff: str | None
    expected: str | None
    received: str | None
    cost: str
    revision: int

    @staticmethod
    def error(recipe_id: str, capi_id: str, reason: str) -> 'Stage1Report':
        return Stage1Report(
            recipe_id=recipe_id,
            capi_id=capi_id,
            composer_id=None,
            filename="",
            status=Stage1ReportStatus.ERROR,
            reason=reason,
            diff=None,
            expected=None,
            received=None,
            cost="0",
            revision=0,
        )


def stage_1_csv_filename(state_folder: str) -> str:
    return f"{state_folder}/stage-1-results.csv"


def load_stage1_csv_state(state_folder: str) -> list[Stage1Report]:
    reports = []
    with open(stage_1_csv_filename(state_folder), newline='') as csvfile:
        reader = DictReader(csvfile)
        for row in reader:
            stage1_report = Stage1Report(
                recipe_id=row['recipe_id'],
                capi_id=row['capi_id'],
                composer_id=row['composer_id'] if row['composer_id'] else None,
                filename=row['filename'],
                status=Stage1ReportStatus(row['status'].removeprefix("Stage1ReportStatus.")),
                reason=row['reason'] if row['reason'] else None,
                diff=row['diff'] if row['diff'] else None,
                expected=row['expected'] if row['expected'] else None,
                received=row['received'] if row['received'] else None,
                cost=row['cost'],
                revision=int(row['revision']),
            )
            reports.append(stage1_report)
    return reports


class Stage2ReportStatus(Enum):
    SUCCESS = "success"
    ERROR = "error"
    CAPI_UPDATED = "capi_updated"


@dataclass(frozen=True)
class Stage2Report(Stage1Report):
    stage2_status: Stage2ReportStatus
    failure_reason: str | None

    @staticmethod
    def from_stage1_report(report: Stage1Report, status: Stage2ReportStatus,
                           failure_reason: str | None) -> 'Stage2Report':
        return Stage2Report(
            **dataclasses.asdict(report),
            stage2_status=status,
            failure_reason=failure_reason,
        )


def stage_2_csv_filename(state_folder: str) -> str:
    return f"{state_folder}/stage-2-results.csv"


def append_stage2_report(state_folder: str, report: Stage2Report):
    filename = stage_2_csv_filename(state_folder)
    file_exists = os.path.exists(filename) and os.path.getsize(filename) > 0
    with open(filename, 'a', newline='') as f:
        fieldnames = [field.name for field in dataclasses.fields(Stage2Report)]
        writer = DictWriter(f, fieldnames=fieldnames)
        if not file_exists:
            writer.writeheader()
            f.flush()

        writer.writerow(dataclasses.asdict(report))
        f.flush()


def load_stage2_csv_state(state_folder: str) -> list[Stage2Report]:
    reports = []
    filename = stage_2_csv_filename(state_folder)
    if not os.path.exists(filename):
        return reports
    with open(filename, newline='') as csvfile:
        reader = DictReader(csvfile)
        for row in reader:
            reports.append(Stage2Report(**row))
    return reports
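An illustrative resume sketch (not part of the commit), using only the helpers defined above: stage-1 results are reloaded from their CSV and a stage-2 outcome is appended per accepted report. The './state' folder and the choice of statuses are placeholders.

# Illustrative sketch only; './state' is a hypothetical state folder.
reports = load_stage1_csv_state("./state")
for report in reports:
    if report.status is Stage1ReportStatus.ACCEPTED_BY_LLM:
        stage2 = Stage2Report.from_stage1_report(
            report, Stage2ReportStatus.SUCCESS, failure_reason=None)
        append_stage2_report("./state", stage2)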
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
import json
import logging
import os
from argparse import ArgumentParser

from config import load_config
from fancy_logging import init_logger
from services import fetch_index, fetch_flexible_article, fetch_CAPI_article

logger = logging.getLogger(__name__)


def main(state_folder: str, environment: str):
    init_logger()
    config = load_config(environment=environment)

    os.makedirs(state_folder, exist_ok=True)

    recipes = fetch_index(config)

    capi_ids = set()
    for recipe in recipes:
        capi_ids.add(recipe.capi_id)

    for capi_id in capi_ids:
        try:
            capi_fetch_response = fetch_CAPI_article(capi_id, config)
            if capi_fetch_response is None:
                logger.warning(f"Article {capi_id} not found in CAPI")
                continue

            # fetch the recipes from composer (flexible)
            composer_id = capi_fetch_response["response"]["content"]["fields"].get("internalComposerCode")
            flexible_article = fetch_flexible_article(composer_id, config)

            for recipe in flexible_article.recipes:
                with open(os.path.join(state_folder, f"{recipe['id']}.json"), "w") as f:
                    f.write(json.dumps(recipe, indent=2))
        except Exception as e:
            logger.error(f"Error processing CAPI article {capi_id}: {e}")
    logger.info("All done!")


if __name__ == "__main__":
    arg_parser = ArgumentParser(description='Stage 2 of the migration to recipe v3')
    arg_parser.add_argument('-s', '--state-folder', type=str, required=True, help='Path to the state folder')
    arg_parser.add_argument('-e', '--environment', type=str, default='CODE', choices=['LOCAL', 'CODE', 'PROD'], help='The environment to use (LOCAL, CODE, PROD)')

    args = arg_parser.parse_args()
    main(state_folder=args.state_folder, environment=args.environment)
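For reference only: given the argument parser above, a typical invocation would look something like the line below. The script's actual filename isn't visible in this diff view, so stage2.py is a placeholder.

    python stage2.py --state-folder ./state --environment CODE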
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
import logging
from rich.logging import RichHandler
from rich.console import Console

# Shared console instance for both logging and progress
_console = None


def get_console():
    """Get or create the shared console instance"""
    global _console
    if _console is None:
        _console = Console(stderr=True)
    return _console


class ColoredFormatter(logging.Formatter):
    # ANSI color codes
    COLORS = {
        'DEBUG': '\033[90m',     # Grey
        'INFO': '\033[97m',      # White
        'WARNING': '\033[93m',   # Yellow
        'ERROR': '\033[91m',     # Red
        'CRITICAL': '\033[91m',  # Red
    }
    RESET = '\033[0m'

    def format(self, record):
        log_color = self.COLORS.get(record.levelname, self.RESET)
        record.levelname = f"{log_color}{record.levelname}{self.RESET}"
        record.msg = f"{log_color}{record.msg}{self.RESET}"
        return super().format(record)


def init_logger(level=logging.INFO):
    handler = RichHandler(
        console=get_console(),
        show_time=True,
        show_path=False,
        rich_tracebacks=True,
        tracebacks_show_locals=True
    )
    logging.basicConfig(
        level=level,
        handlers=[handler],
        format="%(message)s",
        datefmt="[%Y-%m-%d %H:%M:%S]"
    )
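A minimal usage sketch (not part of the commit): init_logger installs the RichHandler on the root logger via basicConfig, so module loggers obtained with logging.getLogger pick it up. The module name fancy_logging matches the import used by the stage-2 script; the log message is illustrative.

# Illustrative sketch only.
import logging
from fancy_logging import init_logger

init_logger(level=logging.DEBUG)
logging.getLogger(__name__).info("log output goes through the shared Rich console")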
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
[project]
name = "2025-11-18-migration-to-recipe-v3"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "boto3>=1.41.3",
    "requests>=2.32.5",
    "rich>=13.9.4",
]
