cove-api / src /agents /data_agent.py
mickey1976's picture
Deploy: Minimal FastAPI backend for CoVE Space
549c270
# src/agents/data_agent.py
import subprocess
import sys
import os
from pathlib import Path
from typing import Literal
import urllib.request
class DataAgent:
"""
Runs data prep scripts for a dataset:
- Downloads raw files if not present
- join_meta.py
- build_text_emb.py
- build_image_emb.py
- build_meta_emb.py
"""
def _run(self, argv):
print("β†’", " ".join(argv))
subprocess.check_call(argv)
def _download_raw_data(self, dataset: str):
if dataset != "beauty":
raise ValueError(f"Auto-download is only supported for 'beauty' dataset")
base_dir = Path("data/raw/beauty")
base_dir.mkdir(parents=True, exist_ok=True)
files = {
"reviews.json": "https://huggingface.co/datasets/mickey1976/mayankc-amazon_beauty_subset/resolve/main/reviews.json",
"meta.json": "https://huggingface.co/datasets/mickey1976/mayankc-amazon_beauty_subset/resolve/main/meta.json",
}
for fname, url in files.items():
out_path = base_dir / fname
if not out_path.exists():
print(f"⬇️ Downloading {fname}...")
urllib.request.urlretrieve(url, out_path)
print(f"βœ… Saved to {out_path}")
else:
print(f"βœ”οΈ Already exists: {out_path}")
def prepare(self, dataset: Literal["beauty"] = "beauty"):
print(f"