# Source: investormlops-api/src/ingest_data.py
# Author: Mayur-cinderace
# Commit: Move Docker app to repo root for Hugging Face Spaces
# Revision: 0a6956c
import os
import shutil
import pandas as pd
from pathlib import Path
# Directory that holds the raw input CSVs. It is created at import time so
# that the functions in this module can rely on it existing.
RAW_DIR = Path("data/raw")
# parents=True: on a fresh checkout "data/" may not exist yet, and a plain
# mkdir() would raise FileNotFoundError for the missing parent directory.
RAW_DIR.mkdir(parents=True, exist_ok=True)
# Location of the stock price CSV inside RAW_DIR.
STOCK_FILE = RAW_DIR / "stock_prices.csv"
def fetch_stock_data():
    """Verify that the stock price CSV is already present locally.

    No download is attempted: Yahoo Finance is blocked in cloud
    environments such as Codespaces, so an existing ``stock_prices.csv``
    is reused as-is.

    Raises:
        RuntimeError: if ``STOCK_FILE`` does not exist.
    """
    # Guard clause: without the file there is nothing to reuse.
    if not STOCK_FILE.exists():
        raise RuntimeError(
            "stock_prices.csv not found. "
            "Place it manually in data/raw when running in Codespaces."
        )

    print("Using existing stock_prices.csv (no external fetch)")
def copy_news_files():
    """Copy the news CSV files from ``data/`` into ``RAW_DIR``.

    The files ``news_articles.csv``, ``gnews_data.csv`` and
    ``reddit_data.csv`` are looked up in ``data/``. A missing file is
    reported with a ``[WARN]`` line and skipped; a file whose source and
    destination resolve to the same path is skipped silently. Every other
    file is copied with ``shutil.copy`` and the copy is reported on stdout.
    """
    source_dir = Path("data")
    filenames = ("news_articles.csv", "gnews_data.csv", "reddit_data.csv")

    for filename in filenames:
        src = source_dir / filename
        dst = RAW_DIR / filename

        if not src.exists():
            print(f"[WARN] {src} not found")
            continue

        # Nothing to do when both paths point at the same file; shutil.copy
        # would raise SameFileError in that case.
        if src.resolve() == dst.resolve():
            continue

        shutil.copy(src, dst)
        # Keep a separator between the two paths so the message is readable.
        print(f"Copied {src} -> {dst}")
if __name__ == "__main__":
    # Script entry point: run the ingest steps in order. An exception raised
    # by an earlier step stops the run before the later ones execute.
    for step in (fetch_stock_data, copy_news_files):
        step()