Ritabanm committed on
Commit
100da6f
·
verified ·
1 Parent(s): 1d40cb0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -17
app.py CHANGED
@@ -1,12 +1,12 @@
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
- import os, logging, traceback
5
- from pathlib import Path
6
  from typing import Optional, Dict
 
 
7
 
8
- # Writable base (HF Spaces persistent volume)
9
- DATA_DIR = os.getenv("DATA_DIR", "/data")
10
  INDEX_ROOT = os.getenv("INDEX_DIR", os.path.join(DATA_DIR, "index"))
11
  Path(INDEX_ROOT).mkdir(parents=True, exist_ok=True)
12
 
@@ -15,15 +15,14 @@ from agent.tools import FetchTools
15
  from ingest.sec import fetch_recent_filings_by_cik
16
 
17
  log = logging.getLogger("uvicorn.error")
18
-
19
  app = FastAPI(title="DeepDive IR Agent")
20
 
21
- # CORS: allow localhost + all vercel previews + your prod app
22
  app.add_middleware(
23
  CORSMiddleware,
24
  allow_origins=[
25
  "http://localhost:3000",
26
- "https://deepdive-ir-agent.vercel.app",
27
  ],
28
  allow_origin_regex=r"https://.*\.vercel\.app$",
29
  allow_credentials=True,
@@ -31,21 +30,21 @@ app.add_middleware(
31
  allow_headers=["*"],
32
  )
33
 
34
- # ---- Models ----
35
  class IngestRequest(BaseModel):
36
  cik: str
37
  ir_url: Optional[str] = None
38
 
39
  class AskRequest(BaseModel):
40
  question: str
41
- cik: Optional[str] = None # optional: if omitted we use the last ingested CIK
42
 
43
- # ---- State: cache graphs per CIK ----
44
- graphs: Dict[str, AgentGraph] = {}
45
  last_cik: Optional[str] = None
46
  tools = FetchTools()
47
 
48
- # ---- Helpers ----
49
  def norm_cik(raw: str) -> str:
50
  s = raw.strip()
51
  if not s.isdigit():
@@ -54,11 +53,12 @@ def norm_cik(raw: str) -> str:
54
  raise HTTPException(400, "CIK too long; use 10 digits.")
55
  return s.zfill(10)
56
 
57
- def company_index_dir(cik: str) -> str:
58
  d = os.path.join(INDEX_ROOT, cik)
59
  Path(d).mkdir(parents=True, exist_ok=True)
60
  return d
61
 
 
62
  @app.get("/")
63
  def root():
64
  return {"ok": True, "msg": "DeepDive IR Agent API"}
@@ -69,10 +69,14 @@ def healthz():
69
 
70
  @app.post("/ingest")
71
  async def ingest(req: IngestRequest):
 
 
 
72
  global last_cik
73
  try:
74
  cik = norm_cik(req.cik)
75
 
 
76
  filings = await fetch_recent_filings_by_cik(cik)
77
  docs = []
78
  for form, url, title in filings:
@@ -95,14 +99,17 @@ async def ingest(req: IngestRequest):
95
  if not docs:
96
  raise HTTPException(400, "No documents fetched.")
97
 
98
- idx_dir = company_index_dir(cik)
 
 
 
99
 
100
- # Some libraries write relative paths like "index/vecs.npy".
101
- # Make those land under /data/index/<CIK>/ by chdir-ing temporarily.
102
  prev = os.getcwd()
103
  os.chdir(idx_dir)
104
  try:
105
- g = AgentGraph(index_dir=idx_dir) # absolute, per-CIK dir
106
  g.build_index(docs)
107
  finally:
108
  os.chdir(prev)
 
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
 
 
4
  from typing import Optional, Dict
5
+ import os, shutil, logging, traceback
6
+ from pathlib import Path
7
 
8
+ # ===== Persistent storage on HF =====
9
+ DATA_DIR = os.getenv("DATA_DIR", "/data")
10
  INDEX_ROOT = os.getenv("INDEX_DIR", os.path.join(DATA_DIR, "index"))
11
  Path(INDEX_ROOT).mkdir(parents=True, exist_ok=True)
12
 
 
15
  from ingest.sec import fetch_recent_filings_by_cik
16
 
17
  log = logging.getLogger("uvicorn.error")
 
18
  app = FastAPI(title="DeepDive IR Agent")
19
 
20
+ # ===== CORS: localhost + vercel previews + your prod app =====
21
  app.add_middleware(
22
  CORSMiddleware,
23
  allow_origins=[
24
  "http://localhost:3000",
25
+ "https://deepdive-ir-agent.vercel.app", # <-- change if your prod URL is different
26
  ],
27
  allow_origin_regex=r"https://.*\.vercel\.app$",
28
  allow_credentials=True,
 
30
  allow_headers=["*"],
31
  )
32
 
33
+ # ===== Models =====
34
  class IngestRequest(BaseModel):
35
  cik: str
36
  ir_url: Optional[str] = None
37
 
38
  class AskRequest(BaseModel):
39
  question: str
40
+ cik: Optional[str] = None # optional; if omitted we use the last ingested CIK
41
 
42
+ # ===== State =====
43
+ graphs: Dict[str, AgentGraph] = {} # CIK -> AgentGraph
44
  last_cik: Optional[str] = None
45
  tools = FetchTools()
46
 
47
+ # ===== Helpers =====
48
  def norm_cik(raw: str) -> str:
49
  s = raw.strip()
50
  if not s.isdigit():
 
53
  raise HTTPException(400, "CIK too long; use 10 digits.")
54
  return s.zfill(10)
55
 
56
+ def idx_dir_for(cik: str) -> str:
57
  d = os.path.join(INDEX_ROOT, cik)
58
  Path(d).mkdir(parents=True, exist_ok=True)
59
  return d
60
 
61
+ # ===== Routes =====
62
  @app.get("/")
63
  def root():
64
  return {"ok": True, "msg": "DeepDive IR Agent API"}
 
69
 
70
  @app.post("/ingest")
71
  async def ingest(req: IngestRequest):
72
+ """
73
+ Build a fresh index for this CIK under /data/index/<CIK>.
74
+ """
75
  global last_cik
76
  try:
77
  cik = norm_cik(req.cik)
78
 
79
+ # Fetch recent filings + optional IR site
80
  filings = await fetch_recent_filings_by_cik(cik)
81
  docs = []
82
  for form, url, title in filings:
 
99
  if not docs:
100
  raise HTTPException(400, "No documents fetched.")
101
 
102
+ # Fresh per-CIK folder
103
+ idx_dir = idx_dir_for(cik)
104
+ shutil.rmtree(idx_dir, ignore_errors=True)
105
+ Path(idx_dir).mkdir(parents=True, exist_ok=True)
106
 
107
+ # Some libs may write relative paths like "index/vecs.npy".
108
+ # Build from the CIK directory so relative paths resolve to /data/index/<CIK>/...
109
  prev = os.getcwd()
110
  os.chdir(idx_dir)
111
  try:
112
+ g = AgentGraph(index_dir=idx_dir) # absolute per-CIK dir
113
  g.build_index(docs)
114
  finally:
115
  os.chdir(prev)