Ritabanm committed on
Commit
1d40cb0
·
verified ·
1 Parent(s): b865558

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -41
app.py CHANGED
@@ -3,13 +3,12 @@ from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  import os, logging, traceback
5
  from pathlib import Path
 
6
 
7
- # ⬅️ we only need DATA_DIR here; we'll derive the absolute index path from it
8
- from settings import DATA_DIR
9
-
10
- # Make sure the HF persistent folder exists: /data/index
11
- INDEX_ABS = os.path.join(DATA_DIR, "index")
12
- Path(INDEX_ABS).mkdir(parents=True, exist_ok=True)
13
 
14
  from agent.graph import AgentGraph
15
  from agent.tools import FetchTools
@@ -19,27 +18,46 @@ log = logging.getLogger("uvicorn.error")
19
 
20
  app = FastAPI(title="DeepDive IR Agent")
21
 
22
- # CORS tighten later once your Vercel URL is final
23
  app.add_middleware(
24
  CORSMiddleware,
25
- allow_origins=["*"], # for debugging; later: ["https://<your-vercel>.vercel.app", "http://localhost:3000"]
 
 
 
 
26
  allow_credentials=True,
27
  allow_methods=["*"],
28
  allow_headers=["*"],
29
  )
30
 
31
- # Ensure index path exists (and is persistent on HF: set INDEX_DIR="/data/index" in settings)
32
- Path(INDEX_DIR).mkdir(parents=True, exist_ok=True)
33
-
34
- graph = AgentGraph(index_dir=INDEX_DIR)
35
- tools = FetchTools()
36
-
37
  class IngestRequest(BaseModel):
38
  cik: str
39
- ir_url: str | None = None
40
 
41
  class AskRequest(BaseModel):
42
  question: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  @app.get("/")
45
  def root():
@@ -51,54 +69,76 @@ def healthz():
51
 
52
  @app.post("/ingest")
53
  async def ingest(req: IngestRequest):
 
54
  try:
55
- # SEC wants a *10-digit, zero-padded* CIK string. Keep as string to preserve leading zeros.
56
- cik = req.cik.strip()
57
- if not cik.isdigit():
58
- raise HTTPException(status_code=400, detail="CIK must be digits only.")
59
- if len(cik) > 10:
60
- raise HTTPException(status_code=400, detail="CIK too long; use 10 digits.")
61
- cik = cik.zfill(10)
62
-
63
- filings = await fetch_recent_filings_by_cik(cik) # must send a real User-Agent inside!
64
  docs = []
65
  for form, url, title in filings:
66
- text = await tools.get_text_from_url(url) # must send a real User-Agent inside!
67
- if not text:
68
- log.warning(f"Empty text for {url}")
69
- continue
70
- docs.append({"title": title, "url": url, "text": text})
 
 
71
 
72
  if req.ir_url:
73
- ir_text = await tools.get_text_from_url(req.ir_url)
74
- if ir_text:
75
- docs.append({"title": "IR site", "url": req.ir_url, "text": ir_text})
 
 
 
76
 
77
  if not docs:
78
- raise HTTPException(status_code=400, detail="No documents fetched.")
 
 
79
 
80
- graph.build_index(docs)
81
- return {"ok": True, "num_docs": len(docs)}
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  except HTTPException:
84
  raise
85
  except Exception as e:
86
- # Log full traceback for debugging in HF "Logs"
87
  log.error("Ingest failed: %s\n%s", e, traceback.format_exc())
88
  raise HTTPException(status_code=502, detail=f"Ingest failed: {type(e).__name__}: {e}")
89
 
90
  @app.post("/ask")
91
  def ask(req: AskRequest):
92
  try:
93
- return graph.answer(req.question)
 
 
 
 
 
94
  except Exception as e:
95
  log.error("Ask failed: %s\n%s", e, traceback.format_exc())
96
- raise HTTPException(status_code=500, detail=str(e))
97
 
98
  @app.get("/brief")
99
- def brief():
100
  try:
101
- return graph.brief()
 
 
 
 
 
102
  except Exception as e:
103
  log.error("Brief failed: %s\n%s", e, traceback.format_exc())
104
- raise HTTPException(status_code=500, detail=str(e))
 
3
  from pydantic import BaseModel
4
  import os, logging, traceback
5
  from pathlib import Path
6
+ from typing import Optional, Dict
7
 
8
# Storage locations. DATA_DIR defaults to /data (the HF Spaces persistent
# volume); INDEX_DIR, when set, overrides the derived <DATA_DIR>/index path.
DATA_DIR = os.environ.get("DATA_DIR", "/data")
INDEX_ROOT = os.environ.get("INDEX_DIR", os.path.join(DATA_DIR, "index"))
# Create the index root at import time so later per-CIK subfolders have a parent.
Path(INDEX_ROOT).mkdir(parents=True, exist_ok=True)
 
 
12
 
13
  from agent.graph import AgentGraph
14
  from agent.tools import FetchTools
 
18
 
19
  app = FastAPI(title="DeepDive IR Agent")
20
 
21
# CORS policy: local dev server, the production Vercel app, and any Vercel
# preview deployment (matched by regex).
# NOTE(review): the regex admits every *.vercel.app origin while credentials
# are allowed -- consider narrowing it to this project's preview hostnames.
_CORS_ORIGINS = [
    "http://localhost:3000",
    "https://deepdive-ir-agent.vercel.app",
]
_CORS_ORIGIN_REGEX = r"https://.*\.vercel\.app$"

app.add_middleware(
    CORSMiddleware,
    allow_origins=_CORS_ORIGINS,
    allow_origin_regex=_CORS_ORIGIN_REGEX,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
33
 
34
+ # ---- Models ----
 
 
 
 
 
35
  class IngestRequest(BaseModel):
36
  cik: str
37
+ ir_url: Optional[str] = None
38
 
39
  class AskRequest(BaseModel):
40
  question: str
41
+ cik: Optional[str] = None # optional: if omitted we use the last ingested CIK
42
+
43
# ---- Process-local state ----
# One AgentGraph per normalized CIK, filled in by /ingest. Lives only in this
# process's memory, so it starts empty on every startup.
graphs: Dict[str, AgentGraph] = {}
# CIK of the most recent successful /ingest; the fallback for /ask and /brief
# when the caller does not name a CIK.
last_cik: Optional[str] = None
# Shared fetch helper used by /ingest to pull text from URLs.
tools = FetchTools()
47
+
48
+ # ---- Helpers ----
49
def norm_cik(raw: str) -> str:
    """Normalize a caller-supplied CIK to its 10-digit, zero-padded form.

    Surrounding whitespace is stripped; the remainder must consist solely of
    ASCII digits and be at most 10 characters long. The value stays a string
    so leading zeros are preserved.

    Args:
        raw: CIK text as received from the client.

    Returns:
        The CIK left-padded with zeros to exactly 10 characters.

    Raises:
        HTTPException: status 400 if the value is empty, contains anything
            other than ASCII digits, or is longer than 10 digits.
    """
    s = raw.strip()
    # str.isdigit() alone also accepts non-ASCII digits (e.g. "²" or
    # Arabic-Indic numerals), which would slip through and be zero-padded
    # into a bogus CIK -- so require ASCII as well.
    if not (s.isascii() and s.isdigit()):
        raise HTTPException(400, "CIK must be digits only.")
    if len(s) > 10:
        raise HTTPException(400, "CIK too long; use 10 digits.")
    return s.zfill(10)
56
+
57
def company_index_dir(cik: str) -> str:
    """Return the index directory for one company, creating it if missing.

    The directory is INDEX_ROOT/<cik>; missing parents are created as well.
    """
    target = os.path.join(INDEX_ROOT, cik)
    Path(target).mkdir(parents=True, exist_ok=True)
    return target
61
 
62
  @app.get("/")
63
  def root():
 
69
 
70
  @app.post("/ingest")
71
  async def ingest(req: IngestRequest):
72
+ global last_cik
73
  try:
74
+ cik = norm_cik(req.cik)
75
+
76
+ filings = await fetch_recent_filings_by_cik(cik)
 
 
 
 
 
 
77
  docs = []
78
  for form, url, title in filings:
79
+ try:
80
+ text = await tools.get_text_from_url(url)
81
+ except Exception as e:
82
+ log.warning(f"Fetch failed for {url}: {e}")
83
+ text = ""
84
+ if text:
85
+ docs.append({"title": title, "url": url, "text": text})
86
 
87
  if req.ir_url:
88
+ try:
89
+ ir_text = await tools.get_text_from_url(req.ir_url)
90
+ if ir_text:
91
+ docs.append({"title": "IR site", "url": req.ir_url, "text": ir_text})
92
+ except Exception as e:
93
+ log.warning(f"IR fetch failed for {req.ir_url}: {e}")
94
 
95
  if not docs:
96
+ raise HTTPException(400, "No documents fetched.")
97
+
98
+ idx_dir = company_index_dir(cik)
99
 
100
+ # Some libraries write relative paths like "index/vecs.npy".
101
+ # Make those land under /data/index/<CIK>/ by chdir-ing temporarily.
102
+ prev = os.getcwd()
103
+ os.chdir(idx_dir)
104
+ try:
105
+ g = AgentGraph(index_dir=idx_dir) # absolute, per-CIK dir
106
+ g.build_index(docs)
107
+ finally:
108
+ os.chdir(prev)
109
+
110
+ graphs[cik] = g
111
+ last_cik = cik
112
+ return {"ok": True, "cik": cik, "num_docs": len(docs)}
113
 
114
  except HTTPException:
115
  raise
116
  except Exception as e:
 
117
  log.error("Ingest failed: %s\n%s", e, traceback.format_exc())
118
  raise HTTPException(status_code=502, detail=f"Ingest failed: {type(e).__name__}: {e}")
119
 
120
  @app.post("/ask")
121
  def ask(req: AskRequest):
122
  try:
123
+ cik = norm_cik(req.cik) if req.cik else last_cik
124
+ if not cik or cik not in graphs:
125
+ raise HTTPException(400, "No index available. Call /ingest with a CIK first.")
126
+ return graphs[cik].answer(req.question)
127
+ except HTTPException:
128
+ raise
129
  except Exception as e:
130
  log.error("Ask failed: %s\n%s", e, traceback.format_exc())
131
+ raise HTTPException(500, detail=str(e))
132
 
133
  @app.get("/brief")
134
+ def brief(cik: Optional[str] = None):
135
  try:
136
+ c = norm_cik(cik) if cik else last_cik
137
+ if not c or c not in graphs:
138
+ raise HTTPException(400, "No index available. Call /ingest with a CIK first.")
139
+ return graphs[c].brief()
140
+ except HTTPException:
141
+ raise
142
  except Exception as e:
143
  log.error("Brief failed: %s\n%s", e, traceback.format_exc())
144
+ raise HTTPException(500, detail=str(e))