Spaces:
Sleeping
Sleeping
Commit
·
0967030
1
Parent(s):
b259f00
fix: Import Request object
Browse files- app/main_api.py +21 -11
app/main_api.py
CHANGED
|
@@ -9,7 +9,7 @@ import asyncio
|
|
| 9 |
from itertools import cycle
|
| 10 |
|
| 11 |
# FastAPI and core dependencies
|
| 12 |
-
from fastapi import FastAPI, Body, HTTPException
|
| 13 |
from fastapi.middleware.cors import CORSMiddleware
|
| 14 |
from pydantic import BaseModel, Field
|
| 15 |
|
|
@@ -88,7 +88,9 @@ class RAGPipeline:
|
|
| 88 |
|
| 89 |
# In app/main_api.py, inside the RAGPipeline class
|
| 90 |
|
| 91 |
-
|
|
|
|
|
|
|
| 92 |
if not chunks:
|
| 93 |
logger.warning("No chunks provided to add_documents.")
|
| 94 |
return
|
|
@@ -96,7 +98,8 @@ class RAGPipeline:
|
|
| 96 |
logger.info(f"Starting to add {len(chunks)} chunks...")
|
| 97 |
|
| 98 |
# --- START OF FIX ---
|
| 99 |
-
# The
|
|
|
|
| 100 |
contents = [c['content'] for c in chunks]
|
| 101 |
metadatas = [c['metadata'] for c in chunks]
|
| 102 |
ids = [c['chunk_id'] for c in chunks]
|
|
@@ -141,10 +144,13 @@ class RAGPipeline:
|
|
| 141 |
logger.error(f"Groq API call failed: {e}")
|
| 142 |
return "Error: Could not generate an answer from the language model."
|
| 143 |
|
| 144 |
-
# --- Main Hackathon Endpoint ---
|
| 145 |
@app.post("/hackrx/run", response_model=SubmissionResponse)
|
| 146 |
-
async def run_submission(request: SubmissionRequest = Body(...)):
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
# 1. Cleanup and Setup
|
| 149 |
try:
|
| 150 |
for collection in chroma_client.list_collections():
|
|
@@ -154,12 +160,16 @@ async def run_submission(request: SubmissionRequest = Body(...)):
|
|
| 154 |
logger.warning(f"Could not clean up old collections: {e}")
|
| 155 |
|
| 156 |
session_collection_name = f"hackrx_session_{uuid.uuid4().hex}"
|
| 157 |
-
|
|
|
|
| 158 |
|
| 159 |
# 2. Download and Process Documents
|
| 160 |
all_chunks = []
|
|
|
|
|
|
|
|
|
|
| 161 |
async with httpx.AsyncClient(timeout=120.0) as client:
|
| 162 |
-
for doc_url in
|
| 163 |
try:
|
| 164 |
logger.info(f"Downloading document from: {doc_url}")
|
| 165 |
response = await client.get(doc_url, follow_redirects=True)
|
|
@@ -171,17 +181,17 @@ async def run_submission(request: SubmissionRequest = Body(...)):
|
|
| 171 |
with open(temp_file_path, "wb") as f:
|
| 172 |
f.write(response.content)
|
| 173 |
|
| 174 |
-
#
|
| 175 |
chunks = parsing_service.process_pdf_ultrafast(temp_file_path)
|
| 176 |
all_chunks.extend(chunks)
|
| 177 |
os.remove(temp_file_path)
|
| 178 |
|
| 179 |
except Exception as e:
|
| 180 |
-
logger.error(f"Failed to process document at {doc_url}: {e}")
|
| 181 |
continue
|
| 182 |
|
| 183 |
if not all_chunks:
|
| 184 |
-
failed_answers = [Answer(question=q, answer="A valid document could not be processed, so an answer could not be found.") for q in
|
| 185 |
return SubmissionResponse(answers=failed_answers)
|
| 186 |
|
| 187 |
# 3. Add to Vector DB
|
|
@@ -193,7 +203,7 @@ async def run_submission(request: SubmissionRequest = Body(...)):
|
|
| 193 |
answer_text = await rag_pipeline.generate_answer(question, relevant_docs)
|
| 194 |
return Answer(question=question, answer=answer_text)
|
| 195 |
|
| 196 |
-
tasks = [answer_question(q) for q in
|
| 197 |
answers = await asyncio.gather(*tasks)
|
| 198 |
|
| 199 |
return SubmissionResponse(answers=answers)
|
|
|
|
| 9 |
from itertools import cycle
|
| 10 |
|
| 11 |
# FastAPI and core dependencies
|
| 12 |
+
from fastapi import FastAPI, Body, HTTPException, Request
|
| 13 |
from fastapi.middleware.cors import CORSMiddleware
|
| 14 |
from pydantic import BaseModel, Field
|
| 15 |
|
|
|
|
| 88 |
|
| 89 |
# In app/main_api.py, inside the RAGPipeline class
|
| 90 |
|
| 91 |
+
# In app/main_api.py, inside the RAGPipeline class
|
| 92 |
+
|
| 93 |
+
def add_documents(self, chunks: List[Dict]):
|
| 94 |
if not chunks:
|
| 95 |
logger.warning("No chunks provided to add_documents.")
|
| 96 |
return
|
|
|
|
| 98 |
logger.info(f"Starting to add {len(chunks)} chunks...")
|
| 99 |
|
| 100 |
# --- START OF FIX ---
|
| 101 |
+
# The 'chunks' variable is a list of dictionaries. This code correctly
|
| 102 |
+
# uses dictionary key access `c['key']` to get the data.
|
| 103 |
contents = [c['content'] for c in chunks]
|
| 104 |
metadatas = [c['metadata'] for c in chunks]
|
| 105 |
ids = [c['chunk_id'] for c in chunks]
|
|
|
|
| 144 |
logger.error(f"Groq API call failed: {e}")
|
| 145 |
return "Error: Could not generate an answer from the language model."
|
| 146 |
|
|
|
|
| 147 |
@app.post("/hackrx/run", response_model=SubmissionResponse)
|
| 148 |
+
async def run_submission(request: Request, submission_request: SubmissionRequest = Body(...)):
|
| 149 |
|
| 150 |
+
# --- FIX: Access services from the application state via the request object ---
|
| 151 |
+
chroma_client = request.app.state.chroma_client
|
| 152 |
+
parsing_service = request.app.state.parsing_service
|
| 153 |
+
|
| 154 |
# 1. Cleanup and Setup
|
| 155 |
try:
|
| 156 |
for collection in chroma_client.list_collections():
|
|
|
|
| 160 |
logger.warning(f"Could not clean up old collections: {e}")
|
| 161 |
|
| 162 |
session_collection_name = f"hackrx_session_{uuid.uuid4().hex}"
|
| 163 |
+
# --- FIX: Pass the request object to the RAG pipeline ---
|
| 164 |
+
rag_pipeline = RAGPipeline(collection_name=session_collection_name, request=request)
|
| 165 |
|
| 166 |
# 2. Download and Process Documents
|
| 167 |
all_chunks = []
|
| 168 |
+
# The UPLOAD_DIR variable should be defined at the top of your file
|
| 169 |
+
UPLOAD_DIR = "/tmp/docs"
|
| 170 |
+
|
| 171 |
async with httpx.AsyncClient(timeout=120.0) as client:
|
| 172 |
+
for doc_url in submission_request.documents:
|
| 173 |
try:
|
| 174 |
logger.info(f"Downloading document from: {doc_url}")
|
| 175 |
response = await client.get(doc_url, follow_redirects=True)
|
|
|
|
| 181 |
with open(temp_file_path, "wb") as f:
|
| 182 |
f.write(response.content)
|
| 183 |
|
| 184 |
+
# This call now correctly uses the parsing_service loaded in the app state
|
| 185 |
chunks = parsing_service.process_pdf_ultrafast(temp_file_path)
|
| 186 |
all_chunks.extend(chunks)
|
| 187 |
os.remove(temp_file_path)
|
| 188 |
|
| 189 |
except Exception as e:
|
| 190 |
+
logger.error(f"Failed to process document at {doc_url}: {e}", exc_info=True)
|
| 191 |
continue
|
| 192 |
|
| 193 |
if not all_chunks:
|
| 194 |
+
failed_answers = [Answer(question=q, answer="A valid document could not be processed, so an answer could not be found.") for q in submission_request.questions]
|
| 195 |
return SubmissionResponse(answers=failed_answers)
|
| 196 |
|
| 197 |
# 3. Add to Vector DB
|
|
|
|
| 203 |
answer_text = await rag_pipeline.generate_answer(question, relevant_docs)
|
| 204 |
return Answer(question=question, answer=answer_text)
|
| 205 |
|
| 206 |
+
tasks = [answer_question(q) for q in submission_request.questions]
|
| 207 |
answers = await asyncio.gather(*tasks)
|
| 208 |
|
| 209 |
return SubmissionResponse(answers=answers)
|