rohannsinghal committed on
Commit
0967030
·
1 Parent(s): b259f00

fix: Import Request object

Browse files
Files changed (1) hide show
  1. app/main_api.py +21 -11
app/main_api.py CHANGED
@@ -9,7 +9,7 @@ import asyncio
9
  from itertools import cycle
10
 
11
  # FastAPI and core dependencies
12
- from fastapi import FastAPI, Body, HTTPException
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from pydantic import BaseModel, Field
15
 
@@ -88,7 +88,9 @@ class RAGPipeline:
88
 
89
  # In app/main_api.py, inside the RAGPipeline class
90
 
91
- def add_documents(self, chunks: List[Dict]): # Note: It receives dicts
 
 
92
  if not chunks:
93
  logger.warning("No chunks provided to add_documents.")
94
  return
@@ -96,7 +98,8 @@ class RAGPipeline:
96
  logger.info(f"Starting to add {len(chunks)} chunks...")
97
 
98
  # --- START OF FIX ---
99
- # The list now contains dictionaries, so we use dictionary access `c['key']`
 
100
  contents = [c['content'] for c in chunks]
101
  metadatas = [c['metadata'] for c in chunks]
102
  ids = [c['chunk_id'] for c in chunks]
@@ -141,10 +144,13 @@ class RAGPipeline:
141
  logger.error(f"Groq API call failed: {e}")
142
  return "Error: Could not generate an answer from the language model."
143
 
144
- # --- Main Hackathon Endpoint ---
145
  @app.post("/hackrx/run", response_model=SubmissionResponse)
146
- async def run_submission(request: SubmissionRequest = Body(...)):
147
 
 
 
 
 
148
  # 1. Cleanup and Setup
149
  try:
150
  for collection in chroma_client.list_collections():
@@ -154,12 +160,16 @@ async def run_submission(request: SubmissionRequest = Body(...)):
154
  logger.warning(f"Could not clean up old collections: {e}")
155
 
156
  session_collection_name = f"hackrx_session_{uuid.uuid4().hex}"
157
- rag_pipeline = RAGPipeline(collection_name=session_collection_name)
 
158
 
159
  # 2. Download and Process Documents
160
  all_chunks = []
 
 
 
161
  async with httpx.AsyncClient(timeout=120.0) as client:
162
- for doc_url in request.documents:
163
  try:
164
  logger.info(f"Downloading document from: {doc_url}")
165
  response = await client.get(doc_url, follow_redirects=True)
@@ -171,17 +181,17 @@ async def run_submission(request: SubmissionRequest = Body(...)):
171
  with open(temp_file_path, "wb") as f:
172
  f.write(response.content)
173
 
174
- # Your proven parsing logic
175
  chunks = parsing_service.process_pdf_ultrafast(temp_file_path)
176
  all_chunks.extend(chunks)
177
  os.remove(temp_file_path)
178
 
179
  except Exception as e:
180
- logger.error(f"Failed to process document at {doc_url}: {e}")
181
  continue
182
 
183
  if not all_chunks:
184
- failed_answers = [Answer(question=q, answer="A valid document could not be processed, so an answer could not be found.") for q in request.questions]
185
  return SubmissionResponse(answers=failed_answers)
186
 
187
  # 3. Add to Vector DB
@@ -193,7 +203,7 @@ async def run_submission(request: SubmissionRequest = Body(...)):
193
  answer_text = await rag_pipeline.generate_answer(question, relevant_docs)
194
  return Answer(question=question, answer=answer_text)
195
 
196
- tasks = [answer_question(q) for q in request.questions]
197
  answers = await asyncio.gather(*tasks)
198
 
199
  return SubmissionResponse(answers=answers)
 
9
  from itertools import cycle
10
 
11
  # FastAPI and core dependencies
12
+ from fastapi import FastAPI, Body, HTTPException, Request
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from pydantic import BaseModel, Field
15
 
 
88
 
89
  # In app/main_api.py, inside the RAGPipeline class
90
 
91
+ # In app/main_api.py, inside the RAGPipeline class
92
+
93
+ def add_documents(self, chunks: List[Dict]):
94
  if not chunks:
95
  logger.warning("No chunks provided to add_documents.")
96
  return
 
98
  logger.info(f"Starting to add {len(chunks)} chunks...")
99
 
100
  # --- START OF FIX ---
101
+ # The 'chunks' variable is a list of dictionaries. This code correctly
102
+ # uses dictionary key access `c['key']` to get the data.
103
  contents = [c['content'] for c in chunks]
104
  metadatas = [c['metadata'] for c in chunks]
105
  ids = [c['chunk_id'] for c in chunks]
 
144
  logger.error(f"Groq API call failed: {e}")
145
  return "Error: Could not generate an answer from the language model."
146
 
 
147
  @app.post("/hackrx/run", response_model=SubmissionResponse)
148
+ async def run_submission(request: Request, submission_request: SubmissionRequest = Body(...)):
149
 
150
+ # --- FIX: Access services from the application state via the request object ---
151
+ chroma_client = request.app.state.chroma_client
152
+ parsing_service = request.app.state.parsing_service
153
+
154
  # 1. Cleanup and Setup
155
  try:
156
  for collection in chroma_client.list_collections():
 
160
  logger.warning(f"Could not clean up old collections: {e}")
161
 
162
  session_collection_name = f"hackrx_session_{uuid.uuid4().hex}"
163
+ # --- FIX: Pass the request object to the RAG pipeline ---
164
+ rag_pipeline = RAGPipeline(collection_name=session_collection_name, request=request)
165
 
166
  # 2. Download and Process Documents
167
  all_chunks = []
168
+ # The UPLOAD_DIR variable should be defined at the top of your file
169
+ UPLOAD_DIR = "/tmp/docs"
170
+
171
  async with httpx.AsyncClient(timeout=120.0) as client:
172
+ for doc_url in submission_request.documents:
173
  try:
174
  logger.info(f"Downloading document from: {doc_url}")
175
  response = await client.get(doc_url, follow_redirects=True)
 
181
  with open(temp_file_path, "wb") as f:
182
  f.write(response.content)
183
 
184
+ # This call now correctly uses the parsing_service loaded in the app state
185
  chunks = parsing_service.process_pdf_ultrafast(temp_file_path)
186
  all_chunks.extend(chunks)
187
  os.remove(temp_file_path)
188
 
189
  except Exception as e:
190
+ logger.error(f"Failed to process document at {doc_url}: {e}", exc_info=True)
191
  continue
192
 
193
  if not all_chunks:
194
+ failed_answers = [Answer(question=q, answer="A valid document could not be processed, so an answer could not be found.") for q in submission_request.questions]
195
  return SubmissionResponse(answers=failed_answers)
196
 
197
  # 3. Add to Vector DB
 
203
  answer_text = await rag_pipeline.generate_answer(question, relevant_docs)
204
  return Answer(question=question, answer=answer_text)
205
 
206
+ tasks = [answer_question(q) for q in submission_request.questions]
207
  answers = await asyncio.gather(*tasks)
208
 
209
  return SubmissionResponse(answers=answers)