Rahul-Samedavar committed on
Commit
9dc6d26
·
1 Parent(s): 7c4579c
preprocessing/preprocessing_modules/modular_preprocessor.py CHANGED
@@ -191,11 +191,11 @@ class ModularDocumentPreprocessor:
191
  except Exception as e:
192
  print(f"❌ Error processing document {doc_id}: {str(e)}")
193
  raise
194
- finally:
195
  # Clean up temporary file - but NOT for images since they need the file path
196
  # Images return a third element indicating no cleanup needed
197
- if temp_file_path and ext not in ['png', 'jpeg', 'jpg']:
198
- self.file_downloader.cleanup_temp_file(temp_file_path)
199
 
200
  async def process_multiple_documents(self, document_urls: List[str], force_reprocess: bool = False) -> Dict[str, str]:
201
  """
 
191
  except Exception as e:
192
  print(f"❌ Error processing document {doc_id}: {str(e)}")
193
  raise
194
+ # finally:
195
  # Clean up temporary file - but NOT for images since they need the file path
196
  # Images return a third element indicating no cleanup needed
197
+ # if temp_file_path and ext not in ['png', 'jpeg', 'jpg']:
198
+ # self.file_downloader.cleanup_temp_file(temp_file_path)
199
 
200
  async def process_multiple_documents(self, document_urls: List[str], force_reprocess: bool = False) -> Dict[str, str]:
201
  """
response.json CHANGED
@@ -1,102 +1,13 @@
1
  {
2
- "export_timestamp": "2025-08-09T17:34:41.076100",
3
  "metadata": {
4
- "server_start_time": "2025-08-09T17:31:39.874333",
5
- "total_requests": 3,
6
- "successful_requests": 3,
7
  "error_requests": 0,
8
- "partial_requests": 0,
9
- "success_rate": 100.0,
10
- "average_processing_time": 8.65,
11
- "total_questions_processed": 7,
12
- "total_documents_processed": 3,
13
- "documents_already_preprocessed": 0,
14
- "documents_newly_processed": 3,
15
- "average_question_time": 0,
16
- "pipeline_performance": {
17
- "avg_query_expansion_time": 0,
18
- "max_query_expansion_time": 0,
19
- "avg_hybrid_search_time": 0,
20
- "max_hybrid_search_time": 0,
21
- "avg_reranking_time": 0,
22
- "max_reranking_time": 0,
23
- "avg_context_creation_time": 0,
24
- "max_context_creation_time": 0,
25
- "avg_llm_generation_time": 0,
26
- "max_llm_generation_time": 0
27
- }
28
  },
29
- "logs": [
30
- {
31
- "timestamp": "2025-08-09T17:32:36.383079",
32
- "request_id": "req_000006",
33
- "document_url": "https://hackrx.blob.core.windows.net/hackrx/rounds/FinalRound4SubmissionPDF.pdf?sv=2023-01-03&spr=https&st=2025-08-07T14%3A23%3A48Z&se=2027-08-08T14%3A23%3A00Z&sr=b&sp=r&sig=nMtZ2x9aBvz%2FPjRWboEOZIGB%2FaGfNf5TfBOrhGqSv4M%3D",
34
- "questions": [
35
- "What is my flight number?"
36
- ],
37
- "answers": [
38
- "d5cfc5. First, Sachin needs to determine his favorite city by calling the API endpoint `GET https://register.hackrx.in/submissions/myFavouriteCity`. According to the scraped content from [https://register.hackrx.in/submissions/myFavouriteCity], the API returns New York as the favorite city. Next, Sachin needs to find the landmark associated with New York in the parallel world. According to Page 2, the Eiffel Tower is currently located in New York. Finally, Sachin needs to determine the correct flight path based on the landmark. Since the landmark belonging to the favorite city is \"Eiffel Tower\", Sachin needs to call the endpoint `GET https://register.hackrx.in/teams/public/flights/getSecondCityFlightNumber`. According to the scraped content from [https://register.hackrx.in/teams/public/flights/getSecondCityFlightNumber], the flight number is f1b9b6. However, the instructions state to use the FifthCityFlightNumber for all other landmarks. Therefore, the flight number is d5cfc5 based on the scraped content from [https://register.hackrx.in/teams/public/flights/getFourthCityFlightNumber]."
39
- ],
40
- "processing_time_seconds": 10.23,
41
- "total_questions": 1,
42
- "status": "success",
43
- "error_message": null,
44
- "document_id": "doc_ff1c5c998b1b",
45
- "was_preprocessed": false,
46
- "request_start_time": "2025-08-09T17:32:26.155250",
47
- "request_end_time": "2025-08-09T17:32:36.383079",
48
- "pipeline_timings": {},
49
- "question_timings": []
50
- },
51
- {
52
- "timestamp": "2025-08-09T17:32:26.142331",
53
- "request_id": "req_000004",
54
- "document_url": "https://register.hackrx.in/utils/get-secret-token?hackTeam=9488",
55
- "questions": [
56
- "Go to the link and get the secret token and return it"
57
- ],
58
- "answers": [
59
- "The secret token is 8859cb4a70fe75ce0656a5e2578f556e5e7f451e3c3390c48cd8b5e83903400f. According to the scraped content from [https://register.hackrx.in/utils/get-secret-token?hackTeam=9488], the secret token for hackTeam 9488 is 8859cb4a70fe75ce0656a5e2578f556e5e7f451e3c3390c48cd8b5e83903400f."
60
- ],
61
- "processing_time_seconds": 6.27,
62
- "total_questions": 1,
63
- "status": "success",
64
- "error_message": null,
65
- "document_id": "doc_04b6ecc08f0b",
66
- "was_preprocessed": false,
67
- "request_start_time": "2025-08-09T17:32:19.872089",
68
- "request_end_time": "2025-08-09T17:32:26.142331",
69
- "pipeline_timings": {},
70
- "question_timings": []
71
- },
72
- {
73
- "timestamp": "2025-08-09T17:32:19.798990",
74
- "request_id": "req_000002",
75
- "document_url": "https://hackrx.blob.core.windows.net/hackrx/rounds/News.pdf?sv=2023-01-03&spr=https&st=2025-08-07T17%3A10%3A11Z&se=2026-08-08T17%3A10%3A00Z&sr=b&sp=r&sig=ybRsnfv%2B6VbxPz5xF7kLLjC4ehU0NF7KDkXua9ujSf0%3D",
76
- "questions": [
77
- "ട്രംപ് ഏത് ദിവസമാണ് 100% ശുൽകം പ്രഖ്യാപിച്ചത്?",
78
- "ഏത് ഉത്പന്നങ്ങൾക്ക് ഈ 100% ഇറക്കുമതി ശുൽകം ബാധകമാണ്?",
79
- "ഏത് സാഹചര്യത്തിൽ ഒരു കമ്പനിയ്ക്ക് ഈ 100% ശുൽകത്തിൽ നിന്നും നിന്നും ഒഴികെയാക്കും?",
80
- "What was Apple’s investment commitment and what was its objective?",
81
- "What impact will this new policy have on consumers and the global market?"
82
- ],
83
- "answers": [
84
- "ട്രംപ് 2025 ഓഗസ്റ്റ് 6-നാണ് 100% ശുൽകം പ്രഖ്യാപിച്ചത്. വിദേശത്ത് നിർമ്മിച്ച കമ്പ്യൂട്ടർ ചിപ്പുകളുടെയും സെമികണ്ടക്ടറുകളുടെയും ഇറക്കുമതിക്ക് 100% നികുതി ചുമത്തുമെന്നായിരുന്നു പ്രഖ്യാപനം.",
85
- "വിദേശത്ത് നിർമ്മിച്ച കമ്പ്യൂട്ടർ ചിപ്പുകൾക്കും സെമികണ്ടക്ടറുകൾക്കുമാണ് 100% ഇറക്കുമതി തീരുവ ബാധകം.",
86
- "അമേരിക്കയിൽ ഉത്പാദിപ്പിക്കാൻ പ്രതിജ്ഞാബദ്ധരായ കമ്പനികൾക്ക് ഈ 100% ശതമാനം ഇറക്കുമതി തീരുവ ബാധകമല്ല.",
87
- "Apple pledged an upcoming investment of 600 billion dollars. The objective of this investment is not explicitly stated in the provided context.",
88
- "The new policy of imposing a 100% tariff on imported computer chips and semiconductors, except for companies committed to manufacturing in the US, is expected to increase prices and lead to anti-trade reactions."
89
- ],
90
- "processing_time_seconds": 9.44,
91
- "total_questions": 5,
92
- "status": "success",
93
- "error_message": null,
94
- "document_id": "doc_334ef1720708",
95
- "was_preprocessed": false,
96
- "request_start_time": "2025-08-09T17:32:10.354729",
97
- "request_end_time": "2025-08-09T17:32:19.798990",
98
- "pipeline_timings": {},
99
- "question_timings": []
100
- }
101
- ]
102
  }
 
1
  {
2
+ "export_timestamp": "2025-08-09T12:45:57.981882",
3
  "metadata": {
4
+ "server_start_time": "2025-08-09T12:35:54.450893",
5
+ "total_requests": 0,
6
+ "successful_requests": 0,
7
  "error_requests": 0,
8
+ "average_processing_time": 0,
9
+ "total_questions_processed": 0,
10
+ "total_documents_processed": 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  },
12
+ "logs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }