Spaces:
Sleeping
Sleeping
Commit ·
9dc6d26
1
Parent(s): 7c4579c
fixed
Browse files
preprocessing/preprocessing_modules/modular_preprocessor.py
CHANGED
|
@@ -191,11 +191,11 @@ class ModularDocumentPreprocessor:
|
|
| 191 |
except Exception as e:
|
| 192 |
print(f"❌ Error processing document {doc_id}: {str(e)}")
|
| 193 |
raise
|
| 194 |
-
finally:
|
| 195 |
# Clean up temporary file - but NOT for images since they need the file path
|
| 196 |
# Images return a third element indicating no cleanup needed
|
| 197 |
-
if temp_file_path and ext not in ['png', 'jpeg', 'jpg']:
|
| 198 |
-
|
| 199 |
|
| 200 |
async def process_multiple_documents(self, document_urls: List[str], force_reprocess: bool = False) -> Dict[str, str]:
|
| 201 |
"""
|
|
|
|
| 191 |
except Exception as e:
|
| 192 |
print(f"❌ Error processing document {doc_id}: {str(e)}")
|
| 193 |
raise
|
| 194 |
+
# finally:
|
| 195 |
# Clean up temporary file - but NOT for images since they need the file path
|
| 196 |
# Images return a third element indicating no cleanup needed
|
| 197 |
+
# if temp_file_path and ext not in ['png', 'jpeg', 'jpg']:
|
| 198 |
+
# self.file_downloader.cleanup_temp_file(temp_file_path)
|
| 199 |
|
| 200 |
async def process_multiple_documents(self, document_urls: List[str], force_reprocess: bool = False) -> Dict[str, str]:
|
| 201 |
"""
|
response.json
CHANGED
|
@@ -1,102 +1,13 @@
|
|
| 1 |
{
|
| 2 |
-
"export_timestamp": "2025-08-
|
| 3 |
"metadata": {
|
| 4 |
-
"server_start_time": "2025-08-
|
| 5 |
-
"total_requests":
|
| 6 |
-
"successful_requests":
|
| 7 |
"error_requests": 0,
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"total_questions_processed": 7,
|
| 12 |
-
"total_documents_processed": 3,
|
| 13 |
-
"documents_already_preprocessed": 0,
|
| 14 |
-
"documents_newly_processed": 3,
|
| 15 |
-
"average_question_time": 0,
|
| 16 |
-
"pipeline_performance": {
|
| 17 |
-
"avg_query_expansion_time": 0,
|
| 18 |
-
"max_query_expansion_time": 0,
|
| 19 |
-
"avg_hybrid_search_time": 0,
|
| 20 |
-
"max_hybrid_search_time": 0,
|
| 21 |
-
"avg_reranking_time": 0,
|
| 22 |
-
"max_reranking_time": 0,
|
| 23 |
-
"avg_context_creation_time": 0,
|
| 24 |
-
"max_context_creation_time": 0,
|
| 25 |
-
"avg_llm_generation_time": 0,
|
| 26 |
-
"max_llm_generation_time": 0
|
| 27 |
-
}
|
| 28 |
},
|
| 29 |
-
"logs": [
|
| 30 |
-
{
|
| 31 |
-
"timestamp": "2025-08-09T17:32:36.383079",
|
| 32 |
-
"request_id": "req_000006",
|
| 33 |
-
"document_url": "https://hackrx.blob.core.windows.net/hackrx/rounds/FinalRound4SubmissionPDF.pdf?sv=2023-01-03&spr=https&st=2025-08-07T14%3A23%3A48Z&se=2027-08-08T14%3A23%3A00Z&sr=b&sp=r&sig=nMtZ2x9aBvz%2FPjRWboEOZIGB%2FaGfNf5TfBOrhGqSv4M%3D",
|
| 34 |
-
"questions": [
|
| 35 |
-
"What is my flight number?"
|
| 36 |
-
],
|
| 37 |
-
"answers": [
|
| 38 |
-
"d5cfc5. First, Sachin needs to determine his favorite city by calling the API endpoint `GET https://register.hackrx.in/submissions/myFavouriteCity`. According to the scraped content from [https://register.hackrx.in/submissions/myFavouriteCity], the API returns New York as the favorite city. Next, Sachin needs to find the landmark associated with New York in the parallel world. According to Page 2, the Eiffel Tower is currently located in New York. Finally, Sachin needs to determine the correct flight path based on the landmark. Since the landmark belonging to the favorite city is \"Eiffel Tower\", Sachin needs to call the endpoint `GET https://register.hackrx.in/teams/public/flights/getSecondCityFlightNumber`. According to the scraped content from [https://register.hackrx.in/teams/public/flights/getSecondCityFlightNumber], the flight number is f1b9b6. However, the instructions state to use the FifthCityFlightNumber for all other landmarks. Therefore, the flight number is d5cfc5 based on the scraped content from [https://register.hackrx.in/teams/public/flights/getFourthCityFlightNumber]."
|
| 39 |
-
],
|
| 40 |
-
"processing_time_seconds": 10.23,
|
| 41 |
-
"total_questions": 1,
|
| 42 |
-
"status": "success",
|
| 43 |
-
"error_message": null,
|
| 44 |
-
"document_id": "doc_ff1c5c998b1b",
|
| 45 |
-
"was_preprocessed": false,
|
| 46 |
-
"request_start_time": "2025-08-09T17:32:26.155250",
|
| 47 |
-
"request_end_time": "2025-08-09T17:32:36.383079",
|
| 48 |
-
"pipeline_timings": {},
|
| 49 |
-
"question_timings": []
|
| 50 |
-
},
|
| 51 |
-
{
|
| 52 |
-
"timestamp": "2025-08-09T17:32:26.142331",
|
| 53 |
-
"request_id": "req_000004",
|
| 54 |
-
"document_url": "https://register.hackrx.in/utils/get-secret-token?hackTeam=9488",
|
| 55 |
-
"questions": [
|
| 56 |
-
"Go to the link and get the secret token and return it"
|
| 57 |
-
],
|
| 58 |
-
"answers": [
|
| 59 |
-
"The secret token is 8859cb4a70fe75ce0656a5e2578f556e5e7f451e3c3390c48cd8b5e83903400f. According to the scraped content from [https://register.hackrx.in/utils/get-secret-token?hackTeam=9488], the secret token for hackTeam 9488 is 8859cb4a70fe75ce0656a5e2578f556e5e7f451e3c3390c48cd8b5e83903400f."
|
| 60 |
-
],
|
| 61 |
-
"processing_time_seconds": 6.27,
|
| 62 |
-
"total_questions": 1,
|
| 63 |
-
"status": "success",
|
| 64 |
-
"error_message": null,
|
| 65 |
-
"document_id": "doc_04b6ecc08f0b",
|
| 66 |
-
"was_preprocessed": false,
|
| 67 |
-
"request_start_time": "2025-08-09T17:32:19.872089",
|
| 68 |
-
"request_end_time": "2025-08-09T17:32:26.142331",
|
| 69 |
-
"pipeline_timings": {},
|
| 70 |
-
"question_timings": []
|
| 71 |
-
},
|
| 72 |
-
{
|
| 73 |
-
"timestamp": "2025-08-09T17:32:19.798990",
|
| 74 |
-
"request_id": "req_000002",
|
| 75 |
-
"document_url": "https://hackrx.blob.core.windows.net/hackrx/rounds/News.pdf?sv=2023-01-03&spr=https&st=2025-08-07T17%3A10%3A11Z&se=2026-08-08T17%3A10%3A00Z&sr=b&sp=r&sig=ybRsnfv%2B6VbxPz5xF7kLLjC4ehU0NF7KDkXua9ujSf0%3D",
|
| 76 |
-
"questions": [
|
| 77 |
-
"ട്രംപ് ഏത് ദിവസമാണ് 100% ശുൽകം പ്രഖ്യാപിച്ചത്?",
|
| 78 |
-
"ഏത് ഉത്പന്നങ്ങൾക്ക് ഈ 100% ഇറക്കുമതി ശുൽകം ബാധകമാണ്?",
|
| 79 |
-
"ഏത് സാഹചര്യത്തിൽ ഒരു കമ്പനിയ്ക്ക് ഈ 100% ശുൽകത്തിൽ നിന്നും നിന്നും ഒഴികെയാക്കും?",
|
| 80 |
-
"What was Apple’s investment commitment and what was its objective?",
|
| 81 |
-
"What impact will this new policy have on consumers and the global market?"
|
| 82 |
-
],
|
| 83 |
-
"answers": [
|
| 84 |
-
"ട്രംപ് 2025 ഓഗസ്റ്റ് 6-നാണ് 100% ശുൽകം പ്രഖ്യാപിച്ചത്. വിദേശത്ത് നിർമ്മിച്ച കമ്പ്യൂട്ടർ ചിപ്പുകളുടെയും സെമികണ്ടക്ടറുകളുടെയും ഇറക്കുമതിക്ക് 100% നികുതി ചുമത്തുമെന്നായിരുന്നു പ്രഖ്യാപനം.",
|
| 85 |
-
"വിദേശത്ത് നിർമ്മിച്ച കമ്പ്യൂട്ടർ ചിപ്പുകൾക്കും സെമികണ്ടക്ടറുകൾക്കുമാണ് 100% ഇറക്കുമതി തീരുവ ബാധകം.",
|
| 86 |
-
"അമേരിക്കയിൽ ഉത്പാദിപ്പിക്കാൻ പ്രതിജ്ഞാബദ്ധരായ കമ്പനികൾക്ക് ഈ 100% ശതമാനം ഇറക്കുമതി തീരുവ ബാധകമല്ല.",
|
| 87 |
-
"Apple pledged an upcoming investment of 600 billion dollars. The objective of this investment is not explicitly stated in the provided context.",
|
| 88 |
-
"The new policy of imposing a 100% tariff on imported computer chips and semiconductors, except for companies committed to manufacturing in the US, is expected to increase prices and lead to anti-trade reactions."
|
| 89 |
-
],
|
| 90 |
-
"processing_time_seconds": 9.44,
|
| 91 |
-
"total_questions": 5,
|
| 92 |
-
"status": "success",
|
| 93 |
-
"error_message": null,
|
| 94 |
-
"document_id": "doc_334ef1720708",
|
| 95 |
-
"was_preprocessed": false,
|
| 96 |
-
"request_start_time": "2025-08-09T17:32:10.354729",
|
| 97 |
-
"request_end_time": "2025-08-09T17:32:19.798990",
|
| 98 |
-
"pipeline_timings": {},
|
| 99 |
-
"question_timings": []
|
| 100 |
-
}
|
| 101 |
-
]
|
| 102 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"export_timestamp": "2025-08-09T12:45:57.981882",
|
| 3 |
"metadata": {
|
| 4 |
+
"server_start_time": "2025-08-09T12:35:54.450893",
|
| 5 |
+
"total_requests": 0,
|
| 6 |
+
"successful_requests": 0,
|
| 7 |
"error_requests": 0,
|
| 8 |
+
"average_processing_time": 0,
|
| 9 |
+
"total_questions_processed": 0,
|
| 10 |
+
"total_documents_processed": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
},
|
| 12 |
+
"logs": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
}
|