Spaces:
Sleeping
Sleeping
Commit
·
8236e2f
1
Parent(s):
fc6e06b
Update utilities/extract_text.py
Browse files
- utilities/extract_text.py +11 -1
utilities/extract_text.py
CHANGED
|
@@ -75,14 +75,24 @@ def extract_text_remote(state):
|
|
| 75 |
"end_page": str(state.get("end_page", 1)) # String, not int
|
| 76 |
}
|
| 77 |
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_TOKEN')}"}
|
|
|
|
| 78 |
|
| 79 |
print(f"\n🚀 Sending request to API...")
|
| 80 |
print(f"File tuple: ('file', ('{file_basename}', <binary>, 'application/pdf'))")
|
| 81 |
print(f"Data params: {data}")
|
| 82 |
print(f"Data types: start_page={type(data['start_page'])}, end_page={type(data['end_page'])}")
|
|
|
|
|
|
|
| 83 |
|
| 84 |
# Call API and wait for response
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
print(f"\n📥 API Response:")
|
| 88 |
print(f"Status Code: {resp.status_code}")
|
|
|
|
| 75 |
"end_page": str(state.get("end_page", 1)) # String, not int
|
| 76 |
}
|
| 77 |
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_TOKEN')}"}
|
| 78 |
+
# DON'T set Content-Type - let requests handle it for multipart/form-data
|
| 79 |
|
| 80 |
print(f"\n🚀 Sending request to API...")
|
| 81 |
print(f"File tuple: ('file', ('{file_basename}', <binary>, 'application/pdf'))")
|
| 82 |
print(f"Data params: {data}")
|
| 83 |
print(f"Data types: start_page={type(data['start_page'])}, end_page={type(data['end_page'])}")
|
| 84 |
+
print(f"Headers: {headers}")
|
| 85 |
+
print(f"File size in bytes: {file_size}")
|
| 86 |
|
| 87 |
# Call API and wait for response
|
| 88 |
+
# NOTE: Don't set Content-Type header - requests will set it automatically with boundary
|
| 89 |
+
try:
|
| 90 |
+
resp = requests.post(EXTRACT_TEXT_API, files=files, data=data, headers=headers, timeout=120)
|
| 91 |
+
except requests.exceptions.Timeout:
|
| 92 |
+
print(f"❌ Request timed out after 120 seconds")
|
| 93 |
+
raise RuntimeError("API request timed out")
|
| 94 |
+
except requests.exceptions.RequestException as e:
|
| 95 |
+
print(f"❌ Request exception: {str(e)}")
|
| 96 |
|
| 97 |
print(f"\n📥 API Response:")
|
| 98 |
print(f"Status Code: {resp.status_code}")
|