redhairedshanks1 committed on
Commit
8236e2f
·
1 Parent(s): fc6e06b

Update utilities/extract_text.py

Browse files
Files changed (1) hide show
  1. utilities/extract_text.py +11 -1
utilities/extract_text.py CHANGED
@@ -75,14 +75,24 @@ def extract_text_remote(state):
75
  "end_page": str(state.get("end_page", 1)) # String, not int
76
  }
77
  headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_TOKEN')}"}
 
78
 
79
  print(f"\n🚀 Sending request to API...")
80
  print(f"File tuple: ('file', ('{file_basename}', <binary>, 'application/pdf'))")
81
  print(f"Data params: {data}")
82
  print(f"Data types: start_page={type(data['start_page'])}, end_page={type(data['end_page'])}")
 
 
83
 
84
  # Call API and wait for response
85
- resp = requests.post(EXTRACT_TEXT_API, files=files, data=data, headers=headers, timeout=120)
 
 
 
 
 
 
 
86
 
87
  print(f"\n📥 API Response:")
88
  print(f"Status Code: {resp.status_code}")
 
75
  "end_page": str(state.get("end_page", 1)) # String, not int
76
  }
77
  headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_TOKEN')}"}
78
+ # DON'T set Content-Type - let requests handle it for multipart/form-data
79
 
80
  print(f"\n🚀 Sending request to API...")
81
  print(f"File tuple: ('file', ('{file_basename}', <binary>, 'application/pdf'))")
82
  print(f"Data params: {data}")
83
  print(f"Data types: start_page={type(data['start_page'])}, end_page={type(data['end_page'])}")
84
+ print(f"Headers: {headers}")
85
+ print(f"File size in bytes: {file_size}")
86
 
87
  # Call API and wait for response
88
+ # NOTE: Don't set Content-Type header - requests will set it automatically with boundary
89
+ try:
90
+ resp = requests.post(EXTRACT_TEXT_API, files=files, data=data, headers=headers, timeout=120)
91
+ except requests.exceptions.Timeout:
92
+ print(f"❌ Request timed out after 120 seconds")
93
+ raise RuntimeError("API request timed out")
94
+ except requests.exceptions.RequestException as e:
95
+ print(f"❌ Request exception: {str(e)}")
96
 
97
  print(f"\n📥 API Response:")
98
  print(f"Status Code: {resp.status_code}")