redhairedshanks1 committed on
Commit
dd3d80f
·
1 Parent(s): 8236e2f

Update utilities/extract_text.py

Browse files
Files changed (1) hide show
  1. utilities/extract_text.py +11 -6
utilities/extract_text.py CHANGED
@@ -85,14 +85,15 @@ def extract_text_remote(state):
85
  print(f"File size in bytes: {file_size}")
86
 
87
  # Call API and wait for response
88
- # NOTE: Don't set Content-Type header - requests will set it automatically with boundary
89
  try:
90
- resp = requests.post(EXTRACT_TEXT_API, files=files, data=data, headers=headers, timeout=120)
91
  except requests.exceptions.Timeout:
92
- print(f"❌ Request timed out after 120 seconds")
93
- raise RuntimeError("API request timed out")
94
  except requests.exceptions.RequestException as e:
95
  print(f"❌ Request exception: {str(e)}")
 
96
 
97
  print(f"\n📥 API Response:")
98
  print(f"Status Code: {resp.status_code}")
@@ -100,17 +101,21 @@ def extract_text_remote(state):
100
 
101
  if resp.status_code != 200:
102
  print(f"❌ Error Response: {resp.text[:500]}")
103
- raise RuntimeError(f"Extract text API failed: {resp.text}")
104
 
105
  try:
106
  response_json = resp.json()
107
  print(f"Response JSON keys: {list(response_json.keys())}")
108
  print(f"Response JSON: {str(response_json)[:500]}")
109
 
110
- extracted_text = response_json.get("text", "")
 
 
111
  text_length = len(extracted_text) if extracted_text else 0
112
 
113
  print(f"\n📊 Extraction Result:")
 
 
114
  print(f"Text Length: {text_length} characters")
115
  if text_length > 0:
116
  print(f"First 200 chars: {extracted_text[:200]}")
 
85
  print(f"File size in bytes: {file_size}")
86
 
87
  # Call API and wait for response
88
+ # NOTE: Increased timeout to 300 seconds (5 minutes) for large files
89
  try:
90
+ resp = requests.post(EXTRACT_TEXT_API, files=files, data=data, headers=headers, timeout=300)
91
  except requests.exceptions.Timeout:
92
+ print(f"❌ Request timed out after 300 seconds")
93
+ raise RuntimeError("API request timed out after 5 minutes")
94
  except requests.exceptions.RequestException as e:
95
  print(f"❌ Request exception: {str(e)}")
96
+ raise RuntimeError(f"API request failed: {str(e)}")
97
 
98
  print(f"\n📥 API Response:")
99
  print(f"Status Code: {resp.status_code}")
 
101
 
102
  if resp.status_code != 200:
103
  print(f"❌ Error Response: {resp.text[:500]}")
104
+ raise RuntimeError(f"Extract text API failed with status {resp.status_code}: {resp.text}")
105
 
106
  try:
107
  response_json = resp.json()
108
  print(f"Response JSON keys: {list(response_json.keys())}")
109
  print(f"Response JSON: {str(response_json)[:500]}")
110
 
111
+ # IMPORTANT: The API returns {"status": "completed", "result": "text here", "process_id": "..."}
112
+ # NOT {"text": "..."}
113
+ extracted_text = response_json.get("result", "") or response_json.get("text", "")
114
  text_length = len(extracted_text) if extracted_text else 0
115
 
116
  print(f"\n📊 Extraction Result:")
117
+ print(f"API Status: {response_json.get('status', 'unknown')}")
118
+ print(f"Process ID: {response_json.get('process_id', 'none')}")
119
  print(f"Text Length: {text_length} characters")
120
  if text_length > 0:
121
  print(f"First 200 chars: {extracted_text[:200]}")