Spaces:

stellar413
/

masterllm

Sleeping

App Files Files Community

redhairedshanks1 committed on Dec 15, 2025

Commit

d68f17d

1 Parent(s): 7b72f28

Update utilities/extract_text.py

Browse files

Files changed (1) hide show

utilities/extract_text.py +47 -3

utilities/extract_text.py CHANGED Viewed

@@ -46,6 +46,20 @@ def extract_text_remote(state):
     if not os.path.exists(path):
         raise RuntimeError(f"File not found: {path}")
     with open(path, "rb") as f:
         files = {"file": (filename, f, "application/pdf")}
         data = {
@@ -55,11 +69,41 @@ def extract_text_remote(state):
         }
         headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_TOKEN')}"}
         # Call API and wait for response
-        resp = requests.post(EXTRACT_TEXT_API, files=files, data=data, headers=headers)
     if resp.status_code != 200:
         raise RuntimeError(f"Extract text API failed: {resp.text}")
-    state["text"] = resp.json().get("text", "")
-    return state

     if not os.path.exists(path):
         raise RuntimeError(f"File not found: {path}")
+    # Get file size for debugging
+    file_size = os.path.getsize(path)
+    print(f"\n{'='*60}")
+    print(f"📄 EXTRACT TEXT API CALL")
+    print(f"{'='*60}")
+    print(f"File: {filename}")
+    print(f"Path: {path}")
+    print(f"Size: {file_size} bytes")
+    print(f"Start Page: {state.get('start_page', 1)}")
+    print(f"End Page: {state.get('end_page', 1)}")
+    print(f"API URL: {EXTRACT_TEXT_API}")
+    print(f"Auth Token: {'✓ Set' if os.getenv('HUGGINGFACE_API_TOKEN') else '✗ Not Set'}")
     with open(path, "rb") as f:
         files = {"file": (filename, f, "application/pdf")}
         data = {
         }
         headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_TOKEN')}"}
+        print(f"\n🚀 Sending request to API...")
+        print(f"Data params: {data}")
         # Call API and wait for response
+        resp = requests.post(EXTRACT_TEXT_API, files=files, data=data, headers=headers, timeout=120)
+    print(f"\n📥 API Response:")
+    print(f"Status Code: {resp.status_code}")
+    print(f"Response Headers: {dict(resp.headers)}")
     if resp.status_code != 200:
+        print(f"❌ Error Response: {resp.text[:500]}")
         raise RuntimeError(f"Extract text API failed: {resp.text}")
+    try:
+        response_json = resp.json()
+        print(f"Response JSON keys: {list(response_json.keys())}")
+        print(f"Response JSON: {str(response_json)[:500]}")
+        extracted_text = response_json.get("text", "")
+        text_length = len(extracted_text) if extracted_text else 0
+        print(f"\n📊 Extraction Result:")
+        print(f"Text Length: {text_length} characters")
+        if text_length > 0:
+            print(f"First 200 chars: {extracted_text[:200]}")
+        else:
+            print(f"⚠️  WARNING: API returned EMPTY text!")
+            print(f"Full response: {response_json}")
+        state["text"] = extracted_text
+        print(f"{'='*60}\n")
+        return state
+    except Exception as e:
+        print(f"❌ Error parsing response: {str(e)}")
+        print(f"Raw response: {resp.text[:500]}")
+        raise RuntimeError(f"Failed to parse API response: {str(e)}")