redhairedshanks1 committed on
Commit
d4c0b27
·
1 Parent(s): 37b146d

Update utilities/extract_tables.py

Browse files
Files changed (1) hide show
  1. utilities/extract_tables.py +11 -5
utilities/extract_tables.py CHANGED
@@ -46,18 +46,24 @@ def extract_tables_remote(state):
46
 
47
  if not os.path.exists(path):
48
  raise RuntimeError(f"File not found: {path}")
 
 
 
49
 
50
  with open(path, "rb") as f:
51
- files = {"file": (filename, f, "application/pdf")}
 
 
 
52
  data = {
53
- "filename": filename,
54
- "start_page": state.get("start_page", 1),
55
- "end_page": state.get("end_page", 1),
56
  }
57
  headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_TOKEN')}"}
58
 
59
  # Call API and wait for response
60
- resp = requests.post(EXTRACT_TABLES_API, files=files, data=data, headers=headers)
61
 
62
  if resp.status_code != 200:
63
  raise RuntimeError(f"Extract tables API failed: {resp.text}")
 
46
 
47
  if not os.path.exists(path):
48
  raise RuntimeError(f"File not found: {path}")
49
+
50
+ # Extract just the filename (not full path) to match curl format
51
+ file_basename = os.path.basename(path)
52
 
53
  with open(path, "rb") as f:
54
+ # IMPORTANT: Use basename for the file tuple (matches curl format)
55
+ files = {"file": (file_basename, f, "application/pdf")}
56
+
57
+ # IMPORTANT: Convert page numbers to strings (matches curl -F format)
58
  data = {
59
+ "filename": file_basename, # Just filename, not full path
60
+ "start_page": str(state.get("start_page", 1)), # String, not int
61
+ "end_page": str(state.get("end_page", 1)), # String, not int
62
  }
63
  headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_TOKEN')}"}
64
 
65
  # Call API and wait for response
66
+ resp = requests.post(EXTRACT_TABLES_API, files=files, data=data, headers=headers, timeout=120)
67
 
68
  if resp.status_code != 200:
69
  raise RuntimeError(f"Extract tables API failed: {resp.text}")