File size: 2,073 Bytes
ad5d213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
Test DocStrange Hugging Face API
Usage: python test_docstrange.py <HF_API_URL>
"""
import sys
import requests
import json
import os

# The API base URL is a required positional argument; show usage and stop without it.
if len(sys.argv) < 2:
    print(
        "Usage: python test_docstrange.py <HF_API_URL>",
        "Example: python test_docstrange.py https://your-username-docstrange.hf.space",
        sep="\n",
    )
    sys.exit(1)

# Strip trailing slashes so endpoint paths can be appended with a single "/".
HF_URL = sys.argv[1].rstrip('/')

banner = '=' * 60
print(f"\n{banner}")
print(f"Testing DocStrange API: {HF_URL}")
print(f"{banner}\n")

# Test 1: Health check
# GET the API root; the check passes only if the server answers with a
# non-error status and a JSON-decodable body. Any failure aborts the script
# with exit code 1.
print("1. Testing health check...")
try:
    # Bound the wait so an unresponsive host cannot hang the script forever.
    resp = requests.get(f"{HF_URL}/", timeout=60)
    print(f"   Status: {resp.status_code}")
    # Raise on 4xx/5xx so an error response is not reported as a pass.
    resp.raise_for_status()
    print(f"   Response: {resp.json()}")
    print(f"   ✅ Health check passed!\n")
except Exception as e:
    # Top-level boundary of a test script: report whatever went wrong and stop.
    print(f"   ❌ Failed: {e}\n")
    sys.exit(1)

# Test 2: Check for test PDF
# The extraction step uploads a local sample file; if it is absent, explain
# what to do and end the run with exit code 0.
test_pdf = "test.pdf"
pdf_missing = not os.path.exists(test_pdf)
if pdf_missing:
    print(
        "⚠️  No test.pdf found. Please add a test PDF to this directory.",
        f"   Or check API docs at: {HF_URL}/docs",
        sep="\n",
    )
    sys.exit(0)

# Test 3: Full extraction
# Upload the sample PDF to the /extract endpoint, print a summary of the JSON
# response, then print the closing banner. The script exits with code 1 if the
# extraction did not succeed, so callers (shells, CI) can detect the failure.
print(f"2. Testing document extraction with {test_pdf}...")
extraction_ok = False
try:
    with open(test_pdf, 'rb') as f:
        resp = requests.post(
            f"{HF_URL}/extract",
            files={"file": f},
            timeout=120
        )

    print(f"   Status: {resp.status_code}")

    if resp.status_code == 200:
        data = resp.json()
        print(f"   ✅ Success!")
        print(f"   File: {data.get('file_name')}")
        print(f"   Format: {data.get('format')}")
        print(f"   Metadata: {json.dumps(data.get('metadata', {}), indent=2)}")

        # Preview data
        doc_data = data.get('data', {})
        if isinstance(doc_data, str):
            # Only show an ellipsis when the preview actually truncates the text.
            ellipsis = "..." if len(doc_data) > 200 else ""
            print(f"\n   Preview (first 200 chars):")
            print(f"   {doc_data[:200]}{ellipsis}")
        elif isinstance(doc_data, dict):
            print(f"\n   Data keys: {list(doc_data.keys())}")

        # Reached only if the response was parsed and summarized without error.
        extraction_ok = True
    else:
        print(f"   ❌ Failed: {resp.text}\n")

except Exception as e:
    # Top-level boundary of a test script: report the error, fail the run below.
    print(f"   ❌ Failed: {e}\n")

print(f"\n{'='*60}")
print("Test complete!")
print(f"{'='*60}\n")

# Signal failure through the exit status instead of always exiting 0.
if not extraction_ok:
    sys.exit(1)