File size: 1,872 Bytes
38365d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import requests
import json
import sys
import os

# Root URL of the service under test. Override with the ALLDOCEX_BASE_URL
# environment variable to target a different host/port without editing this file.
BASE_URL = os.environ.get("ALLDOCEX_BASE_URL", "http://127.0.0.1:7860")
# Value sent in the "x-api-key" request header. Override with the
# ALLDOCEX_API_KEY environment variable; the default is the test key.
API_KEY = os.environ.get("ALLDOCEX_API_KEY", "alldocex-test-key-2024")

def _report_extraction(result):
    """Print the fields of interest from a decoded extract response.

    Args:
        result: The JSON object returned by the endpoint, as a dict. The keys
            read here are ``filename``, ``status``, ``extraction`` (with
            ``success`` and ``raw_text``), ``summary`` (with ``summary``) and
            ``entities`` (with ``total_entities``). Missing or null sections
            are treated as empty.
    """
    # `or {}` / `or ''` also cover keys that are present but null, which
    # `.get(key, default)` alone would not.
    extraction = result.get('extraction') or {}

    print("\n--- RESULTS ---")
    print(f"Filename: {result.get('filename')}")
    print(f"Status: {result.get('status')}")
    print(f"Extraction Success: {extraction.get('success')}")

    text = extraction.get('raw_text') or ''
    print(f"Full Text Length: {len(text)}")
    print(f"Snippet: {text[:200]}...")

    summary = (result.get('summary') or {}).get('summary') or ''
    if summary:
        print(f"Summary Snippet: {summary[:200]}...")

    entities = (result.get('entities') or {}).get('total_entities', 0)
    print(f"Total Entities Found: {entities}")


def test_sync_extract(file_path, timeout=None):
    """POST a file to ``/api/v1/extract`` and print what comes back.

    The file is uploaded as multipart form field ``file`` with the
    ``x-api-key`` header set to ``API_KEY``. On HTTP 200 the decoded JSON is
    summarised on stdout; on any other status the raw response text is
    printed. Nothing is returned.

    Args:
        file_path: Path of the document to upload. If it does not exist an
            error is printed and no request is made.
        timeout: Optional timeout in seconds passed to ``requests.post``.
            ``None`` (the default) waits indefinitely.
    """
    print(f"Testing synchronous extraction for: {file_path}")

    if not os.path.exists(file_path):
        print(f"Error: File not found: {file_path}")
        return

    url = f"{BASE_URL}/api/v1/extract"
    headers = {
        "x-api-key": API_KEY
    }

    try:
        # The context manager guarantees the upload handle is closed whether
        # the request succeeds or raises.
        with open(file_path, "rb") as upload:
            files = {
                "file": (os.path.basename(file_path), upload, "application/octet-stream")
            }
            response = requests.post(url, headers=headers, files=files, timeout=timeout)

        print(f"Status Code: {response.status_code}")

        if response.status_code == 200:
            _report_extraction(response.json())
            print("\n[SUCCESS] Synchronous endpoint working correctly.")
        else:
            print(f"Error Response: {response.text}")

    except Exception as e:
        # Script-level boundary: report any failure (connection, file open,
        # JSON decoding, unexpected payload shape) instead of a traceback.
        print(f"Request failed: {e}")

if __name__ == "__main__":
    # Use the path given as the first command-line argument; with no
    # argument, fall back to the existing sample document as before.
    sample_doc = sys.argv[1] if len(sys.argv) > 1 else "test_document.docx"
    test_sync_extract(sample_doc)