File size: 2,650 Bytes
9b66525
8c10e64
43267ee
b7aa35b
 
96be125
43267ee
96be125
43267ee
9b66525
96be125
 
43267ee
9b66525
96be125
9b66525
43267ee
b7aa35b
43267ee
b7aa35b
96be125
b7aa35b
9b66525
43267ee
 
 
 
 
 
96be125
503febe
96be125
 
503febe
96be125
 
 
 
3ca6417
96be125
b92fc27
96be125
43267ee
96be125
 
 
 
b92fc27
96be125
b92fc27
96be125
b92fc27
96be125
b92fc27
 
96be125
43267ee
96be125
b7aa35b
cced363
96be125
cced363
96be125
 
 
 
8c10e64
43267ee
 
96be125
8c10e64
96be125
8c10e64
43267ee
96be125
43267ee
96be125
43267ee
8c10e64
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import subprocess
import json
import sys
import gradio as gr

def _format_diagnostics(diagnostics):
    """Render the ``diagnostics`` mapping as the Markdown report text.

    Missing count keys fall back to ``0`` and a missing ``best_method``
    falls back to ``'none'``.
    """
    header = [
        "",
        "# Diagnostic Results",
        "",
        "## Detection Counts:",
        f"- **Minimal Settings (72 DPI):** {diagnostics.get('minimal_detections', 0)}",
        # The two trailing spaces are part of the report text (Markdown
        # reads them as a hard line break); written explicitly so an
        # editor cannot strip them silently.
        f"- **Current Settings (300 DPI):** {diagnostics.get('current_detections', 0)}  ",
        f"- **Aggressive Settings (Enhanced):** {diagnostics.get('aggressive_detections', 0)}",
        "",
        f"## Best Method: {diagnostics.get('best_method', 'none')} "
        f"({diagnostics.get('best_count', 0)} detections)",
        "",
        "## Sample Detected Text:",
    ]

    sample_texts = diagnostics.get('sample_texts', [])
    if sample_texts:
        samples = [f"{i}. {text}" for i, text in enumerate(sample_texts, start=1)]
    else:
        samples = ["No text detected in any method"]

    footer = ["", f"**Total Pages:** {diagnostics.get('total_pages', 0)}"]
    return "\n".join(header + samples + footer)


def run_diagnostic(file):
    """Run the external diagnostic script on an upload and format its report.

    Args:
        file: The upload handed over by the UI: either a filesystem path
            string or an object exposing the path as ``.name``. ``None``
            means nothing was uploaded.

    Returns:
        The Markdown report when the script exits with code 0 and prints
        valid JSON on stdout; otherwise a plain-text message describing
        the failure. Errors derived from ``Exception`` are returned as
        text rather than raised.
    """
    if file is None:
        return "No file uploaded"

    try:
        # Accept a plain path as well as a file-like wrapper; reading
        # ``.name`` from a bare ``str`` would raise AttributeError.
        file_path = file if isinstance(file, str) else file.name

        # Run the diagnostic script with the same interpreter as this app.
        script_path = "/home/user/app/diagnostic_test.py"
        command = [sys.executable, script_path, file_path]

        print(f"Running diagnostic: {' '.join(command)}")

        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        print(f"Return code: {process.returncode}")
        print(f"Stderr: {process.stderr}")

        if process.returncode != 0:
            return f"Process failed: {process.stderr}"

        # Only the JSON decode gets its own handler so the raw stdout can
        # be shown next to the parse error.
        try:
            result = json.loads(process.stdout.strip())
        except json.JSONDecodeError as e:
            return f"JSON parse error: {e}\nOutput: {process.stdout}"

        return _format_diagnostics(result.get('diagnostics', {}))

    except Exception as e:
        # UI boundary: timeouts, OS errors and unexpected payload shapes
        # are reported to the user as text instead of propagating.
        return f"Error: {e}"

# Gradio front end: one PDF upload, one button, one Markdown results pane.
with gr.Blocks(title="PaddleOCR Diagnostic") as demo:
    gr.Markdown("# PaddleOCR Performance Diagnostic")
    gr.Markdown("This will test different OCR settings to identify why detection is poor.")

    # The upload control and the trigger button sit in the same row.
    with gr.Row():
        file_input = gr.File(
            label="Upload PDF",
            file_types=[".pdf"],
        )
        test_btn = gr.Button(
            "Run Diagnostic",
            variant="primary",
        )

    # Pane that displays whatever run_diagnostic returns.
    output = gr.Markdown(label="Diagnostic Results")

    # Button click -> run_diagnostic(uploaded file) -> results pane.
    test_btn.click(fn=run_diagnostic, inputs=[file_input], outputs=[output])

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(server_port=7860, server_name="0.0.0.0")