"""Gradio front end that runs an OCR diagnostic script on an uploaded PDF."""

import json
import subprocess
import sys

import gradio as gr

# Script executed (with the current interpreter) for every uploaded file.
DIAGNOSTIC_SCRIPT = "/home/user/app/diagnostic_test.py"
# Maximum time, in seconds, the diagnostic subprocess may run.
DIAGNOSTIC_TIMEOUT_SECONDS = 300


def _format_report(diagnostics):
    """Render the ``diagnostics`` mapping as a Markdown report string.

    Missing keys fall back to ``0`` (counts), ``'none'`` (best method) or an
    empty list (sample texts), so a partial mapping still renders.
    """
    lines = [
        "",
        "# Diagnostic Results",
        "",
        "## Detection Counts:",
        f"- **Minimal Settings (72 DPI):** {diagnostics.get('minimal_detections', 0)}",
        f"- **Current Settings (300 DPI):** {diagnostics.get('current_detections', 0)}",
        f"- **Aggressive Settings (Enhanced):** {diagnostics.get('aggressive_detections', 0)}",
        "",
        "## Best Method:",
        f"{diagnostics.get('best_method', 'none')} ({diagnostics.get('best_count', 0)} detections)",
        "",
        "## Sample Detected Text:",
    ]

    sample_texts = diagnostics.get('sample_texts', [])
    if sample_texts:
        lines.extend(
            f"{number}. {text}"
            for number, text in enumerate(sample_texts, start=1)
        )
    else:
        lines.append("No text detected in any method")

    lines.append("")
    lines.append(f"**Total Pages:** {diagnostics.get('total_pages', 0)}")
    return "\n".join(lines)


def run_diagnostic(file):
    """Run the diagnostic script on ``file`` and return a Markdown report.

    Args:
        file: The value supplied by the ``gr.File`` input: ``None`` when
            nothing was uploaded, otherwise either a path string or an
            object whose ``name`` attribute is the path.

    Returns:
        A Markdown string: the formatted report on success, or a
        human-readable error message. This function never raises, so the
        UI always has something to display.
    """
    if file is None:
        return "No file uploaded"

    try:
        # Accept both a tempfile-like object (``.name``) and a bare path.
        pdf_path = str(getattr(file, "name", file))

        # Run the diagnostic script
        command = [sys.executable, DIAGNOSTIC_SCRIPT, pdf_path]
        print(f"Running diagnostic: {' '.join(command)}")

        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=DIAGNOSTIC_TIMEOUT_SECONDS,
        )

        print(f"Return code: {process.returncode}")
        print(f"Stderr: {process.stderr}")

        if process.returncode != 0:
            return f"Process failed: {process.stderr}"

        try:
            result = json.loads(process.stdout.strip())
        except json.JSONDecodeError as e:
            return f"JSON parse error: {e}\nOutput: {process.stdout}"

        # ``or {}`` also covers an explicit ``"diagnostics": null``.
        return _format_report(result.get('diagnostics') or {})

    except subprocess.TimeoutExpired:
        return (
            "Error: diagnostic timed out after "
            f"{DIAGNOSTIC_TIMEOUT_SECONDS} seconds"
        )
    except Exception as e:
        # UI boundary: report any failure as text instead of raising.
        return f"Error: {e}"


# Simple diagnostic interface
with gr.Blocks(title="PaddleOCR Diagnostic") as demo:
    gr.Markdown("# PaddleOCR Performance Diagnostic")
    gr.Markdown("This will test different OCR settings to identify why detection is poor.")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        test_btn = gr.Button("Run Diagnostic", variant="primary")

    output = gr.Markdown(label="Diagnostic Results")

    test_btn.click(
        fn=run_diagnostic,
        inputs=[file_input],
        outputs=[output],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)