# Spaces status: Sleeping
import json
import subprocess
import sys

import gradio as gr
def _format_diagnostics(diagnostics):
    """Render the ``diagnostics`` mapping as the Markdown report text.

    Missing keys fall back to ``0`` (counts), ``'none'`` (best method) or an
    empty list (sample texts), so a partial payload still yields a report.
    """
    lines = [
        "",
        "# Diagnostic Results",
        "## Detection Counts:",
        f"- **Minimal Settings (72 DPI):** {diagnostics.get('minimal_detections', 0)}",
        f"- **Current Settings (300 DPI):** {diagnostics.get('current_detections', 0)}",
        f"- **Aggressive Settings (Enhanced):** {diagnostics.get('aggressive_detections', 0)}",
        f"## Best Method: {diagnostics.get('best_method', 'none')} "
        f"({diagnostics.get('best_count', 0)} detections)",
        "## Sample Detected Text:",
    ]

    sample_texts = diagnostics.get('sample_texts', [])
    if sample_texts:
        lines.extend(f"{i}. {text}" for i, text in enumerate(sample_texts, start=1))
    else:
        lines.append("No text detected in any method")

    # Blank line separates the sample list from the page total.
    lines.append("")
    lines.append(f"**Total Pages:** {diagnostics.get('total_pages', 0)}")
    return "\n".join(lines)


def run_diagnostic(file, *, script_path="/home/user/app/diagnostic_test.py", timeout=300):
    """Run the diagnostic script on an uploaded file and return a Markdown report.

    The script is executed with the current interpreter as
    ``<python> <script_path> <file path>`` and is expected to print a JSON
    object with a ``diagnostics`` key on stdout.

    Args:
        file: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path string; ``None`` means nothing was
            uploaded.
        script_path: Path of the diagnostic script to execute.
        timeout: Maximum number of seconds the script may run.

    Returns:
        A Markdown string: the formatted report on success, otherwise a
        human-readable error message. This function never raises; every
        failure is reported as text so the UI can display it.
    """
    if file is None:
        return "No file uploaded"

    # Accept both a file-like object carrying its path in ``.name`` and a
    # bare path; ``str`` has no ``.name``, so it falls through to the default.
    file_path = str(getattr(file, "name", file))

    try:
        command = [sys.executable, script_path, file_path]
        print(f"Running diagnostic: {' '.join(command)}")

        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        print(f"Return code: {process.returncode}")
        print(f"Stderr: {process.stderr}")

        if process.returncode != 0:
            return f"Process failed: {process.stderr}"

        try:
            result = json.loads(process.stdout.strip())
        except json.JSONDecodeError as e:
            return f"JSON parse error: {e}\nOutput: {process.stdout}"

        if not isinstance(result, dict):
            # Valid JSON but not the expected object; report it like a parse
            # problem instead of failing later with an opaque AttributeError.
            return (
                f"JSON parse error: expected a JSON object, "
                f"got {type(result).__name__}\nOutput: {process.stdout}"
            )

        # ``or {}`` also covers an explicit ``"diagnostics": null``.
        return _format_diagnostics(result.get('diagnostics') or {})
    except subprocess.TimeoutExpired:
        return f"Error: diagnostic timed out after {timeout} seconds"
    except Exception as e:
        # UI boundary: surface any unexpected failure as text, not a traceback.
        return f"Error: {e}"
# Simple diagnostic interface: upload a PDF, press the button, and the
# Markdown report returned by run_diagnostic is shown below.
with gr.Blocks(title="PaddleOCR Diagnostic") as demo:
    gr.Markdown("# PaddleOCR Performance Diagnostic")
    gr.Markdown("This will test different OCR settings to identify why detection is poor.")

    # NOTE(review): the original indentation was lost; this assumes the row
    # holds the upload widget and the button side by side, with the results
    # rendered below the row. Confirm against the intended layout.
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        test_btn = gr.Button("Run Diagnostic", variant="primary")

    output = gr.Markdown(label="Diagnostic Results")

    # Clicking the button passes the uploaded file to run_diagnostic and
    # writes its returned string into the results component.
    test_btn.click(
        fn=run_diagnostic,
        inputs=[file_input],
        outputs=[output],
    )


if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)