Spaces:
Sleeping
Sleeping
| import subprocess | |
| import json | |
| import sys | |
| import gradio as gr | |
def _confidence(value):
    """Return ``value`` as a float, or 0.0 when it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _render_report(result):
    """Build the ``(summary, text, lab_display)`` triple from the parsed OCR JSON object."""
    text = result.get('text') or ''
    lab_values = result.get('lab_values') or {}
    # JSON numbers decode to int/float, so stringify every item before joining.
    numbers = ', '.join(str(item) for item in (result.get('numbers_found') or [])[:10])
    terms = ', '.join(str(item) for item in (result.get('terms_found') or [])[:10])

    # The leading empty entry reproduces the newline that opens the summary block.
    summary_lines = [
        "",
        "**Enhanced OCR Results:**",
        f"- **Total Detections:** {result.get('total_detections', 0)}",
        f"- **Pages Processed:** {result.get('pages_processed', 0)}",
        f"- **Text Length:** {len(text)}",
        f"- **Lab Values Found:** {len(lab_values)}",
        f"- **Settings:** {result.get('settings', 'Unknown')}",
        f"**Sample Numbers:** {numbers}",
        f"**Sample Terms:** {terms}",
        "**Lab Values Detected:**",
    ]
    lab_parts = ["**Detected Lab Values:**\n\n"]

    if lab_values:
        # One pass feeds both the short summary list and the detailed lab panel.
        for name, data in lab_values.items():
            value = data.get('value', 'N/A')
            confidence = _confidence(data.get('confidence', 0))
            summary_lines.append(f"- **{name}:** {value} (confidence: {confidence:.2f})")
            lab_parts.append(f"**{name}:** {value}\n")
            lab_parts.append(f" - Raw text: {data.get('raw_text', 'N/A')}\n")
            lab_parts.append(f" - Confidence: {confidence:.2f}\n\n")
    else:
        summary_lines.append("- No lab values detected with current patterns")
        lab_parts.append(
            "No lab values detected. The OCR may need pattern adjustments for this document format.\n"
        )

    return "\n".join(summary_lines) + "\n", text, "".join(lab_parts)


def test_ocr_minimal(file, script_path="/home/user/app/enhanced_paddle_test.py", timeout=300):
    """Run the OCR worker script on an uploaded file and format its JSON output.

    Args:
        file: The uploaded file as handed over by the Gradio ``File`` component:
            either an object exposing the path as ``.name`` or a plain path
            string. ``None`` means nothing was uploaded.
        script_path: Path of the worker script executed with the current
            Python interpreter. It must print a single JSON object on stdout.
        timeout: Maximum run time of the worker in seconds.

    Returns:
        A ``(summary_markdown, extracted_text, lab_values_markdown)`` tuple.
        On any failure the first element carries the error message and the
        other two are empty strings; this function does not raise.
    """
    if file is None:
        return "No file uploaded", "", ""

    try:
        # Accept both a tempfile-like upload object and a bare path string.
        file_path = str(getattr(file, "name", file))
        command = [sys.executable, script_path, file_path]
        print(f"Running: {' '.join(command)}")

        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        print(f"Return code: {process.returncode}")
        print(f"Stderr: {process.stderr}")

        if process.returncode != 0:
            return f"Process failed with code {process.returncode}\nStderr: {process.stderr}", "", ""

        try:
            result = json.loads(process.stdout.strip())
        except json.JSONDecodeError as e:
            return f"JSON parse error: {e}\nStdout: {process.stdout}", "", ""

        if not isinstance(result, dict):
            # Valid JSON of the wrong shape (list, string, null, ...) cannot be rendered.
            return f"Unexpected OCR output (expected a JSON object)\nStdout: {process.stdout}", "", ""

        return _render_report(result)
    except subprocess.TimeoutExpired:
        # With the default timeout of 300 seconds this reads "after 5 minutes".
        return f"Process timed out after {timeout / 60:g} minutes", "", ""
    except Exception as e:
        # UI boundary: surface any unexpected failure as text instead of crashing the handler.
        return f"Error: {e}", "", ""


# The handler's name starts with "test_"; mark it so pytest does not collect it as a test case.
test_ocr_minimal.__test__ = False
# Gradio front end: upload a PDF, run the OCR handler, then show the summary,
# the detected lab values and the full extracted text.
with gr.Blocks(title="Enhanced Medical OCR Test") as demo:
    gr.Markdown("# Enhanced Medical Document OCR")
    gr.Markdown(
        "This processes all pages with medical-specific patterns and extracts "
        "lab values similar to the local implementation."
    )

    # Input row: the upload widget next to the trigger button.
    with gr.Row():
        file_input = gr.File(file_types=[".pdf"], label="Upload PDF")
        test_btn = gr.Button(value="Run Enhanced OCR", variant="primary")

    # Result row: summary on the left, lab values on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Results Summary")
            summary_output = gr.Markdown(label="Summary")
        with gr.Column():
            gr.Markdown("### Lab Values")
            lab_output = gr.Markdown(label="Lab Values")

    # Raw OCR text underneath the two result panels.
    with gr.Row():
        gr.Markdown("### Full Extracted Text")
        text_output = gr.Textbox(
            label="Complete OCR Text",
            lines=20,
            max_lines=30,
        )

    # The handler returns (summary, text, lab display), so outputs follow that order.
    test_btn.click(
        test_ocr_minimal,
        inputs=[file_input],
        outputs=[summary_output, text_output, lab_output],
    )

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")