Spaces:
Sleeping
Sleeping
Updated to work with enhanced file
Browse files
app.py
CHANGED
|
@@ -8,8 +8,8 @@ def test_ocr_minimal(file):
|
|
| 8 |
return "No file uploaded", ""
|
| 9 |
|
| 10 |
try:
|
| 11 |
-
# Run the
|
| 12 |
-
script_path = "/home/user/app/
|
| 13 |
command = [sys.executable, script_path, file.name]
|
| 14 |
|
| 15 |
print(f"Running: {' '.join(command)}")
|
|
@@ -28,11 +28,22 @@ def test_ocr_minimal(file):
|
|
| 28 |
if process.returncode == 0:
|
| 29 |
try:
|
| 30 |
result = json.loads(process.stdout.strip())
|
|
|
|
|
|
|
| 31 |
summary = f"""
|
| 32 |
-
**
|
| 33 |
-
-
|
| 34 |
-
-
|
|
|
|
|
|
|
|
|
|
| 35 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
return summary, result.get('text', '')
|
| 37 |
except json.JSONDecodeError:
|
| 38 |
return f"JSON parse error. Stdout: {process.stdout}", ""
|
|
@@ -43,16 +54,19 @@ def test_ocr_minimal(file):
|
|
| 43 |
return f"Error: {e}", ""
|
| 44 |
|
| 45 |
# Simple Gradio interface for testing
|
| 46 |
-
with gr.Blocks(title="OCR Test") as demo:
|
| 47 |
-
gr.Markdown("#
|
|
|
|
| 48 |
|
| 49 |
with gr.Row():
|
| 50 |
file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
| 51 |
-
test_btn = gr.Button("Test OCR")
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
with gr.Row():
|
| 54 |
-
|
| 55 |
-
text_output = gr.Textbox(label="Extracted Text", lines=10)
|
| 56 |
|
| 57 |
test_btn.click(
|
| 58 |
fn=test_ocr_minimal,
|
|
|
|
| 8 |
return "No file uploaded", ""
|
| 9 |
|
| 10 |
try:
|
| 11 |
+
# Run the enhanced test script
|
| 12 |
+
script_path = "/home/user/app/enhanced_paddle_test.py"
|
| 13 |
command = [sys.executable, script_path, file.name]
|
| 14 |
|
| 15 |
print(f"Running: {' '.join(command)}")
|
|
|
|
| 28 |
if process.returncode == 0:
|
| 29 |
try:
|
| 30 |
result = json.loads(process.stdout.strip())
|
| 31 |
+
|
| 32 |
+
# Format the enhanced results
|
| 33 |
summary = f"""
|
| 34 |
+
**Results Summary:**
|
| 35 |
+
- **Best Approach:** {result.get('best_approach', 'Unknown')}
|
| 36 |
+
- **Best Detections:** {result.get('detections', 0)}
|
| 37 |
+
- **Text Length:** {len(result.get('text', ''))}
|
| 38 |
+
|
| 39 |
+
**All Approaches:**
|
| 40 |
"""
|
| 41 |
+
|
| 42 |
+
# Add results for each approach
|
| 43 |
+
all_results = result.get('all_results', {})
|
| 44 |
+
for approach_name, approach_data in all_results.items():
|
| 45 |
+
summary += f"\n- **{approach_name}:** {approach_data.get('detections', 0)} detections"
|
| 46 |
+
|
| 47 |
return summary, result.get('text', '')
|
| 48 |
except json.JSONDecodeError:
|
| 49 |
return f"JSON parse error. Stdout: {process.stdout}", ""
|
|
|
|
| 54 |
return f"Error: {e}", ""
|
| 55 |
|
| 56 |
# Simple Gradio interface for testing
|
| 57 |
+
with gr.Blocks(title="Enhanced OCR Test") as demo:
|
| 58 |
+
gr.Markdown("# Enhanced OCR Test - Multiple Approaches")
|
| 59 |
+
gr.Markdown("This will test different DPI settings and OCR configurations to find the best quality match for your local implementation.")
|
| 60 |
|
| 61 |
with gr.Row():
|
| 62 |
file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
| 63 |
+
test_btn = gr.Button("Test Multiple OCR Approaches")
|
| 64 |
+
|
| 65 |
+
with gr.Row():
|
| 66 |
+
summary_output = gr.Markdown(label="Results Summary")
|
| 67 |
|
| 68 |
with gr.Row():
|
| 69 |
+
text_output = gr.Textbox(label="Best Extracted Text", lines=15)
|
|
|
|
| 70 |
|
| 71 |
test_btn.click(
|
| 72 |
fn=test_ocr_minimal,
|