mbuckle committed on
Commit
503febe
·
1 Parent(s): 78b142a

Updated to work with enhanced file

Browse files
Files changed (1) hide show
  1. app.py +24 -10
app.py CHANGED
@@ -8,8 +8,8 @@ def test_ocr_minimal(file):
8
  return "No file uploaded", ""
9
 
10
  try:
11
- # Run the minimal test script
12
- script_path = "/home/user/app/minimal_test_paddle.py"
13
  command = [sys.executable, script_path, file.name]
14
 
15
  print(f"Running: {' '.join(command)}")
@@ -28,11 +28,22 @@ def test_ocr_minimal(file):
28
  if process.returncode == 0:
29
  try:
30
  result = json.loads(process.stdout.strip())
 
 
31
  summary = f"""
32
- **Success!**
33
- - Detections: {result.get('detections', 0)}
34
- - Text length: {len(result.get('text', ''))}
 
 
 
35
  """
 
 
 
 
 
 
36
  return summary, result.get('text', '')
37
  except json.JSONDecodeError:
38
  return f"JSON parse error. Stdout: {process.stdout}", ""
@@ -43,16 +54,19 @@ def test_ocr_minimal(file):
43
  return f"Error: {e}", ""
44
 
45
  # Simple Gradio interface for testing
46
- with gr.Blocks(title="OCR Test") as demo:
47
- gr.Markdown("# Simple OCR Test")
 
48
 
49
  with gr.Row():
50
  file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
51
- test_btn = gr.Button("Test OCR")
 
 
 
52
 
53
  with gr.Row():
54
- summary_output = gr.Markdown(label="Summary")
55
- text_output = gr.Textbox(label="Extracted Text", lines=10)
56
 
57
  test_btn.click(
58
  fn=test_ocr_minimal,
 
8
  return "No file uploaded", ""
9
 
10
  try:
11
+ # Run the enhanced test script
12
+ script_path = "/home/user/app/enhanced_paddle_test.py"
13
  command = [sys.executable, script_path, file.name]
14
 
15
  print(f"Running: {' '.join(command)}")
 
28
  if process.returncode == 0:
29
  try:
30
  result = json.loads(process.stdout.strip())
31
+
32
+ # Format the enhanced results
33
  summary = f"""
34
+ **Results Summary:**
35
+ - **Best Approach:** {result.get('best_approach', 'Unknown')}
36
+ - **Best Detections:** {result.get('detections', 0)}
37
+ - **Text Length:** {len(result.get('text', ''))}
38
+
39
+ **All Approaches:**
40
  """
41
+
42
+ # Add results for each approach
43
+ all_results = result.get('all_results', {})
44
+ for approach_name, approach_data in all_results.items():
45
+ summary += f"\n- **{approach_name}:** {approach_data.get('detections', 0)} detections"
46
+
47
  return summary, result.get('text', '')
48
  except json.JSONDecodeError:
49
  return f"JSON parse error. Stdout: {process.stdout}", ""
 
54
  return f"Error: {e}", ""
55
 
56
  # Simple Gradio interface for testing
57
+ with gr.Blocks(title="Enhanced OCR Test") as demo:
58
+ gr.Markdown("# Enhanced OCR Test - Multiple Approaches")
59
+ gr.Markdown("This will test different DPI settings and OCR configurations to find the best quality match for your local implementation.")
60
 
61
  with gr.Row():
62
  file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
63
+ test_btn = gr.Button("Test Multiple OCR Approaches")
64
+
65
+ with gr.Row():
66
+ summary_output = gr.Markdown(label="Results Summary")
67
 
68
  with gr.Row():
69
+ text_output = gr.Textbox(label="Best Extracted Text", lines=15)
 
70
 
71
  test_btn.click(
72
  fn=test_ocr_minimal,