Luis J Camargo committed on
Commit
acf8835
·
1 Parent(s): 4bdfa9b

feat: Add Gradio UI and inference logic for document OCR with a custom PaddleOCR-VL pipeline configuration.

Browse files
Files changed (2) hide show
  1. app.py +71 -83
  2. default.yaml +104 -0
app.py CHANGED
@@ -22,10 +22,10 @@ logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, handlers=[logging
22
  logger = logging.getLogger("TachiwinDocOCR")
23
 
24
  CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
25
- # The CLI generated filename is usually {pipeline_name}.yaml
26
- INTERNAL_CONFIG_FILE = "PaddleOCR-VL.yaml"
27
- # Our final working file
28
- FINAL_CONFIG_FILE = "custom_pipeline_config.yaml"
29
  OUTPUT_DIR = "output"
30
  os.makedirs(OUTPUT_DIR, exist_ok=True)
31
 
@@ -42,7 +42,7 @@ PADDLE_AVAILABLE = False
42
  try:
43
  import paddle
44
  import paddlex
45
- from paddleocr import PaddleOCRVL
46
  PADDLE_AVAILABLE = True
47
  logger.info(f"Paddle libraries loaded. PaddleX version: {getattr(paddlex, '__version__', 'Unknown')}")
48
  except ImportError as e:
@@ -60,80 +60,69 @@ def setup_pipeline():
60
  return
61
 
62
  try:
63
- logger.info("Starting setup_pipeline...")
64
 
65
- # 1. Generate default config via CLI
66
- if not os.path.exists(FINAL_CONFIG_FILE):
67
- logger.info("Generating default configuration via paddlex CLI...")
68
- # Command: paddlex --get_pipeline_config PaddleOCR-VL --save_path ./
 
 
 
69
  try:
70
- result = subprocess.run(
71
  ["paddlex", "--get_pipeline_config", "PaddleOCR-VL", "--save_path", "./"],
72
  capture_output=True, text=True, check=True
73
  )
74
- logger.info(f"CLI Output: {result.stdout}")
75
- except subprocess.CalledProcessError as e:
76
- logger.error(f"CLI Error: {e.stderr}")
77
- # If CLI fails, we can't proceed with custom model easily without a template
78
- raise e
79
-
80
- # The file generated is likely PaddleOCR-VL.yaml
81
- if os.path.exists(INTERNAL_CONFIG_FILE):
82
- os.rename(INTERNAL_CONFIG_FILE, FINAL_CONFIG_FILE)
83
- logger.info(f"Renamed {INTERNAL_CONFIG_FILE} to {FINAL_CONFIG_FILE}")
84
- else:
85
- logger.error(f"Expected config file {INTERNAL_CONFIG_FILE} was not found after CLI execution.")
86
- # List files to see what was created
87
- logger.info(f"Current directory files: {os.listdir('.')}")
88
- raise FileNotFoundError(f"Config file {INTERNAL_CONFIG_FILE} not found.")
89
-
90
- # 2. Load and Modify Config
91
- logger.info(f"Loading configuration from {FINAL_CONFIG_FILE}")
92
- with open(FINAL_CONFIG_FILE, 'r', encoding='utf-8') as f:
93
  config_data = yaml.safe_load(f)
94
 
95
- logger.info("Modifying configuration with custom model path...")
96
-
97
- # Search and update VLRecognition model_dir
98
  updated = False
99
- if 'SubModules' in config_data:
100
- if 'VLRecognition' in config_data['SubModules']:
101
- config_data['SubModules']['VLRecognition']['model_dir'] = CUSTOM_MODEL_PATH
102
- updated = True
103
-
104
- if not updated:
105
- # Deep search fallback
106
- def deep_update(d):
107
- count = 0
108
- for k, v in d.items():
109
- if k == 'VLRecognition' and isinstance(v, dict):
110
  v['model_dir'] = CUSTOM_MODEL_PATH
111
- count += 1
112
- elif isinstance(v, dict):
113
- count += deep_update(v)
114
- return count
115
- updated = deep_update(config_data) > 0
116
 
117
- if updated:
118
- logger.info(f"Successfully updated VLRecognition model_dir to {CUSTOM_MODEL_PATH}")
119
- else:
120
- logger.warning("Could not find VLRecognition sub-module in the configuration to update its path.")
121
 
122
- with open(FINAL_CONFIG_FILE, 'w', encoding='utf-8') as f:
123
- yaml.dump(config_data, f, default_flow_style=False)
 
 
124
 
125
- # Log final YAML for verification
126
- logger.info("--- UPDATED YAML CONFIG ---")
127
  print(yaml.dump(config_data, default_flow_style=False))
128
- logger.info("--- END UPDATED YAML ---")
129
 
130
- # 3. Initialize pipeline
131
- logger.info(f"Initializing PaddleOCRVL with config: {FINAL_CONFIG_FILE}")
132
- pipeline = PaddleOCRVL(pipeline_config=FINAL_CONFIG_FILE)
133
- logger.info("PaddleOCRVL initialized successfully.")
 
134
 
135
  except Exception as e:
136
- logger.error(f"CRITICAL: Failed to setup pipeline: {e}")
137
  logger.error(traceback.format_exc())
138
 
139
  # Initial setup
@@ -193,11 +182,14 @@ def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
193
  # --- Inference Logic ---
194
 
195
  def run_inference(img_path, task_type="ocr"):
196
- if not PADDLE_AVAILABLE or pipeline is None:
197
- return "❌ Paddle backend not available. Check initialization logs.", "", "", ""
 
 
 
198
 
199
  if not img_path:
200
- return "⚠️ Please upload an image.", "", "", ""
201
 
202
  try:
203
  logger.info(f"--- Inference Start: {task_type} ---")
@@ -212,12 +204,10 @@ def run_inference(img_path, task_type="ocr"):
212
  os.makedirs(run_output_dir, exist_ok=True)
213
 
214
  for i, res in enumerate(output):
215
- # Save results
216
  res.save_to_json(save_path=run_output_dir)
217
  res.save_to_markdown(save_path=run_output_dir)
218
  res.print()
219
 
220
- # Read back generated files
221
  fnames = os.listdir(run_output_dir)
222
  for fname in fnames:
223
  fpath = os.path.join(run_output_dir, fname)
@@ -229,7 +219,7 @@ def run_inference(img_path, task_type="ocr"):
229
  json_content += f.read() + "\n\n"
230
  elif fname.endswith((".png", ".jpg", ".jpeg")) and "res" in fname:
231
  vis_src = image_to_base64_data_url(fpath)
232
- vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden;">'
233
  vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
234
  vis_html += f'</div>'
235
 
@@ -237,16 +227,15 @@ def run_inference(img_path, task_type="ocr"):
237
  md_content = "⚠️ Finished but no content was recognized."
238
 
239
  md_preview = _escape_inequalities_in_math(md_content)
240
- logger.info("--- Inference Finished ---")
241
  return md_preview, md_content, vis_html, json_content
242
 
243
  except Exception as e:
244
- logger.error(f"Inference Error: {e}")
245
  logger.error(traceback.format_exc())
246
  return f"❌ Error: {str(e)}", "", "", ""
247
 
248
  # --- UI Components ---
249
- # (Keeping previous UI logic)
250
 
251
  custom_css = """
252
  body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
@@ -257,9 +246,10 @@ body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
257
  color: white;
258
  border-radius: 1.5rem;
259
  margin-bottom: 2rem;
 
260
  }
261
  .app-header h1 { color: white !important; font-weight: 800; font-size: 2.5rem; }
262
- .notice { background: #f0fdf4; border: 1px solid #bbf7d0; color: #166534; padding: 1rem; border-radius: 1rem; margin-bottom: 2rem; }
263
  .output-box { border: 1px solid #e2e8f0 !important; border-radius: 1rem !important; }
264
  """
265
 
@@ -268,22 +258,22 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
268
  """
269
  <div class="app-header">
270
  <h1>🌎 Tachiwin Document Parsing OCR 🦡</h1>
271
- <p>Fine-tuned for the 68 Indigenous Languages of Mexico</p>
272
  </div>
273
  """
274
  )
275
 
276
  with gr.Row(elem_classes=["notice"]):
277
- gr.Markdown(f"**Engine:** PaddleOCRVL 1.5 | **Model:** `{CUSTOM_MODEL_PATH}`")
 
278
 
279
  with gr.Tabs():
280
- # Document Parsing Tab
281
  with gr.Tab("📄 Full Document Parsing"):
282
  with gr.Row():
283
  with gr.Column(scale=5):
284
- file_doc = gr.File(label="Upload Image", type="filepath")
285
  preview_doc_html = gr.HTML(visible=False)
286
- btn_parse = gr.Button(" Start Parsing", variant="primary")
287
  with gr.Row():
288
  chart_switch = gr.Checkbox(label="Chart OCR", value=True)
289
  unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
@@ -305,14 +295,13 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
305
 
306
  btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
307
 
308
- # Element Recognition Tab
309
  with gr.Tab("🧩 Specific Recognition"):
310
  with gr.Row():
311
  with gr.Column(scale=5):
312
  file_vl = gr.File(label="Upload Element", type="filepath")
313
  preview_vl_html = gr.HTML(visible=False)
314
  with gr.Row():
315
- btn_ocr = gr.Button("Text OCR", variant="secondary")
316
  btn_formula = gr.Button("Formula", variant="secondary")
317
  btn_table = gr.Button("Table", variant="secondary")
318
 
@@ -332,7 +321,6 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
332
  for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
333
  btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
334
 
335
- # Spotting Tab
336
  with gr.Tab("📍 Feature Spotting"):
337
  with gr.Row():
338
  with gr.Column(scale=5):
@@ -343,7 +331,7 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
343
  with gr.Column(scale=7):
344
  with gr.Tabs():
345
  with gr.Tab("🖼️ Detection"):
346
- vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes view.</div>')
347
  with gr.Tab("💾 JSON Feed"):
348
  json_spot = gr.Code(label="JSON", language="json")
349
 
@@ -355,7 +343,7 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
355
 
356
  btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
357
 
358
- gr.Markdown("--- \n *May the indigenous languages of Mexico never be lost. Tachiwin Project.*")
359
 
360
  if __name__ == "__main__":
361
  demo.queue().launch()
 
22
  logger = logging.getLogger("TachiwinDocOCR")
23
 
24
  CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
25
+ # The YAML file provided by the user or generated
26
+ CONFIG_FILE = "default.yaml"
27
+ # Fallback generated if default.yaml doesn't exist
28
+ GENERATED_CONFIG = "PaddleOCR-VL.yaml"
29
  OUTPUT_DIR = "output"
30
  os.makedirs(OUTPUT_DIR, exist_ok=True)
31
 
 
42
  try:
43
  import paddle
44
  import paddlex
45
+ from paddlex import create_pipeline
46
  PADDLE_AVAILABLE = True
47
  logger.info(f"Paddle libraries loaded. PaddleX version: {getattr(paddlex, '__version__', 'Unknown')}")
48
  except ImportError as e:
 
60
  return
61
 
62
  try:
63
+ logger.info("🚀 Starting Tachiwin Doc OCR Pipeline Setup...")
64
 
65
+ target_config = None
66
+ # Use existing default.yaml if present
67
+ if os.path.exists(CONFIG_FILE):
68
+ logger.info(f"✅ Found existing configuration: {CONFIG_FILE}")
69
+ target_config = CONFIG_FILE
70
+ else:
71
+ logger.info(f"⚠️ {CONFIG_FILE} not found. Generating default configuration via paddlex CLI...")
72
  try:
73
+ subprocess.run(
74
  ["paddlex", "--get_pipeline_config", "PaddleOCR-VL", "--save_path", "./"],
75
  capture_output=True, text=True, check=True
76
  )
77
+ if os.path.exists(GENERATED_CONFIG):
78
+ target_config = GENERATED_CONFIG
79
+ logger.info(f" Generated {target_config}")
80
+ else:
81
+ logger.error(f"❌ CLI generation failed to produce {GENERATED_CONFIG}")
82
+ logger.info(f"Directory contents: {os.listdir('.')}")
83
+ return
84
+ except Exception as e:
85
+ logger.error(f"❌ Failed to run paddlex CLI: {e}")
86
+ return
87
+
88
+ # Load and verify/update config
89
+ logger.info(f"📄 Loading YAML from {target_config}...")
90
+ with open(target_config, 'r', encoding='utf-8') as f:
 
 
 
 
 
91
  config_data = yaml.safe_load(f)
92
 
93
+ # Update model_dir if it's not set correctly
 
 
94
  updated = False
95
+ def update_config(d):
96
+ nonlocal updated
97
+ for k, v in d.items():
98
+ if k == 'VLRecognition' and isinstance(v, dict):
99
+ if v.get('model_dir') != CUSTOM_MODEL_PATH:
100
+ logger.info(f"🔧 Updating VLRecognition model_dir: {v.get('model_dir')} -> {CUSTOM_MODEL_PATH}")
 
 
 
 
 
101
  v['model_dir'] = CUSTOM_MODEL_PATH
102
+ updated = True
103
+ elif isinstance(v, dict):
104
+ update_config(v)
 
 
105
 
106
+ update_config(config_data)
 
 
 
107
 
108
+ if updated:
109
+ with open(target_config, 'w', encoding='utf-8') as f:
110
+ yaml.dump(config_data, f, default_flow_style=False)
111
+ logger.info(f"💾 Updated configuration saved to {target_config}")
112
 
113
+ # Log the config being used
114
+ logger.info(f"--- [START] {target_config} CONTENT ---")
115
  print(yaml.dump(config_data, default_flow_style=False))
116
+ logger.info(f"--- [END] {target_config} CONTENT ---")
117
 
118
+ # Initialize pipeline using the recommended PaddleX way
119
+ logger.info(f"⚙️ Initializing pipeline with create_pipeline(pipeline={target_config})")
120
+ # According to help: create_pipeline can take a path to yaml
121
+ pipeline = create_pipeline(pipeline=target_config)
122
+ logger.info("✨ Pipeline initialized successfully!")
123
 
124
  except Exception as e:
125
+ logger.error(f"🔥 CRITICAL: Pipeline Setup Failed")
126
  logger.error(traceback.format_exc())
127
 
128
  # Initial setup
 
182
  # --- Inference Logic ---
183
 
184
  def run_inference(img_path, task_type="ocr"):
185
+ if not PADDLE_AVAILABLE:
186
+ return "❌ Paddle backend not installed.", "", "", ""
187
+
188
+ if pipeline is None:
189
+ return "❌ Pipeline is not initialized. Check server logs for error details.", "", "", ""
190
 
191
  if not img_path:
192
+ return "⚠️ No image provided.", "", "", ""
193
 
194
  try:
195
  logger.info(f"--- Inference Start: {task_type} ---")
 
204
  os.makedirs(run_output_dir, exist_ok=True)
205
 
206
  for i, res in enumerate(output):
 
207
  res.save_to_json(save_path=run_output_dir)
208
  res.save_to_markdown(save_path=run_output_dir)
209
  res.print()
210
 
 
211
  fnames = os.listdir(run_output_dir)
212
  for fname in fnames:
213
  fpath = os.path.join(run_output_dir, fname)
 
219
  json_content += f.read() + "\n\n"
220
  elif fname.endswith((".png", ".jpg", ".jpeg")) and "res" in fname:
221
  vis_src = image_to_base64_data_url(fpath)
222
+ vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
223
  vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
224
  vis_html += f'</div>'
225
 
 
227
  md_content = "⚠️ Finished but no content was recognized."
228
 
229
  md_preview = _escape_inequalities_in_math(md_content)
230
+ logger.info("--- Inference Finished Successfully ---")
231
  return md_preview, md_content, vis_html, json_content
232
 
233
  except Exception as e:
234
+ logger.error(f"Inference Error: {e}")
235
  logger.error(traceback.format_exc())
236
  return f"❌ Error: {str(e)}", "", "", ""
237
 
238
  # --- UI Components ---
 
239
 
240
  custom_css = """
241
  body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
 
246
  color: white;
247
  border-radius: 1.5rem;
248
  margin-bottom: 2rem;
249
+ box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
250
  }
251
  .app-header h1 { color: white !important; font-weight: 800; font-size: 2.5rem; }
252
+ .notice { background: #f0fdf4; border: 1px solid #bbf7d0; color: #166534; padding: 1rem; border-radius: 1rem; margin-bottom: 2rem; font-weight: 500;}
253
  .output-box { border: 1px solid #e2e8f0 !important; border-radius: 1rem !important; }
254
  """
255
 
 
258
  """
259
  <div class="app-header">
260
  <h1>🌎 Tachiwin Document Parsing OCR 🦡</h1>
261
+ <p>Advancing linguistic rights with state-of-the-art document parsing</p>
262
  </div>
263
  """
264
  )
265
 
266
  with gr.Row(elem_classes=["notice"]):
267
+ status_text = "Initialized" if pipeline else "Initializing/Failed"
268
+ gr.Markdown(f"**⚡ Status:** {status_text} | **Model:** `{CUSTOM_MODEL_PATH}` | **Hardware:** CPU")
269
 
270
  with gr.Tabs():
 
271
  with gr.Tab("📄 Full Document Parsing"):
272
  with gr.Row():
273
  with gr.Column(scale=5):
274
+ file_doc = gr.File(label="Upload Document", type="filepath")
275
  preview_doc_html = gr.HTML(visible=False)
276
+ btn_parse = gr.Button("🔍 Start Parsing", variant="primary")
277
  with gr.Row():
278
  chart_switch = gr.Checkbox(label="Chart OCR", value=True)
279
  unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
 
295
 
296
  btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
297
 
 
298
  with gr.Tab("🧩 Specific Recognition"):
299
  with gr.Row():
300
  with gr.Column(scale=5):
301
  file_vl = gr.File(label="Upload Element", type="filepath")
302
  preview_vl_html = gr.HTML(visible=False)
303
  with gr.Row():
304
+ btn_ocr = gr.Button("Text", variant="secondary")
305
  btn_formula = gr.Button("Formula", variant="secondary")
306
  btn_table = gr.Button("Table", variant="secondary")
307
 
 
321
  for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
322
  btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
323
 
 
324
  with gr.Tab("📍 Feature Spotting"):
325
  with gr.Row():
326
  with gr.Column(scale=5):
 
331
  with gr.Column(scale=7):
332
  with gr.Tabs():
333
  with gr.Tab("🖼️ Detection"):
334
+ vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Visual detection here.</div>')
335
  with gr.Tab("💾 JSON Feed"):
336
  json_spot = gr.Code(label="JSON", language="json")
337
 
 
343
 
344
  btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
345
 
346
+ gr.Markdown("--- \n *Tachiwin Project: Indigenous Languages of Mexico.*")
347
 
348
  if __name__ == "__main__":
349
  demo.queue().launch()
default.yaml ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Serving:
2
+ extra:
3
+ max_num_input_imgs: null
4
+ SubModules:
5
+ LayoutDetection:
6
+ batch_size: 8
7
+ layout_merge_bboxes_mode:
8
+ 0: union
9
+ 1: union
10
+ 2: union
11
+ 3: large
12
+ 4: union
13
+ 5: large
14
+ 6: large
15
+ 7: union
16
+ 8: union
17
+ 9: union
18
+ 10: union
19
+ 11: union
20
+ 12: union
21
+ 13: union
22
+ 14: union
23
+ 15: large
24
+ 16: union
25
+ 17: large
26
+ 18: union
27
+ 19: union
28
+ 20: union
29
+ 21: union
30
+ 22: union
31
+ 23: union
32
+ 24: union
33
+ layout_nms: true
34
+ layout_unclip_ratio:
35
+ - 1.0
36
+ - 1.0
37
+ model_dir: null
38
+ model_name: PP-DocLayoutV2
39
+ module_name: layout_detection
40
+ threshold:
41
+ 0: 0.5
42
+ 1: 0.5
43
+ 2: 0.5
44
+ 3: 0.5
45
+ 4: 0.5
46
+ 5: 0.4
47
+ 6: 0.4
48
+ 7: 0.5
49
+ 8: 0.5
50
+ 9: 0.5
51
+ 10: 0.5
52
+ 11: 0.5
53
+ 12: 0.5
54
+ 13: 0.5
55
+ 14: 0.5
56
+ 15: 0.4
57
+ 16: 0.5
58
+ 17: 0.4
59
+ 18: 0.5
60
+ 19: 0.5
61
+ 20: 0.45
62
+ 21: 0.5
63
+ 22: 0.4
64
+ 23: 0.4
65
+ 24: 0.5
66
+ VLRecognition:
67
+ batch_size: -1
68
+ genai_config:
69
+ backend: native
70
+ model_dir: tachiwin/Tachiwin-OCR-1.5
71
+ model_name: PaddleOCR-VL-0.9B
72
+ module_name: vl_recognition
73
+ SubPipelines:
74
+ DocPreprocessor:
75
+ SubModules:
76
+ DocOrientationClassify:
77
+ batch_size: 8
78
+ model_dir: null
79
+ model_name: PP-LCNet_x1_0_doc_ori
80
+ module_name: doc_text_orientation
81
+ DocUnwarping:
82
+ model_dir: null
83
+ model_name: UVDoc
84
+ module_name: image_unwarping
85
+ batch_size: 8
86
+ pipeline_name: doc_preprocessor
87
+ use_doc_orientation_classify: true
88
+ use_doc_unwarping: true
89
+ batch_size: 64
90
+ format_block_content: false
91
+ markdown_ignore_labels:
92
+ - number
93
+ - footnote
94
+ - header
95
+ - header_image
96
+ - footer
97
+ - footer_image
98
+ - aside_text
99
+ merge_layout_blocks: true
100
+ pipeline_name: PaddleOCR-VL
101
+ use_chart_recognition: false
102
+ use_doc_preprocessor: false
103
+ use_layout_detection: true
104
+ use_queues: true