heerjtdev committed on
Commit
1572b2d
·
verified ·
1 Parent(s): 147a1d6

Update working_yolo_pipeline.py

Browse files
Files changed (1) hide show
  1. working_yolo_pipeline.py +60 -9
working_yolo_pipeline.py CHANGED
@@ -224,10 +224,60 @@ except Exception as e:
224
 
225
 
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  def get_latex_from_base64(base64_string: str) -> str:
228
  """
229
  Decodes a Base64 image string, uses Pix2Text to recognize the formula,
230
- and returns the LaTeX code, stripped of all whitespace, as requested.
 
231
  """
232
  if p2t is None:
233
  return "[P2T_ERROR: Model not initialized]"
@@ -254,17 +304,19 @@ def get_latex_from_base64(base64_string: str) -> str:
254
  # Join with a space first, then clean all whitespace
255
  extracted_latex = " ".join(extracted_latex_parts).strip()
256
 
257
- # *** CORE CHANGE: Remove all spaces/line breaks as requested by the user ***
258
- # This uses regex to replace any sequence of whitespace characters (spaces, tabs, newlines) with an empty string.
259
  cleaned_latex = re.sub(r'\s+', '', extracted_latex)
260
- cleaned_latex = re.sub('\\', '\', cleaned_latex)
261
-
262
 
263
  if not cleaned_latex:
264
  return "[P2T_WARNING: No formula found]"
265
-
266
- # Return the clean LaTeX string without wrapping $$, as requested.
267
- return cleaned_latex
 
 
 
 
 
268
 
269
  except Exception as e:
270
  # Catch any unexpected errors
@@ -278,7 +330,6 @@ def get_latex_from_base64(base64_string: str) -> str:
278
 
279
 
280
 
281
-
282
  # # Initialize the YOLO model
283
  # model = YOLO(WEIGHTS_PATH)
284
 
 
224
 
225
 
226
 
227
+ # def get_latex_from_base64(base64_string: str) -> str:
228
+ # """
229
+ # Decodes a Base64 image string, uses Pix2Text to recognize the formula,
230
+ # and returns the LaTeX code, stripped of all whitespace, as requested.
231
+ # """
232
+ # if p2t is None:
233
+ # return "[P2T_ERROR: Model not initialized]"
234
+
235
+ # try:
236
+ # # 1. Decode Base64 to Image
237
+ # image_data = base64.b64decode(base64_string)
238
+ # image = Image.open(io.BytesIO(image_data))
239
+
240
+ # # 2. Recognize text and formulas
241
+ # # Use keep_original_image=False to save memory
242
+ # result = p2t.recognize(image, save_formula_images=False, use_analyzer=True, keep_original_image=False)
243
+
244
+ # # 3. Parse the result for LaTeX
245
+ # extracted_latex_parts = []
246
+ # if isinstance(result, list):
247
+ # for item in result:
248
+ # # Use .text for structured output, item itself for string output
249
+ # text = item.text if hasattr(item, 'text') else str(item)
250
+ # extracted_latex_parts.append(text)
251
+ # elif isinstance(result, str):
252
+ # extracted_latex_parts = [result]
253
+
254
+ # # Join with a space first, then clean all whitespace
255
+ # extracted_latex = " ".join(extracted_latex_parts).strip()
256
+
257
+ # # *** CORE CHANGE: Remove all spaces/line breaks as requested by the user ***
258
+ # # This uses regex to replace any sequence of whitespace characters (spaces, tabs, newlines) with an empty string.
259
+ # cleaned_latex = re.sub(r'\s+', '', extracted_latex)
260
+ # cleaned_latex = re.sub('\\', '\', cleaned_latex)
261
+
262
+
263
+ # if not cleaned_latex:
264
+ # return "[P2T_WARNING: No formula found]"
265
+
266
+ # # Return the clean LaTeX string without wrapping $$, as requested.
267
+ # return cleaned_latex
268
+
269
+ # except Exception as e:
270
+ # # Catch any unexpected errors
271
+ # print(f" ❌ Pix2Text Recognition failed: {e}")
272
+ # return f"[P2T_ERROR: Recognition failed: {e}]"
273
+
274
+
275
+
276
  def get_latex_from_base64(base64_string: str) -> str:
277
  """
278
  Decodes a Base64 image string, uses Pix2Text to recognize the formula,
279
+ and returns the LaTeX code, stripped of all whitespace, as requested,
280
+ and corrects unintended double backslashes.
281
  """
282
  if p2t is None:
283
  return "[P2T_ERROR: Model not initialized]"
 
304
  # Join with a space first, then clean all whitespace
305
  extracted_latex = " ".join(extracted_latex_parts).strip()
306
 
307
+ # *** CORE CHANGE 1: Remove all spaces/line breaks ***
 
308
  cleaned_latex = re.sub(r'\s+', '', extracted_latex)
 
 
309
 
310
  if not cleaned_latex:
311
  return "[P2T_WARNING: No formula found]"
312
+
313
+ # *** CORE CHANGE 2: Fix unintended double backslashes for LaTeX rendering ***
314
+ # This replaces every sequence of two literal backslashes ('\\') with one literal backslash ('\'),
315
+ # ensuring LaTeX commands like '\frac' are correctly formed.
316
+ final_latex = cleaned_latex.replace('\\\\', '\\')
317
+
318
+ # Return the clean and corrected LaTeX string.
319
+ return final_latex
320
 
321
  except Exception as e:
322
  # Catch any unexpected errors
 
330
 
331
 
332
 
 
333
  # # Initialize the YOLO model
334
  # model = YOLO(WEIGHTS_PATH)
335