Spaces:
Running
Running
Update working_yolo_pipeline.py
Browse files- working_yolo_pipeline.py +75 -74
working_yolo_pipeline.py
CHANGED
|
@@ -273,11 +273,66 @@ except Exception as e:
|
|
| 273 |
|
| 274 |
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
# def get_latex_from_base64(base64_string: str) -> str:
|
| 277 |
# """
|
| 278 |
-
# Decodes a Base64 image string, uses Pix2Text to recognize the formula,
|
| 279 |
-
#
|
| 280 |
-
#
|
| 281 |
# """
|
| 282 |
# if p2t is None:
|
| 283 |
# return "[P2T_ERROR: Model not initialized]"
|
|
@@ -286,40 +341,37 @@ except Exception as e:
|
|
| 286 |
# # 1. Decode Base64 to Image
|
| 287 |
# image_data = base64.b64decode(base64_string)
|
| 288 |
# image = Image.open(io.BytesIO(image_data))
|
| 289 |
-
|
| 290 |
# # 2. Recognize text and formulas
|
| 291 |
-
#
|
| 292 |
-
#
|
| 293 |
-
|
|
|
|
| 294 |
# # 3. Parse the result for LaTeX
|
| 295 |
# extracted_latex_parts = []
|
| 296 |
# if isinstance(result, list):
|
| 297 |
# for item in result:
|
| 298 |
-
# # Use .text for structured output, item itself for string output
|
| 299 |
# text = item.text if hasattr(item, 'text') else str(item)
|
| 300 |
# extracted_latex_parts.append(text)
|
| 301 |
# elif isinstance(result, str):
|
| 302 |
-
#
|
| 303 |
-
|
| 304 |
-
# # Join
|
| 305 |
# extracted_latex = " ".join(extracted_latex_parts).strip()
|
| 306 |
-
|
| 307 |
-
# #
|
| 308 |
# cleaned_latex = re.sub(r'\s+', '', extracted_latex)
|
| 309 |
-
|
| 310 |
# if not cleaned_latex:
|
| 311 |
-
#
|
| 312 |
-
|
| 313 |
-
# #
|
| 314 |
-
# # This
|
| 315 |
-
#
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
# # Return the clean and corrected LaTeX string.
|
| 319 |
# return final_latex
|
| 320 |
|
| 321 |
# except Exception as e:
|
| 322 |
-
# # Catch any unexpected errors
|
| 323 |
# print(f" ❌ Pix2Text Recognition failed: {e}")
|
| 324 |
# return f"[P2T_ERROR: Recognition failed: {e}]"
|
| 325 |
|
|
@@ -327,57 +379,6 @@ except Exception as e:
|
|
| 327 |
|
| 328 |
|
| 329 |
|
| 330 |
-
def get_latex_from_base64(base64_string: str) -> str:
    """
    Turn a Base64-encoded formula image into a compact LaTeX string.

    The image is decoded, handed to the module-level Pix2Text instance
    (``p2t``) for recognition, and the recognized text is then stripped of
    every whitespace character. Finally, any run of two or more consecutive
    backslashes is reduced to a single backslash.

    Returns a ``[P2T_ERROR: ...]`` / ``[P2T_WARNING: ...]`` marker string
    instead of raising when the model is missing, nothing was recognized,
    or any step fails.
    """
    if p2t is None:
        return "[P2T_ERROR: Model not initialized]"

    try:
        # Step 1: Base64 text -> raw bytes -> image object.
        raw_bytes = base64.b64decode(base64_string)
        picture = Image.open(io.BytesIO(raw_bytes))

        # Step 2: run recognition on the decoded image.
        recognized = p2t.recognize(
            picture,
            save_formula_images=False,
            use_analyzer=True,
            keep_original_image=False,
        )

        # Step 3: gather the text fragments; any other result type yields none.
        if isinstance(recognized, list):
            fragments = [
                entry.text if hasattr(entry, 'text') else str(entry)
                for entry in recognized
            ]
        elif isinstance(recognized, str):
            fragments = [recognized]
        else:
            fragments = []

        # Step 4: join, trim, then drop every whitespace character.
        compact = re.sub(r'\s+', '', " ".join(fragments).strip())
        if not compact:
            return "[P2T_WARNING: No formula found]"

        # Step 5: squash runs of 2+ backslashes down to exactly one.
        return re.sub(r'\\{2,}', r'\\', compact)

    except Exception as err:
        print(f" ❌ Pix2Text Recognition failed: {err}")
        return f"[P2T_ERROR: Recognition failed: {err}]"
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
|
| 382 |
|
| 383 |
# # Initialize the YOLO model
|
|
|
|
| 273 |
|
| 274 |
|
| 275 |
|
| 276 |
+
def _collect_text_parts(result) -> list:
    """Return the text fragments contained in a recognition result."""
    if isinstance(result, str):
        return [result]
    if not isinstance(result, list):
        # Unknown result shape: treat as "nothing recognized".
        return []

    parts = []
    for item in result:
        if isinstance(item, dict):
            # NOTE(review): Pix2Text appears to return dicts carrying a
            # "text" key when it is not returning a plain string -- confirm
            # against the installed version. Without this branch a dict
            # would be rendered through str(item) as its repr.
            parts.append(item.get("text") or "")
        elif hasattr(item, "text"):
            # Structured output object exposing the text as an attribute.
            parts.append(item.text)
        else:
            parts.append(str(item))
    return parts


def _normalize_latex(text: str) -> str:
    """Remove all whitespace, then collapse backslash runs to one backslash."""
    without_whitespace = re.sub(r"\s+", "", text)
    # Collapse AFTER whitespace removal so that backslashes which only become
    # adjacent once the whitespace is gone are collapsed as well. A regex on
    # runs of 2+ is used because str.replace('\\\\', '\\') merely halves the
    # run: three or four backslashes would still leave a doubled backslash.
    return re.sub(r"\\{2,}", lambda _match: "\\", without_whitespace)


def get_latex_from_base64(base64_string: str) -> str:
    """
    Decode a Base64 image string and return the LaTeX recognized by Pix2Text.

    The recognized text has all whitespace removed and every run of two or
    more consecutive backslashes collapsed into a single backslash, so that
    commands such as ``\\frac`` are not emitted with doubled backslashes.

    Args:
        base64_string: Base64-encoded image data.

    Returns:
        The cleaned LaTeX string, or one of the marker strings
        ``"[P2T_ERROR: Model not initialized]"`` (module-level ``p2t`` is
        ``None``), ``"[P2T_WARNING: No formula found]"`` (nothing left after
        cleaning), or ``"[P2T_ERROR: Recognition failed: ...]"`` (any
        exception while decoding or recognizing). This function does not
        raise; failures are reported through the returned string.
    """
    if p2t is None:
        return "[P2T_ERROR: Model not initialized]"

    try:
        # 1. Decode Base64 to an image object.
        image_data = base64.b64decode(base64_string)
        image = Image.open(io.BytesIO(image_data))

        # 2. Recognize text and formulas.
        # keep_original_image=False was chosen by the original author to
        # save memory.
        result = p2t.recognize(
            image,
            save_formula_images=False,
            use_analyzer=True,
            keep_original_image=False,
        )

        # 3. Join the recognized fragments and normalize the LaTeX.
        final_latex = _normalize_latex(" ".join(_collect_text_parts(result)))

        # Check the value that is actually returned, not an intermediate.
        if not final_latex:
            return "[P2T_WARNING: No formula found]"

        return final_latex

    except Exception as e:
        # Boundary handler: callers expect a marker string, never an exception.
        print(f" ❌ Pix2Text Recognition failed: {e}")
        return f"[P2T_ERROR: Recognition failed: {e}]"
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
|
| 331 |
# def get_latex_from_base64(base64_string: str) -> str:
|
| 332 |
# """
|
| 333 |
+
# Decodes a Base64 image string, uses Pix2Text to recognize the formula,
|
| 334 |
+
# returns the LaTeX code stripped of all whitespace, and collapses unintended
|
| 335 |
+
# repeated backslashes into a single backslash.
|
| 336 |
# """
|
| 337 |
# if p2t is None:
|
| 338 |
# return "[P2T_ERROR: Model not initialized]"
|
|
|
|
| 341 |
# # 1. Decode Base64 to Image
|
| 342 |
# image_data = base64.b64decode(base64_string)
|
| 343 |
# image = Image.open(io.BytesIO(image_data))
|
| 344 |
+
|
| 345 |
# # 2. Recognize text and formulas
|
| 346 |
+
# result = p2t.recognize(
|
| 347 |
+
# image, save_formula_images=False, use_analyzer=True, keep_original_image=False
|
| 348 |
+
# )
|
| 349 |
+
|
| 350 |
# # 3. Parse the result for LaTeX
|
| 351 |
# extracted_latex_parts = []
|
| 352 |
# if isinstance(result, list):
|
| 353 |
# for item in result:
|
|
|
|
| 354 |
# text = item.text if hasattr(item, 'text') else str(item)
|
| 355 |
# extracted_latex_parts.append(text)
|
| 356 |
# elif isinstance(result, str):
|
| 357 |
+
# extracted_latex_parts = [result]
|
| 358 |
+
|
| 359 |
+
# # Join then strip
|
| 360 |
# extracted_latex = " ".join(extracted_latex_parts).strip()
|
| 361 |
+
|
| 362 |
+
# # Remove all whitespace/newlines/tabs as requested
|
| 363 |
# cleaned_latex = re.sub(r'\s+', '', extracted_latex)
|
| 364 |
+
|
| 365 |
# if not cleaned_latex:
|
| 366 |
+
# return "[P2T_WARNING: No formula found]"
|
| 367 |
+
|
| 368 |
+
# # COLLAPSE any run of 2 or more backslashes into a single backslash.
|
| 369 |
+
# # This handles inputs like '\\\\sqrt' or '\\\\\\frac' robustly.
|
| 370 |
+
# final_latex = re.sub(r'\\{2,}', r'\\', cleaned_latex)
|
| 371 |
+
|
|
|
|
|
|
|
| 372 |
# return final_latex
|
| 373 |
|
| 374 |
# except Exception as e:
|
|
|
|
| 375 |
# print(f" ❌ Pix2Text Recognition failed: {e}")
|
| 376 |
# return f"[P2T_ERROR: Recognition failed: {e}]"
|
| 377 |
|
|
|
|
| 379 |
|
| 380 |
|
| 381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
|
| 383 |
|
| 384 |
# # Initialize the YOLO model
|