Spaces:
Running
Running
Update working_yolo_pipeline.py
Browse files- working_yolo_pipeline.py +60 -9
working_yolo_pipeline.py
CHANGED
|
@@ -224,10 +224,60 @@ except Exception as e:
|
|
| 224 |
|
| 225 |
|
| 226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
def get_latex_from_base64(base64_string: str) -> str:
|
| 228 |
"""
|
| 229 |
Decodes a Base64 image string, uses Pix2Text to recognize the formula,
|
| 230 |
-
and returns the LaTeX code, stripped of all whitespace, as requested.
|
|
|
|
| 231 |
"""
|
| 232 |
if p2t is None:
|
| 233 |
return "[P2T_ERROR: Model not initialized]"
|
|
@@ -254,17 +304,19 @@ def get_latex_from_base64(base64_string: str) -> str:
|
|
| 254 |
# Join with a space first, then clean all whitespace
|
| 255 |
extracted_latex = " ".join(extracted_latex_parts).strip()
|
| 256 |
|
| 257 |
-
# *** CORE CHANGE: Remove all spaces/line breaks as requested by the user ***
|
| 258 |
-
# This uses regex to replace any sequence of whitespace characters (spaces, tabs, newlines) with an empty string.
|
| 259 |
cleaned_latex = re.sub(r'\s+', '', extracted_latex)
|
| 260 |
-
cleaned_latex = re.sub('\\', '\', cleaned_latex)
|
| 261 |
-
|
| 262 |
|
| 263 |
if not cleaned_latex:
|
| 264 |
return "[P2T_WARNING: No formula found]"
|
| 265 |
-
|
| 266 |
-
# Return the clean LaTeX string without wrapping $$, as requested.
|
| 267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
except Exception as e:
|
| 270 |
# Catch any unexpected errors
|
|
@@ -278,7 +330,6 @@ def get_latex_from_base64(base64_string: str) -> str:
|
|
| 278 |
|
| 279 |
|
| 280 |
|
| 281 |
-
|
| 282 |
# # Initialize the YOLO model
|
| 283 |
# model = YOLO(WEIGHTS_PATH)
|
| 284 |
|
|
|
|
| 224 |
|
| 225 |
|
| 226 |
|
| 227 |
+
# def get_latex_from_base64(base64_string: str) -> str:
|
| 228 |
+
# """
|
| 229 |
+
# Decodes a Base64 image string, uses Pix2Text to recognize the formula,
|
| 230 |
+
# and returns the LaTeX code, stripped of all whitespace, as requested.
|
| 231 |
+
# """
|
| 232 |
+
# if p2t is None:
|
| 233 |
+
# return "[P2T_ERROR: Model not initialized]"
|
| 234 |
+
|
| 235 |
+
# try:
|
| 236 |
+
# # 1. Decode Base64 to Image
|
| 237 |
+
# image_data = base64.b64decode(base64_string)
|
| 238 |
+
# image = Image.open(io.BytesIO(image_data))
|
| 239 |
+
|
| 240 |
+
# # 2. Recognize text and formulas
|
| 241 |
+
# # Use keep_original_image=False to save memory
|
| 242 |
+
# result = p2t.recognize(image, save_formula_images=False, use_analyzer=True, keep_original_image=False)
|
| 243 |
+
|
| 244 |
+
# # 3. Parse the result for LaTeX
|
| 245 |
+
# extracted_latex_parts = []
|
| 246 |
+
# if isinstance(result, list):
|
| 247 |
+
# for item in result:
|
| 248 |
+
# # Use .text for structured output, item itself for string output
|
| 249 |
+
# text = item.text if hasattr(item, 'text') else str(item)
|
| 250 |
+
# extracted_latex_parts.append(text)
|
| 251 |
+
# elif isinstance(result, str):
|
| 252 |
+
# extracted_latex_parts = [result]
|
| 253 |
+
|
| 254 |
+
# # Join with a space first, then clean all whitespace
|
| 255 |
+
# extracted_latex = " ".join(extracted_latex_parts).strip()
|
| 256 |
+
|
| 257 |
+
# # *** CORE CHANGE: Remove all spaces/line breaks as requested by the user ***
|
| 258 |
+
# # This uses regex to replace any sequence of whitespace characters (spaces, tabs, newlines) with an empty string.
|
| 259 |
+
# cleaned_latex = re.sub(r'\s+', '', extracted_latex)
|
| 260 |
+
# cleaned_latex = re.sub('\\', '\', cleaned_latex)
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
# if not cleaned_latex:
|
| 264 |
+
# return "[P2T_WARNING: No formula found]"
|
| 265 |
+
|
| 266 |
+
# # Return the clean LaTeX string without wrapping $$, as requested.
|
| 267 |
+
# return cleaned_latex
|
| 268 |
+
|
| 269 |
+
# except Exception as e:
|
| 270 |
+
# # Catch any unexpected errors
|
| 271 |
+
# print(f" ❌ Pix2Text Recognition failed: {e}")
|
| 272 |
+
# return f"[P2T_ERROR: Recognition failed: {e}]"
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
|
| 276 |
def get_latex_from_base64(base64_string: str) -> str:
|
| 277 |
"""
|
| 278 |
Decodes a Base64 image string, uses Pix2Text to recognize the formula,
|
| 279 |
+
and returns the LaTeX code, stripped of all whitespace, as requested,
|
| 280 |
+
and corrects unintended double backslashes.
|
| 281 |
"""
|
| 282 |
if p2t is None:
|
| 283 |
return "[P2T_ERROR: Model not initialized]"
|
|
|
|
| 304 |
# Join with a space first, then clean all whitespace
|
| 305 |
extracted_latex = " ".join(extracted_latex_parts).strip()
|
| 306 |
|
| 307 |
+
# *** CORE CHANGE 1: Remove all spaces/line breaks ***
|
|
|
|
| 308 |
cleaned_latex = re.sub(r'\s+', '', extracted_latex)
|
|
|
|
|
|
|
| 309 |
|
| 310 |
if not cleaned_latex:
|
| 311 |
return "[P2T_WARNING: No formula found]"
|
| 312 |
+
|
| 313 |
+
# *** CORE CHANGE 2: Fix unintended double backslashes for LaTeX rendering ***
|
| 314 |
+
# This replaces every sequence of two literal backslashes ('\\') with one literal backslash ('\'),
|
| 315 |
+
# ensuring LaTeX commands like '\frac' are correctly formed.
|
| 316 |
+
final_latex = cleaned_latex.replace('\\\\', '\\')
|
| 317 |
+
|
| 318 |
+
# Return the clean and corrected LaTeX string.
|
| 319 |
+
return final_latex
|
| 320 |
|
| 321 |
except Exception as e:
|
| 322 |
# Catch any unexpected errors
|
|
|
|
| 330 |
|
| 331 |
|
| 332 |
|
|
|
|
| 333 |
# # Initialize the YOLO model
|
| 334 |
# model = YOLO(WEIGHTS_PATH)
|
| 335 |
|