Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -654,47 +654,124 @@ def extract_json_from_llm_response(raw_response: str) -> dict:
|
|
| 654 |
logger.error("Sanitized JSON still invalid:\n%s", json_string)
|
| 655 |
raise
|
| 656 |
|
| 657 |
-
def clean_base64_for_model(raw_b64):
|
| 658 |
-
|
| 659 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 660 |
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
- a PIL Image instance → encodes to PNG/base64
|
| 664 |
-
- a raw base64 string → strips whitespace and data URI prefix
|
| 665 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
if not raw_b64:
|
| 667 |
-
return ""
|
| 668 |
|
| 669 |
-
# 1. If it’s a list, take its first element
|
| 670 |
if isinstance(raw_b64, list):
|
| 671 |
raw_b64 = raw_b64[0] if raw_b64 else ""
|
| 672 |
if not raw_b64:
|
| 673 |
-
return ""
|
| 674 |
|
| 675 |
-
# 2. If it’s a PIL Image, convert to base64
|
| 676 |
if isinstance(raw_b64, Image.Image):
|
| 677 |
buf = io.BytesIO()
|
| 678 |
raw_b64.save(buf, format="PNG")
|
| 679 |
raw_b64 = base64.b64encode(buf.getvalue()).decode()
|
| 680 |
|
| 681 |
-
# 3. At this point it must be a string
|
| 682 |
if not isinstance(raw_b64, str):
|
| 683 |
raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
|
| 684 |
|
| 685 |
-
#
|
| 686 |
clean_b64 = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", raw_b64)
|
| 687 |
clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()
|
| 688 |
|
| 689 |
-
#
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
|
| 699 |
def format_scratch_pseudo_code(code_string):
|
| 700 |
"""
|
|
|
|
| 654 |
logger.error("Sanitized JSON still invalid:\n%s", json_string)
|
| 655 |
raise
|
| 656 |
|
| 657 |
+
# def clean_base64_for_model(raw_b64):
|
| 658 |
+
# """
|
| 659 |
+
# Normalize input into a valid data:image/png;base64,<payload> string.
|
| 660 |
+
|
| 661 |
+
# Accepts:
|
| 662 |
+
# - a list of base64 strings → picks the first element
|
| 663 |
+
# - a PIL Image instance → encodes to PNG/base64
|
| 664 |
+
# - a raw base64 string → strips whitespace and data URI prefix
|
| 665 |
+
# """
|
| 666 |
+
# if not raw_b64:
|
| 667 |
+
# return ""
|
| 668 |
+
|
| 669 |
+
# # 1. If it’s a list, take its first element
|
| 670 |
+
# if isinstance(raw_b64, list):
|
| 671 |
+
# raw_b64 = raw_b64[0] if raw_b64 else ""
|
| 672 |
+
# if not raw_b64:
|
| 673 |
+
# return ""
|
| 674 |
+
|
| 675 |
+
# # 2. If it’s a PIL Image, convert to base64
|
| 676 |
+
# if isinstance(raw_b64, Image.Image):
|
| 677 |
+
# buf = io.BytesIO()
|
| 678 |
+
# raw_b64.save(buf, format="PNG")
|
| 679 |
+
# raw_b64 = base64.b64encode(buf.getvalue()).decode()
|
| 680 |
+
|
| 681 |
+
# # 3. At this point it must be a string
|
| 682 |
+
# if not isinstance(raw_b64, str):
|
| 683 |
+
# raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
|
| 684 |
+
|
| 685 |
+
# # 4. Strip any existing data URI prefix, whitespace, or newlines
|
| 686 |
+
# clean_b64 = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", raw_b64)
|
| 687 |
+
# clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()
|
| 688 |
+
|
| 689 |
+
# # 5. Validate it’s proper base64
|
| 690 |
+
# try:
|
| 691 |
+
# base64.b64decode(clean_b64)
|
| 692 |
+
# except Exception as e:
|
| 693 |
+
# logger.error(f"Invalid Base64 passed to model: {e}")
|
| 694 |
+
# raise
|
| 695 |
+
|
| 696 |
+
# # 6. Return with the correct data URI prefix
|
| 697 |
+
# return f"data:image/png;base64,{clean_b64}"
|
| 698 |
|
| 699 |
+
# Shrink a base64-encoded image when it exceeds a size limit.
|
| 700 |
+
def reduce_image_size_to_limit(clean_b64_str, max_kb=4000):
    """Re-encode a base64 image as a JPEG no larger than ``max_kb``.

    The highest JPEG quality in the range 20..95 whose encoded output fits
    the limit is chosen by binary search. If even the lowest quality is too
    large, the image is scaled down in steps until it fits.

    Args:
        clean_b64_str: Base64 image payload, with or without a
            ``data:image/<type>;base64,`` prefix.
        max_kb: Upper bound, in KiB, on the size of the *binary* JPEG.
            The base64 text returned is about 4/3 of that size.

    Returns:
        A ``data:image/jpeg;base64,<payload>`` string.

    Raises:
        ValueError: If the image cannot be made to fit within ``max_kb``
            even after being scaled down to a single pixel.
    """
    import base64
    import re
    from io import BytesIO

    from PIL import Image

    def _encode_jpeg(image, quality):
        """Return ``image`` encoded as JPEG bytes at the given quality."""
        buffer = BytesIO()
        image.save(buffer, format="JPEG", quality=quality)
        return buffer.getvalue()

    # Remove the data URI prefix
    base64_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", clean_b64_str)
    image_data = base64.b64decode(base64_data)

    # Load into PIL
    img = Image.open(BytesIO(image_data))

    # The JPEG writer rejects modes with alpha or a palette (e.g. RGBA, P),
    # so convert anything it cannot store directly.
    if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        img = img.convert("RGB")

    min_quality, max_quality = 20, 95  # reasonable JPEG quality range
    low, high = min_quality, max_quality
    best_bytes = None

    while low <= high:
        mid = (low + high) // 2
        encoded = _encode_jpeg(img, mid)

        if len(encoded) / 1024 <= max_kb:
            # This quality is valid, try higher
            best_bytes = encoded
            low = mid + 1
        else:
            # Too big, try lower
            high = mid - 1

    # No quality in range fits: shrink the dimensions at the lowest quality
    # instead of returning a payload built from ``None``.
    while best_bytes is None:
        width, height = img.size
        if width == 1 and height == 1:
            raise ValueError(
                f"Cannot reduce image to fit within {max_kb} KB"
            )
        img = img.resize((max(1, width * 3 // 4), max(1, height * 3 // 4)))
        encoded = _encode_jpeg(img, min_quality)
        if len(encoded) / 1024 <= max_kb:
            best_bytes = encoded

    best_b64 = base64.b64encode(best_bytes).decode("utf-8")
    return f"data:image/jpeg;base64,{best_b64}"
|
| 736 |
+
|
| 737 |
+
# Normalize image input into a base64 data URI for the model.
|
| 738 |
+
def clean_base64_for_model(raw_b64):
    """Normalize image input into a ``data:image/jpeg;base64,...`` string.

    Accepts:
        - a list: only its first element is used
        - a PIL Image instance: encoded to PNG, then base64
        - a base64 string: any ``data:image/<type>;base64,`` prefix,
          newlines and surrounding whitespace are stripped

    When the cleaned base64 text is larger than 4,000,000 bytes it is
    re-encoded through ``reduce_image_size_to_limit`` so that the base64
    text itself fits under that limit.

    Returns:
        The data-URI string. For empty input (or a list whose first element
        is empty) the tuple ``("", "")`` is returned instead.

    Raises:
        TypeError: If the input is neither a string nor a PIL Image.
    """
    import base64
    import io
    import re

    # NOTE(review): the empty-input paths return a 2-tuple while every other
    # path returns a single string. Kept as-is because callers are not
    # visible here; confirm which shape they expect.
    if not raw_b64:
        return "", ""

    if isinstance(raw_b64, list):
        raw_b64 = raw_b64[0] if raw_b64 else ""
        if not raw_b64:
            return "", ""

    if not isinstance(raw_b64, str):
        # Pillow is only needed for non-string input, so import it lazily.
        from PIL import Image

        if not isinstance(raw_b64, Image.Image):
            raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
        buf = io.BytesIO()
        raw_b64.save(buf, format="PNG")
        raw_b64 = base64.b64encode(buf.getvalue()).decode()

    # Remove data URI prefix if present
    clean_b64 = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", raw_b64)
    clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()

    # Log original size
    max_b64_bytes = 4000000
    original_size = len(clean_b64.encode("utf-8"))
    print(f"Original Base64 size (bytes): {original_size}")
    if original_size > max_b64_bytes:
        # The limit applies to the base64 text, which is 4/3 the size of the
        # binary image, so the binary budget is 3/4 of the limit.
        max_binary_kb = max_b64_bytes * 3 // 4 // 1024
        reduced_b64 = reduce_image_size_to_limit(clean_b64, max_kb=max_binary_kb)
        clean_b64_2 = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", reduced_b64)
        clean_b64_2 = clean_b64_2.replace("\n", "").replace("\r", "").strip()
        reduced_size = len(clean_b64_2.encode("utf-8"))
        print(f"Reduced Base64 size (bytes): {reduced_size}")
        # reduced_b64 already carries a data-URI prefix; build the result
        # from the stripped payload so the prefix is not duplicated.
        return f"data:image/jpeg;base64,{clean_b64_2}"

    # NOTE(review): the payload is labelled image/jpeg even when it is PNG
    # data (e.g. the PIL Image path above) — confirm the consumer accepts it.
    return f"data:image/jpeg;base64,{clean_b64}"
|
| 775 |
|
| 776 |
def format_scratch_pseudo_code(code_string):
|
| 777 |
"""
|