Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,15 @@ from google import genai
|
|
| 7 |
from google.genai import types
|
| 8 |
from pdf2image import convert_from_bytes
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# Helper functions
|
| 11 |
def parse_list_boxes(text):
|
| 12 |
"""Extracts bounding boxes from response text"""
|
|
@@ -15,15 +24,15 @@ def parse_list_boxes(text):
|
|
| 15 |
return [[float(m) for m in match] for match in matches]
|
| 16 |
|
| 17 |
def draw_bounding_boxes(image, boxes):
|
| 18 |
-
"""Draws bounding boxes on the image"""
|
| 19 |
draw = ImageDraw.Draw(image)
|
| 20 |
width, height = image.size
|
| 21 |
|
| 22 |
for box in boxes:
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
|
| 28 |
draw.rectangle([
|
| 29 |
xmin * width,
|
|
@@ -63,13 +72,8 @@ with col1:
|
|
| 63 |
mime_type="image/png"
|
| 64 |
)
|
| 65 |
|
| 66 |
-
# Get topic boxes
|
| 67 |
-
detection_prompt = (
|
| 68 |
-
f"Identifiziere alle {topic_name} Bereiche in diesem Dokument. "
|
| 69 |
-
"Gib Bounding Boxes im Format [ymin, xmin, ymax, xmax] "
|
| 70 |
-
"als reine Python-Liste ohne weiteren Text. "
|
| 71 |
-
"Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
|
| 72 |
-
)
|
| 73 |
box_response = client.models.generate_content(
|
| 74 |
model="gemini-2.0-flash-exp",
|
| 75 |
contents=[detection_prompt, image_part]
|
|
@@ -88,7 +92,7 @@ with col1:
|
|
| 88 |
st.error(f"Fehler bei Seite {page_num+1}: {str(e)}")
|
| 89 |
boxes = []
|
| 90 |
|
| 91 |
-
# Draw boxes
|
| 92 |
annotated_image = image.copy()
|
| 93 |
if boxes:
|
| 94 |
annotated_image = draw_bounding_boxes(annotated_image, boxes)
|
|
|
|
| 7 |
from google.genai import types
|
| 8 |
from pdf2image import convert_from_bytes
|
| 9 |
|
| 10 |
+
# Constants
|
| 11 |
+
GET_NODE_BOUNDING_BOXES_PROMPT = """\
|
| 12 |
+
Please provide me strict bounding boxes that encompasses the following text in the attached image? I'm trying to draw a rectangle around the text.
|
| 13 |
+
- Use the top-left coordinate system
|
| 14 |
+
- Values should be percentages of the image width and height (0 to 1)
|
| 15 |
+
|
| 16 |
+
{nodes}
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
# Helper functions
|
| 20 |
def parse_list_boxes(text):
|
| 21 |
"""Extracts bounding boxes from response text"""
|
|
|
|
| 24 |
return [[float(m) for m in match] for match in matches]
|
| 25 |
|
| 26 |
def draw_bounding_boxes(image, boxes):
|
| 27 |
+
"""Draws bounding boxes on the image using [xmin, ymin, xmax, ymax] format"""
|
| 28 |
draw = ImageDraw.Draw(image)
|
| 29 |
width, height = image.size
|
| 30 |
|
| 31 |
for box in boxes:
|
| 32 |
+
xmin = max(0.0, min(1.0, box[0]))
|
| 33 |
+
ymin = max(0.0, min(1.0, box[1]))
|
| 34 |
+
xmax = max(0.0, min(1.0, box[2]))
|
| 35 |
+
ymax = max(0.0, min(1.0, box[3]))
|
| 36 |
|
| 37 |
draw.rectangle([
|
| 38 |
xmin * width,
|
|
|
|
| 72 |
mime_type="image/png"
|
| 73 |
)
|
| 74 |
|
| 75 |
+
# Get topic boxes using new prompt
|
| 76 |
+
detection_prompt = GET_NODE_BOUNDING_BOXES_PROMPT.format(nodes=topic_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
box_response = client.models.generate_content(
|
| 78 |
model="gemini-2.0-flash-exp",
|
| 79 |
contents=[detection_prompt, image_part]
|
|
|
|
| 92 |
st.error(f"Fehler bei Seite {page_num+1}: {str(e)}")
|
| 93 |
boxes = []
|
| 94 |
|
| 95 |
+
# Draw boxes with corrected coordinates
|
| 96 |
annotated_image = image.copy()
|
| 97 |
if boxes:
|
| 98 |
annotated_image = draw_bounding_boxes(annotated_image, boxes)
|