pdf_gemini

Sleeping

App Files Files Community

Sebbe33 committed on Feb 16, 2025

Commit

316d102

verified ·

1 Parent(s): 19469f5

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -13

app.py CHANGED Viewed

@@ -7,6 +7,15 @@ from google import genai
 from google.genai import types
 from pdf2image import convert_from_bytes
 # Helper functions
 def parse_list_boxes(text):
     """Extracts bounding boxes from response text"""
@@ -15,15 +24,15 @@ def parse_list_boxes(text):
     return [[float(m) for m in match] for match in matches]
 def draw_bounding_boxes(image, boxes):
-    """Draws bounding boxes on the image"""
     draw = ImageDraw.Draw(image)
     width, height = image.size
     for box in boxes:
-        ymin = max(0.0, min(1.0, box[0]))
-        xmin = max(0.0, min(1.0, box[1]))
-        ymax = max(0.0, min(1.0, box[2]))
-        xmax = max(0.0, min(1.0, box[3]))
         draw.rectangle([
             xmin * width,
@@ -63,13 +72,8 @@ with col1:
                             mime_type="image/png"
                         )
-                        # Get topic boxes
-                        detection_prompt = (
-                            f"Identifiziere alle {topic_name} Bereiche in diesem Dokument. "
-                            "Gib Bounding Boxes im Format [ymin, xmin, ymax, xmax] "
-                            "als reine Python-Liste ohne weiteren Text. "
-                            "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
-                        )
                         box_response = client.models.generate_content(
                             model="gemini-2.0-flash-exp",
                             contents=[detection_prompt, image_part]
@@ -88,7 +92,7 @@ with col1:
                             st.error(f"Fehler bei Seite {page_num+1}: {str(e)}")
                             boxes = []
-                        # Draw boxes
                         annotated_image = image.copy()
                         if boxes:
                             annotated_image = draw_bounding_boxes(annotated_image, boxes)

 from google.genai import types
 from pdf2image import convert_from_bytes
+# Constants
+GET_NODE_BOUNDING_BOXES_PROMPT = """\
+Please provide me strict bounding boxes that encompasses the following text in the attached image? I'm trying to draw a rectangle around the text.
+- Use the top-left coordinate system
+- Values should be percentages of the image width and height (0 to 1)
+{nodes}
+"""
 # Helper functions
 def parse_list_boxes(text):
     """Extracts bounding boxes from response text"""
     return [[float(m) for m in match] for match in matches]
 def draw_bounding_boxes(image, boxes):
+    """Draws bounding boxes on the image using [xmin, ymin, xmax, ymax] format"""
     draw = ImageDraw.Draw(image)
     width, height = image.size
     for box in boxes:
+        xmin = max(0.0, min(1.0, box[0]))
+        ymin = max(0.0, min(1.0, box[1]))
+        xmax = max(0.0, min(1.0, box[2]))
+        ymax = max(0.0, min(1.0, box[3]))
         draw.rectangle([
             xmin * width,
                             mime_type="image/png"
                         )
+                        # Get topic boxes using new prompt
+                        detection_prompt = GET_NODE_BOUNDING_BOXES_PROMPT.format(nodes=topic_name)
                         box_response = client.models.generate_content(
                             model="gemini-2.0-flash-exp",
                             contents=[detection_prompt, image_part]
                             st.error(f"Fehler bei Seite {page_num+1}: {str(e)}")
                             boxes = []
+                        # Draw boxes with corrected coordinates
                         annotated_image = image.copy()
                         if boxes:
                             annotated_image = draw_bounding_boxes(annotated_image, boxes)