Sebbe33 committed on
Commit
316d102
·
verified ·
1 Parent(s): 19469f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -7,6 +7,15 @@ from google import genai
7
  from google.genai import types
8
  from pdf2image import convert_from_bytes
9
 
 
 
 
 
 
 
 
 
 
10
  # Helper functions
11
  def parse_list_boxes(text):
12
  """Extracts bounding boxes from response text"""
@@ -15,15 +24,15 @@ def parse_list_boxes(text):
15
  return [[float(m) for m in match] for match in matches]
16
 
17
  def draw_bounding_boxes(image, boxes):
18
- """Draws bounding boxes on the image"""
19
  draw = ImageDraw.Draw(image)
20
  width, height = image.size
21
 
22
  for box in boxes:
23
- ymin = max(0.0, min(1.0, box[0]))
24
- xmin = max(0.0, min(1.0, box[1]))
25
- ymax = max(0.0, min(1.0, box[2]))
26
- xmax = max(0.0, min(1.0, box[3]))
27
 
28
  draw.rectangle([
29
  xmin * width,
@@ -63,13 +72,8 @@ with col1:
63
  mime_type="image/png"
64
  )
65
 
66
- # Get topic boxes
67
- detection_prompt = (
68
- f"Identifiziere alle {topic_name} Bereiche in diesem Dokument. "
69
- "Gib Bounding Boxes im Format [ymin, xmin, ymax, xmax] "
70
- "als reine Python-Liste ohne weiteren Text. "
71
- "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
72
- )
73
  box_response = client.models.generate_content(
74
  model="gemini-2.0-flash-exp",
75
  contents=[detection_prompt, image_part]
@@ -88,7 +92,7 @@ with col1:
88
  st.error(f"Fehler bei Seite {page_num+1}: {str(e)}")
89
  boxes = []
90
 
91
- # Draw boxes
92
  annotated_image = image.copy()
93
  if boxes:
94
  annotated_image = draw_bounding_boxes(annotated_image, boxes)
 
7
  from google.genai import types
8
  from pdf2image import convert_from_bytes
9
 
10
+ # Constants
11
+ GET_NODE_BOUNDING_BOXES_PROMPT = """\
12
+ Please provide me strict bounding boxes that encompasses the following text in the attached image? I'm trying to draw a rectangle around the text.
13
+ - Use the top-left coordinate system
14
+ - Values should be percentages of the image width and height (0 to 1)
15
+
16
+ {nodes}
17
+ """
18
+
19
  # Helper functions
20
  def parse_list_boxes(text):
21
  """Extracts bounding boxes from response text"""
 
24
  return [[float(m) for m in match] for match in matches]
25
 
26
  def draw_bounding_boxes(image, boxes):
27
+ """Draws bounding boxes on the image using [xmin, ymin, xmax, ymax] format"""
28
  draw = ImageDraw.Draw(image)
29
  width, height = image.size
30
 
31
  for box in boxes:
32
+ xmin = max(0.0, min(1.0, box[0]))
33
+ ymin = max(0.0, min(1.0, box[1]))
34
+ xmax = max(0.0, min(1.0, box[2]))
35
+ ymax = max(0.0, min(1.0, box[3]))
36
 
37
  draw.rectangle([
38
  xmin * width,
 
72
  mime_type="image/png"
73
  )
74
 
75
+ # Get topic boxes using new prompt
76
+ detection_prompt = GET_NODE_BOUNDING_BOXES_PROMPT.format(nodes=topic_name)
 
 
 
 
 
77
  box_response = client.models.generate_content(
78
  model="gemini-2.0-flash-exp",
79
  contents=[detection_prompt, image_part]
 
92
  st.error(f"Fehler bei Seite {page_num+1}: {str(e)}")
93
  boxes = []
94
 
95
+ # Draw boxes with corrected coordinates
96
  annotated_image = image.copy()
97
  if boxes:
98
  annotated_image = draw_bounding_boxes(annotated_image, boxes)