pdf_gemini

Sleeping

App Files Community

Sebbe33 committed on Feb 16, 2025

Commit

6018547

verified ·

1 Parent(s): 316d102

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -24

app.py CHANGED Viewed

@@ -8,12 +8,10 @@ from google.genai import types
 from pdf2image import convert_from_bytes
 # Constants
-GET_NODE_BOUNDING_BOXES_PROMPT = """\
-Please provide me strict bounding boxes that encompasses the following text in the attached image? I'm trying to draw a rectangle around the text.
-- Use the top-left coordinate system
-- Values should be percentages of the image width and height (0 to 1)
-{nodes}
 """
 # Helper functions
@@ -39,20 +37,19 @@ def draw_bounding_boxes(image, boxes):
             ymin * height,
             xmax * width,
             ymax * height
-        ], outline="#00FF00", width=3)
     return image
 # Streamlit UI
-st.title("PDF Themenerkennung mit Gemini")
 col1, col2 = st.columns(2)
 with col1:
-    uploaded_file = st.file_uploader("PDF hochladen", type=["pdf"])
-    topic_name = st.text_input("Thema zur Erkennung", placeholder="z.B. 'Überschrift', 'Tabelle', 'Absatz'")
-    if uploaded_file and topic_name:
-        if st.button("Analysieren"):
-            with st.spinner("Analysiere PDF..."):
                 try:
                     # Convert PDF to images
                     pdf_bytes = uploaded_file.read()
@@ -72,27 +69,26 @@ with col1:
                             mime_type="image/png"
                         )
-                        # Get topic boxes using new prompt
-                        detection_prompt = GET_NODE_BOUNDING_BOXES_PROMPT.format(nodes=topic_name)
                         box_response = client.models.generate_content(
                             model="gemini-2.0-flash-exp",
-                            contents=[detection_prompt, image_part]
                         )
                         # Get description
                         desc_response = client.models.generate_content(
                             model="gemini-2.0-flash-exp",
-                            contents=["Beschreibe diesen Dokumentenausschnitt detailliert.", image_part]
                         )
                         # Process boxes
                         try:
                             boxes = parse_list_boxes(box_response.text)
                         except Exception as e:
-                            st.error(f"Fehler bei Seite {page_num+1}: {str(e)}")
                             boxes = []
-                        # Draw boxes with corrected coordinates
                         annotated_image = image.copy()
                         if boxes:
                             annotated_image = draw_bounding_boxes(annotated_image, boxes)
@@ -106,15 +102,15 @@ with col1:
                     # Display results
                     with col2:
-                        st.write(f"## Ergebnisse ({len(results)} Seiten)")
-                        tabs = st.tabs([f"Seite {res['page']}" for res in results])
                         for tab, res in zip(tabs, results):
                             with tab:
                                 st.image(res["image"],
-                                       caption=f"Seite {res['page']} - {res['boxes']} {topic_name} erkannt",
                                        use_container_width=True)
-                                st.write("**Beschreibung:**", res["description"])
                 except Exception as e:
-                    st.error(f"Fehler: {str(e)}")

 from pdf2image import convert_from_bytes
 # Constants
+DETECTION_PROMPT = """\
+Identify all text regions in this document. Provide bounding boxes in the format [xmin, ymin, xmax, ymax]
+as percentages of the image dimensions. Return only a Python-style list of lists without any additional text.
+Example: [[0.1, 0.2, 0.4, 0.5], [0.6, 0.7, 0.8, 0.9]]
 """
 # Helper functions
             ymin * height,
             xmax * width,
             ymax * height
+        ], outline="#00FF00", width=2)
     return image
 # Streamlit UI
+st.title("PDF Text Region Detection with Gemini")
 col1, col2 = st.columns(2)
 with col1:
+    uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
+    if uploaded_file:
+        if st.button("Analyze Document"):
+            with st.spinner("Analyzing PDF..."):
                 try:
                     # Convert PDF to images
                     pdf_bytes = uploaded_file.read()
                             mime_type="image/png"
                         )
+                        # Get all text boxes
                         box_response = client.models.generate_content(
                             model="gemini-2.0-flash-exp",
+                            contents=[DETECTION_PROMPT, image_part]
                         )
                         # Get description
                         desc_response = client.models.generate_content(
                             model="gemini-2.0-flash-exp",
+                            contents=["Describe this document section in detail.", image_part]
                         )
                         # Process boxes
                         try:
                             boxes = parse_list_boxes(box_response.text)
                         except Exception as e:
+                            st.error(f"Error on page {page_num+1}: {str(e)}")
                             boxes = []
+                        # Draw boxes
                         annotated_image = image.copy()
                         if boxes:
                             annotated_image = draw_bounding_boxes(annotated_image, boxes)
                     # Display results
                     with col2:
+                        st.write(f"## Results ({len(results)} pages)")
+                        tabs = st.tabs([f"Page {res['page']}" for res in results])
                         for tab, res in zip(tabs, results):
                             with tab:
                                 st.image(res["image"],
+                                       caption=f"Page {res['page']} - {res['boxes']} text regions detected",
                                        use_container_width=True)
+                                st.write("**Description:**", res["description"])
                 except Exception as e:
+                    st.error(f"Error: {str(e)}")