Spaces:

aim4composites
/

AIM_InDes_Tool

Sleeping

App Files Community

AbhijitClemson committed on Jan 29

Commit

52bab2b

verified ·

1 Parent(s): 2b4fb27

Update src/pages/categorized/page6.py

Browse files

Files changed (1) hide show

src/pages/categorized/page6.py +21 -13

src/pages/categorized/page6.py CHANGED Viewed

@@ -14,13 +14,15 @@ import requests
 import base64
 from typing import Dict, Any, Optional
 from collections import defaultdict
 API_KEY = os.environ.get("GEMINI_API_KEY")
 if not API_KEY:
     st.error("Gemini API key not found. Set GEMINI_API_KEY in Hugging Face Secrets.")
     st.stop()
-API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-09-2025:generateContent?key={API_KEY}"
 SCHEMA = {
     "type": "OBJECT",
@@ -60,6 +62,10 @@ CAP_RE = re.compile(r"^(Fig\.?\s*\d+|Figure\s*\d+)\b", re.IGNORECASE)
 def call_gemini_from_bytes(pdf_bytes: bytes, filename: str) -> Optional[Dict[str, Any]]:
     """Calls Gemini API with PDF bytes"""
     try:
         encoded_file = base64.b64encode(pdf_bytes).decode("utf-8")
         mime_type = "application/pdf"
     except Exception as e:
@@ -140,7 +146,6 @@ def convert_to_dataframe(data: Dict[str, Any]) -> pd.DataFrame:
         })
     return pd.DataFrame(rows)
-# --- IMAGE EXTRACTION LOGIC ---
 def get_page_image(page):
     pix = page.get_pixmap(matrix=fitz.Matrix(DPI/72, DPI/72))
     img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, 3)
@@ -210,7 +215,6 @@ def extract_images(pdf_doc):
             x2, y2 = min(page_w, cx + cw + PADDING), min(page_h, cy + ch + PADDING)
             crop = img_bgr[int(y1):int(y2), int(x1):int(x2)]
-            # Store image data in memory instead of saving to disk
             _, buffer = cv2.imencode('.png', crop)
             img_bytes = buffer.tobytes()
@@ -461,7 +465,10 @@ def main():
     st.title("PDF Material Data & Plot Extractor")
     uploaded_file = st.file_uploader("Upload PDF (Material Datasheet or Research Paper)", type=["pdf"])
     if not uploaded_file:
         st.info("Upload a PDF to extract material data and plots")
@@ -495,22 +502,24 @@ def main():
     tab1, tab2 = st.tabs([" Material Data", " Extracted Plots"])
-    with tempfile.TemporaryDirectory() as tmpdir:
-        pdf_path = os.path.join(tmpdir, uploaded_file.name)
-        with open(pdf_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
         with tab1:
             st.subheader("Material Properties Data")
-            # Only call Gemini once per PDF
             if not st.session_state.pdf_data_extracted:
-                with st.spinner(" Extracting material data..."):
                     with open(pdf_path, "rb") as f:
                         pdf_bytes = f.read()
                     data = call_gemini_from_bytes(pdf_bytes, uploaded_file.name)
                     if data:
                         df = convert_to_dataframe(data)
                         if not df.empty:
@@ -521,7 +530,6 @@ def main():
                             st.warning("No data extracted")
                     else:
                         st.error("Failed to extract data from PDF")
-            # After extraction, or when rerunning, use stored data
             df = st.session_state.pdf_extracted_df
             if not df.empty:
@@ -649,7 +657,7 @@ def main():
                                     img_data = st.session_state.image_results[idx]['image_data'][p_idx]
                                     with cols[p_idx]:
-                                        st.image(img_data['array'], width=img_width, channels="BGR")
                                         if st.button(" Remove", key=f"del_s_{idx}_{p_idx}_{r['page']}"):
                                             del st.session_state.image_results[idx]['image_data'][p_idx]
                                             if len(st.session_state.image_results[idx]['image_data']) == 0:

 import base64
 from typing import Dict, Any, Optional
 from collections import defaultdict
+import google.generativeai as genai
+genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 API_KEY = os.environ.get("GEMINI_API_KEY")
 if not API_KEY:
     st.error("Gemini API key not found. Set GEMINI_API_KEY in Hugging Face Secrets.")
     st.stop()
+API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-preview-09-2025:generateContent?key={API_KEY}"
 SCHEMA = {
     "type": "OBJECT",
 def call_gemini_from_bytes(pdf_bytes: bytes, filename: str) -> Optional[Dict[str, Any]]:
     """Calls Gemini API with PDF bytes"""
     try:
+        if len(pdf_bytes) > 3 * 1024 * 1024:
+            st.error("PDF too large for Gemini demo on Hugging Face (max ~3MB).")
+            return None
         encoded_file = base64.b64encode(pdf_bytes).decode("utf-8")
         mime_type = "application/pdf"
     except Exception as e:
         })
     return pd.DataFrame(rows)
 def get_page_image(page):
     pix = page.get_pixmap(matrix=fitz.Matrix(DPI/72, DPI/72))
     img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, 3)
             x2, y2 = min(page_w, cx + cw + PADDING), min(page_h, cy + ch + PADDING)
             crop = img_bgr[int(y1):int(y2), int(x1):int(x2)]
             _, buffer = cv2.imencode('.png', crop)
             img_bytes = buffer.tobytes()
     st.title("PDF Material Data & Plot Extractor")
     uploaded_file = st.file_uploader("Upload PDF (Material Datasheet or Research Paper)", type=["pdf"])
+    if uploaded_file is not None:
+        if uploaded_file.size > 10 * 1024 * 1024:
+            st.error("PDF too large (max 10MB for demo)")
+            st.stop()
     if not uploaded_file:
         st.info("Upload a PDF to extract material data and plots")
     tab1, tab2 = st.tabs([" Material Data", " Extracted Plots"])
+    #with tempfile.TemporaryDirectory() as tmpdir:
+    #    pdf_path = os.path.join(tmpdir, uploaded_file.name)
+    #    with open(pdf_path, "wb") as f:
+    #        f.write(uploaded_file.getbuffer())
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+        tmp.write(uploaded_file.read())
+        pdf_path = tmp.name
         with tab1:
             st.subheader("Material Properties Data")
             if not st.session_state.pdf_data_extracted:
+                with st.spinner("Extracting material data from PDF…"):
                     with open(pdf_path, "rb") as f:
                         pdf_bytes = f.read()
                     data = call_gemini_from_bytes(pdf_bytes, uploaded_file.name)
                     if data:
                         df = convert_to_dataframe(data)
                         if not df.empty:
                             st.warning("No data extracted")
                     else:
                         st.error("Failed to extract data from PDF")
             df = st.session_state.pdf_extracted_df
             if not df.empty:
                                     img_data = st.session_state.image_results[idx]['image_data'][p_idx]
                                     with cols[p_idx]:
+                                        st.image(img_data['array'], use_container_width=True, channels="BGR")
                                         if st.button(" Remove", key=f"del_s_{idx}_{p_idx}_{r['page']}"):
                                             del st.session_state.image_results[idx]['image_data'][p_idx]
                                             if len(st.session_state.image_results[idx]['image_data']) == 0: