Spaces:

yuvabe-ai
/

colpali

Sleeping

App Files Files Community

Thamaraikannan committed on Oct 10, 2025

Commit

8d75571

verified ·

1 Parent(s): 04fc0fb

Update src/llm.py

Browse files

Files changed (1) hide show

src/llm.py +124 -108

src/llm.py CHANGED Viewed

@@ -1,108 +1,124 @@
-import streamlit as st
-import google.generativeai as genai
-import requests
-from PIL import Image
-import io
-import base64
-# Credentials must never be hard-coded in source control: the key that was
-# committed on this line is exposed and has to be revoked. Called without an
-# explicit api_key, genai.configure() falls back to the GOOGLE_API_KEY
-# environment variable (set it as a secret of the deployment).
-genai.configure()
-client = genai.GenerativeModel("gemini-2.5-flash")
-def fetch_image_as_pil(image_url):
-    """Download ``image_url`` and open the payload as a PIL image.
-
-    Returns the opened image, or ``None`` (after a Streamlit warning) when
-    the request, the HTTP status check, or the image decoding raises.
-    """
-    try:
-        resp = requests.get(image_url, timeout=10)
-        resp.raise_for_status()
-        payload = io.BytesIO(resp.content)
-        image = Image.open(payload)
-    except Exception as err:
-        st.warning(f"Failed to fetch image from {image_url}: {err}")
-        return None
-    return image
-def analyze_image_with_query(relevant_docs, query):
-    """Ask Gemini to answer ``query`` from the images of the retrieved documents.
-
-    Args:
-        relevant_docs: Sequence of retrieved documents; each one is read with
-            ``doc.get("metadata", {})`` and only the first five are used.
-        query: The user's question, interpolated into the user prompt.
-
-    Returns:
-        The stripped response text; a fixed fallback string when there are no
-        documents, no image could be loaded, or the response has no ``text``
-        attribute; ``None`` (after ``st.error``) when any exception is raised.
-    """
-    try:
-        if not relevant_docs or len(relevant_docs) == 0:
-            st.warning("⚠️ No relevant documents found for analysis.")
-            return "No relevant images found to analyze."
-        # Collect images and context
-        image_parts = []
-        context_texts = []
-        # `i` is the 1-based position in relevant_docs, so [DOC i] labels can
-        # skip numbers when a document has no loadable image.
-        for i, doc in enumerate(relevant_docs[:5], 1):  # top_k = 5
-            metadata = doc.get("metadata", {})
-            image_url = metadata.get("image_url")
-            company = metadata.get("company", "Unknown")
-            fiscal_year = metadata.get("fiscal_year", "Unknown")
-            page_number = metadata.get("page_number", "Unknown")
-            if image_url:
-                # fetch_image_as_pil returns None on failure; such documents
-                # contribute neither an image nor a context line.
-                pil_image = fetch_image_as_pil(image_url)
-                if pil_image:
-                    image_parts.append(pil_image)
-                    context_texts.append(f"[DOC {i}] {company} ({fiscal_year}) - Page {page_number}")
-        if not image_parts:
-            st.warning("⚠️ No images could be loaded from the retrieved documents.")
-            return "No images found to analyze."
-        context_info = "\n".join(context_texts)
-        st.info(f"Analyzing {len(image_parts)} images using Gemini...")
-        # Build structured content with roles
-        # NOTE(review): the instruction prompt is sent as a "model" turn rather
-        # than through a dedicated system-instruction mechanism - confirm this
-        # is intended.
-        contents = [
-            {
-                "role": "model",
-                "parts": [
-                    {
-                        "text": (
-                            """You are a professional assistant that answers user questions based **only on the content of provided document excerpts**. The user will ask a question, and you will also receive related text chunks retrieved from company documents or PDFs.
-                        Instructions:
-                        1. Use **only** the retrieved chunks to answer the user’s question. Do **not** add information from memory or outside sources.
-                        2. If multiple chunks provide relevant info, combine them into a **clear, concise answer**.
-                        3. If the answer is **not found** in the chunks, respond exactly with: "The document does not provide enough information to answer this question."
-                        4. Keep the style **professional, factual, and concise**.
-                        5. retrun the response as markdown format
-                        7. Refuse to answer or speculate if no reliable evidence is found in the chunks."""
-                        )
-                    }
-                ]
-            },
-            {
-                "role": "user",
-                "parts": [
-                    {
-                        "text": (f"""
-                            You will answer the question using ONLY the provided document excerpts.
-                            When you use information from a document, cite it with the format [DOC i],
-                            where i corresponds to the document number given in CONTEXT DOCUMENTS.
-                            If multiple docs are relevant, cite all of them (e.g., [DOC 1][DOC 3]).
-                            Context documents:\n{context_info}\n\n
-                            Question: {query}"""
-                        )
-                    }
-                ]
-            }
-        ]
-        def pil_image_to_blob(pil_image):
-            """Convert PIL image to base64 blob for Gemini."""
-            # Local import; `io` is also imported at module level.
-            import io
-            buf = io.BytesIO()
-            pil_image.save(buf, format="PNG")
-            byte_data = buf.getvalue()
-            b64_data = base64.b64encode(byte_data).decode("utf-8")
-            return {"mime_type": "image/png", "data": b64_data}
-        # Append all images properly
-        # Images go after the text part of the user turn (contents[1]), in the
-        # same order as the context lines.
-        for pil_image in image_parts:
-            contents[1]["parts"].append(pil_image_to_blob(pil_image))
-        # Send request to Gemini
-        response = client.generate_content(contents)
-        return response.text.strip() if response and hasattr(response, "text") else "No response received."
-    except Exception as e:
-        # Any failure above is reported in the UI; the caller receives None.
-        st.error(f"Gemini analysis error: {e}")
-        return None

+import streamlit as st
+from groq import Groq
+import requests
+from PIL import Image
+import io
+import base64
+# Initialize Groq client.
+# The API key must not live in source control: the key that was committed on
+# this line is exposed and has to be revoked. Called without api_key, Groq()
+# reads the GROQ_API_KEY environment variable (set it as a secret of the
+# deployment) and raises at import time when it is missing.
+groq_client = Groq()
+def fetch_image_as_pil(image_url):
+    """Fetch ``image_url`` over HTTP and return it as an opened PIL image.
+
+    Any exception from the request, the status check, or the decoding is
+    reported with ``st.warning`` and turned into a ``None`` return value.
+    """
+    try:
+        http_response = requests.get(image_url, timeout=10)
+        http_response.raise_for_status()
+        raw_bytes = io.BytesIO(http_response.content)
+        return Image.open(raw_bytes)
+    except Exception as exc:
+        st.warning(f"Failed to fetch image from {image_url}: {exc}")
+        return None
+def pil_image_to_data_url(pil_image):
+    """Serialize a PIL image as PNG and wrap it in a base64 ``data:`` URL for Groq."""
+    png_buffer = io.BytesIO()
+    pil_image.save(png_buffer, format="PNG")
+    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
+    return "data:image/png;base64," + encoded
+def analyze_image_with_query(relevant_docs, query):
+    """Answer ``query`` from the page images of the retrieved documents via Groq.
+
+    Only the first five entries of ``relevant_docs`` are considered. For each
+    one whose metadata carries an ``image_url`` that can be fetched, the image
+    is attached to the request as a base64 data URL and a ``[DOC i]`` context
+    line (company, fiscal year, page number) is added to the prompt.
+
+    Args:
+        relevant_docs: Sequence of dict-like documents with an optional
+            ``"metadata"`` mapping.
+        query: The user's question, interpolated into the user prompt.
+
+    Returns:
+        The stripped model answer; a fixed fallback string when there are no
+        documents, no image could be loaded, or the model returned nothing;
+        ``None`` (after ``st.error``) when any exception is raised.
+    """
+    try:
+        if not relevant_docs:
+            st.warning("⚠️ No relevant documents found for analysis.")
+            return "No relevant images found to analyze."
+        # Collect images and context
+        image_data_urls = []
+        context_texts = []
+        # NOTE: `i` is the 1-based position in relevant_docs, so [DOC i] labels
+        # can skip numbers when a document has no loadable image; the images
+        # are sent unlabeled, in the same order as the context lines.
+        for i, doc in enumerate(relevant_docs[:5], 1):  # top_k = 5
+            # `or {}` also covers a document whose "metadata" is explicitly
+            # None, which would otherwise abort the whole analysis.
+            metadata = doc.get("metadata") or {}
+            image_url = metadata.get("image_url")
+            if not image_url:
+                continue
+            pil_image = fetch_image_as_pil(image_url)
+            if not pil_image:
+                continue
+            company = metadata.get("company", "Unknown")
+            fiscal_year = metadata.get("fiscal_year", "Unknown")
+            page_number = metadata.get("page_number", "Unknown")
+            image_data_urls.append(pil_image_to_data_url(pil_image))
+            context_texts.append(f"[DOC {i}] {company} ({fiscal_year}) - Page {page_number}")
+        if not image_data_urls:
+            st.warning("⚠️ No images could be loaded from the retrieved documents.")
+            return "No images found to analyze."
+        context_info = "\n".join(context_texts)
+        st.info(f"Analyzing {len(image_data_urls)} images using Groq...")
+        # Build the system prompt
+        system_prompt = """You are a professional assistant that answers user questions based **only on the content of provided document excerpts**. The user will ask a question, and you will also receive related text chunks retrieved from company documents or PDFs.
+Instructions:
+1. Use **only** the retrieved chunks to answer the user's question. Do **not** add information from memory or outside sources.
+2. If multiple chunks provide relevant info, combine them into a **clear, concise answer**.
+3. If the answer is **not found** in the chunks, respond exactly with: "The document does not provide enough information to answer this question."
+4. Keep the style **professional, factual, and concise**.
+5. Return the response as markdown format.
+6. Refuse to answer or speculate if no reliable evidence is found in the chunks."""
+        # Build user message: one text part followed by one part per image
+        user_content = [
+            {
+                "type": "text",
+                "text": f"""You will answer the question using ONLY the provided document excerpts.
+When you use information from a document, cite it with the format [DOC i],
+where i corresponds to the document number given in CONTEXT DOCUMENTS.
+If multiple docs are relevant, cite all of them (e.g., [DOC 1][DOC 3]).
+Context documents:
+{context_info}
+Question: {query}"""
+            }
+        ]
+        user_content.extend(
+            {"type": "image_url", "image_url": {"url": data_url}}
+            for data_url in image_data_urls
+        )
+        # Create messages for Groq
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_content},
+        ]
+        # Send request to Groq
+        completion = groq_client.chat.completions.create(
+            model="meta-llama/llama-4-scout-17b-16e-instruct",
+            messages=messages,
+            temperature=0.3,
+            max_completion_tokens=2048,
+            top_p=1,
+            stream=False
+        )
+        # A missing choice or an empty/whitespace-only answer both count as
+        # "no response" instead of surfacing an error or an empty string.
+        choices = completion.choices
+        response_text = choices[0].message.content if choices else None
+        return (response_text or "").strip() or "No response received."
+    except Exception as e:
+        # Top-level boundary for the UI: report the failure, return None.
+        st.error(f"Groq analysis error: {e}")
+        return None