Spaces:
Sleeping
Sleeping
Update RAG_AGENT.py
Browse files- RAG_AGENT.py +82 -82
RAG_AGENT.py
CHANGED
|
@@ -1,82 +1,82 @@
|
|
| 1 |
-
from typing import Optional
|
| 2 |
-
from PIL import Image
|
| 3 |
-
import pdfplumber
|
| 4 |
-
import re
|
| 5 |
-
import os
|
| 6 |
-
from dotenv import load_dotenv
|
| 7 |
-
from google import genai
|
| 8 |
-
from google.genai import types
|
| 9 |
-
|
| 10 |
-
# Load environment variables
|
| 11 |
-
load_dotenv()
|
| 12 |
-
|
| 13 |
-
# Get API key and model name from environment variables
|
| 14 |
-
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
|
| 15 |
-
GEMINI_MODEL_NAME =
|
| 16 |
-
|
| 17 |
-
# Configure Gemini
|
| 18 |
-
if GEMINI_API_KEY:
|
| 19 |
-
client = genai.Client(api_key=GEMINI_API_KEY)
|
| 20 |
-
else:
|
| 21 |
-
client = None
|
| 22 |
-
|
| 23 |
-
# Constants
|
| 24 |
-
PDF_TEXT_LIMIT = 10000 # Limit PDF text to 10k characters
|
| 25 |
-
|
| 26 |
-
# Initialize Gemini model (you'll need to set up your API key)
|
| 27 |
-
# from google.generativeai import GenerativeModel
|
| 28 |
-
# gemini_model = GenerativeModel('gemini-pro-vision')
|
| 29 |
-
|
| 30 |
-
def extract_clean_pdf_text(pdf_path: str) -> str:
|
| 31 |
-
"""
|
| 32 |
-
Extracts and cleans text from a PDF file.
|
| 33 |
-
Args:
|
| 34 |
-
pdf_path (str): Path to the PDF file.
|
| 35 |
-
Returns:
|
| 36 |
-
str: Cleaned text extracted from the PDF.
|
| 37 |
-
"""
|
| 38 |
-
text = []
|
| 39 |
-
with pdfplumber.open(pdf_path) as pdf:
|
| 40 |
-
for page in pdf.pages:
|
| 41 |
-
page_text = page.extract_text() or ""
|
| 42 |
-
text.append(page_text)
|
| 43 |
-
full_text = "\n".join(text)
|
| 44 |
-
# Clean up: remove excessive whitespace and newlines
|
| 45 |
-
cleaned_text = re.sub(r'\s+', ' ', full_text).strip()
|
| 46 |
-
return cleaned_text
|
| 47 |
-
|
| 48 |
-
def gemini_explain_file(file, question: Optional[str] = None) -> str:
|
| 49 |
-
if not file: return "⚠️ No file uploaded."
|
| 50 |
-
if not client:
|
| 51 |
-
return "⚠️ Gemini API not configured. Please set GEMINI_API_KEY environment variable."
|
| 52 |
-
|
| 53 |
-
try:
|
| 54 |
-
file_path = file if isinstance(file, str) else file.name
|
| 55 |
-
|
| 56 |
-
if file_path.lower().endswith((".png", ".jpg", ".jpeg")):
|
| 57 |
-
img = Image.open(file_path)
|
| 58 |
-
prompt = f"Explain the science in this image. If there's a specific question, address it: {question}" if question else "Explain the science in this image."
|
| 59 |
-
response = client.models.generate_content(
|
| 60 |
-
model=GEMINI_MODEL_NAME,
|
| 61 |
-
contents=[prompt, img],
|
| 62 |
-
config=types.GenerateContentConfig(
|
| 63 |
-
thinking_config=types.ThinkingConfig(thinking_budget=0)
|
| 64 |
-
)
|
| 65 |
-
)
|
| 66 |
-
return response.text or "No response generated"
|
| 67 |
-
elif file_path.lower().endswith(".pdf"):
|
| 68 |
-
with pdfplumber.open(file_path) as pdf:
|
| 69 |
-
text = "\n".join(page.extract_text() or "" for page in pdf.pages)
|
| 70 |
-
prompt = f"Explain the science in this PDF, focusing on this question: {question}\n\nPDF Content:\n{text[:PDF_TEXT_LIMIT]}" if question else f"Summarize and explain the science in this PDF:\n\n{text[:PDF_TEXT_LIMIT]}"
|
| 71 |
-
response = client.models.generate_content(
|
| 72 |
-
model=GEMINI_MODEL_NAME,
|
| 73 |
-
contents=prompt,
|
| 74 |
-
config=types.GenerateContentConfig(
|
| 75 |
-
thinking_config=types.ThinkingConfig(thinking_budget=0)
|
| 76 |
-
)
|
| 77 |
-
)
|
| 78 |
-
return response.text or "No response generated"
|
| 79 |
-
else:
|
| 80 |
-
return "⚠️ Unsupported file type."
|
| 81 |
-
except Exception as e:
|
| 82 |
-
return f"❌ Gemini Error: {e}"
|
|
|
|
| 1 |
+
from typing import Optional
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import pdfplumber
|
| 4 |
+
import re
|
| 5 |
+
import os
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
from google import genai
|
| 8 |
+
from google.genai import types
|
| 9 |
+
|
| 10 |
+
# Load environment variables from a .env file, if present.
load_dotenv()

# API key is read from the environment. The model name may also be overridden
# via the environment and falls back to a sensible default.
# BUG FIX: the previous line read `GEMINI_MODEL_NAME = 'gemini-2.5-flash')` —
# the stray closing parenthesis was a SyntaxError that broke the whole module.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
GEMINI_MODEL_NAME = os.getenv('GEMINI_MODEL_NAME', 'gemini-2.5-flash')

# Configure the Gemini client only when an API key is available; consumers
# (e.g. gemini_explain_file) must check `client` for None before use.
if GEMINI_API_KEY:
    client = genai.Client(api_key=GEMINI_API_KEY)
else:
    client = None

# Constants
PDF_TEXT_LIMIT = 10000  # Cap PDF text sent to the model at 10k characters
+
def extract_clean_pdf_text(pdf_path: str) -> str:
    """
    Extract the text of every page in a PDF and normalize its whitespace.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Page texts joined together with every run of whitespace
        (spaces, tabs, newlines) collapsed to a single space and the
        result stripped of leading/trailing whitespace.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Pages with no extractable text yield None; substitute "".
        raw = "\n".join(page.extract_text() or "" for page in pdf.pages)
    # Collapse all whitespace runs (including the page-joining newlines).
    return re.sub(r'\s+', ' ', raw).strip()
+
def _generate_with_gemini(contents) -> str:
    """Send `contents` to the configured Gemini model (thinking disabled) and
    return the text reply, or a fallback string when the reply is empty."""
    response = client.models.generate_content(
        model=GEMINI_MODEL_NAME,
        contents=contents,
        config=types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_budget=0)
        ),
    )
    return response.text or "No response generated"


def gemini_explain_file(file, question: Optional[str] = None) -> str:
    """
    Explain the scientific content of an uploaded image or PDF via Gemini.

    Args:
        file: Either a filesystem path (str) or an object with a `.name`
            attribute holding the path (e.g. a Gradio/tempfile upload).
        question: Optional user question to focus the explanation on.

    Returns:
        str: The model's explanation, or a "⚠️"/"❌"-prefixed message when
        the input is missing, the API is unconfigured, the file type is
        unsupported, or the call fails.
    """
    if not file:
        return "⚠️ No file uploaded."
    if not client:
        return "⚠️ Gemini API not configured. Please set GEMINI_API_KEY environment variable."

    try:
        file_path = file if isinstance(file, str) else file.name

        if file_path.lower().endswith((".png", ".jpg", ".jpeg")):
            # Use a context manager so the image handle is closed
            # deterministically instead of relying on garbage collection.
            with Image.open(file_path) as img:
                prompt = (
                    f"Explain the science in this image. If there's a specific question, address it: {question}"
                    if question
                    else "Explain the science in this image."
                )
                return _generate_with_gemini([prompt, img])
        elif file_path.lower().endswith(".pdf"):
            with pdfplumber.open(file_path) as pdf:
                # Pages with no extractable text yield None; substitute "".
                text = "\n".join(page.extract_text() or "" for page in pdf.pages)
            # Truncate to PDF_TEXT_LIMIT characters to bound prompt size.
            prompt = (
                f"Explain the science in this PDF, focusing on this question: {question}\n\nPDF Content:\n{text[:PDF_TEXT_LIMIT]}"
                if question
                else f"Summarize and explain the science in this PDF:\n\n{text[:PDF_TEXT_LIMIT]}"
            )
            return _generate_with_gemini(prompt)
        else:
            return "⚠️ Unsupported file type."
    except Exception as e:
        # Boundary handler: surface any I/O or API failure as a UI string
        # rather than crashing the calling interface.
        return f"❌ Gemini Error: {e}"