Spaces:

Arivara
/

Research_RAG_Agent

Sleeping

App Files Files Community

Arivara committed on Jun 25, 2025

Commit

bbf5dc5

verified ·

1 Parent(s): 6776c39

Upload 3 files

Browse files

Files changed (3) hide show

RAG_AGENT.py +82 -0
gradio_app.py +174 -0
requirements.txt +8 -0

RAG_AGENT.py ADDED Viewed

	@@ -0,0 +1,82 @@

+from typing import Optional
+from PIL import Image
+import pdfplumber
+import re
+import os
+from dotenv import load_dotenv
+from google import genai
+from google.genai import types
+# Load environment variables
+load_dotenv()
+# Get API key and model name from environment variables
+GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
+GEMINI_MODEL_NAME = os.getenv('GEMINI_MODEL_NAME', 'gemini-2.5-flash')
+# Configure Gemini
+if GEMINI_API_KEY:
+    client = genai.Client(api_key=GEMINI_API_KEY)
+else:
+    client = None
+# Constants
+PDF_TEXT_LIMIT = 10000  # Limit PDF text to 10k characters
+# Initialize Gemini model (you'll need to set up your API key)
+# from google.generativeai import GenerativeModel
+# gemini_model = GenerativeModel('gemini-pro-vision')
+def extract_clean_pdf_text(pdf_path: str) -> str:
+    """
+    Extracts and cleans text from a PDF file.
+    Args:
+        pdf_path (str): Path to the PDF file.
+    Returns:
+        str: Cleaned text extracted from the PDF.
+    """
+    text = []
+    with pdfplumber.open(pdf_path) as pdf:
+        for page in pdf.pages:
+            page_text = page.extract_text() or ""
+            text.append(page_text)
+    full_text = "\n".join(text)
+    # Clean up: remove excessive whitespace and newlines
+    cleaned_text = re.sub(r'\s+', ' ', full_text).strip()
+    return cleaned_text
+def gemini_explain_file(file, question: Optional[str] = None) -> str:
+    if not file: return "⚠️ No file uploaded."
+    if not client:
+        return "⚠️ Gemini API not configured. Please set GEMINI_API_KEY environment variable."
+    try:
+        file_path = file if isinstance(file, str) else file.name
+        if file_path.lower().endswith((".png", ".jpg", ".jpeg")):
+            img = Image.open(file_path)
+            prompt = f"Explain the science in this image. If there's a specific question, address it: {question}" if question else "Explain the science in this image."
+            response = client.models.generate_content(
+                model=GEMINI_MODEL_NAME,
+                contents=[prompt, img],
+                config=types.GenerateContentConfig(
+                    thinking_config=types.ThinkingConfig(thinking_budget=0)
+                )
+            )
+            return response.text or "No response generated"
+        elif file_path.lower().endswith(".pdf"):
+            with pdfplumber.open(file_path) as pdf:
+                text = "\n".join(page.extract_text() or "" for page in pdf.pages)
+            prompt = f"Explain the science in this PDF, focusing on this question: {question}\n\nPDF Content:\n{text[:PDF_TEXT_LIMIT]}" if question else f"Summarize and explain the science in this PDF:\n\n{text[:PDF_TEXT_LIMIT]}"
+            response = client.models.generate_content(
+                model=GEMINI_MODEL_NAME,
+                contents=prompt,
+                config=types.GenerateContentConfig(
+                    thinking_config=types.ThinkingConfig(thinking_budget=0)
+                )
+            )
+            return response.text or "No response generated"
+        else:
+            return "⚠️ Unsupported file type."
+    except Exception as e:
+        return f"❌ Gemini Error: {e}"

gradio_app.py ADDED Viewed

	@@ -0,0 +1,174 @@

+import gradio as gr
+from RAG_AGENT import gemini_explain_file, extract_clean_pdf_text
+import os
+# Custom CSS passed to gr.Blocks(css=...) for a minimalist, centered layout.
+# `.main-header` and `.footer` style the raw HTML header/footer snippets;
+# `.upload-area` and `.output-box` are attached to components via elem_classes.
+# NOTE(review): `.btn-primary` is not attached to any component in this file;
+# confirm whether it is meant to style the "Analyze File" button.
+css = """
+.gradio-container {
+    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
+    max-width: 800px !important;
+    margin: 0 auto !important;
+}
+.main-header {
+    text-align: center;
+    margin-bottom: 2rem;
+    color: #1a1a1a;
+}
+.main-header h1 {
+    font-size: 2.5rem;
+    font-weight: 700;
+    margin-bottom: 0.5rem;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    background-clip: text;
+}
+.main-header p {
+    font-size: 1.1rem;
+    color: #666;
+    margin: 0;
+}
+.upload-area {
+    border: 2px dashed #e0e0e0 !important;
+    border-radius: 12px !important;
+    background: #fafafa !important;
+    transition: all 0.3s ease;
+}
+.upload-area:hover {
+    border-color: #667eea !important;
+    background: #f8f9ff !important;
+}
+.btn-primary {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+    border: none !important;
+    border-radius: 8px !important;
+    padding: 12px 24px !important;
+    font-weight: 600 !important;
+    transition: all 0.3s ease !important;
+}
+.btn-primary:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.3);
+}
+.output-box {
+    border-radius: 12px !important;
+    border: 1px solid #e0e0e0 !important;
+    background: #fafafa !important;
+    padding: 1.5rem !important;
+    margin-top: 1rem !important;
+    min-height: 200px;
+    font-size: 1.1rem;
+}
+.footer {
+    text-align: center;
+    margin-top: 2rem;
+    color: #999;
+    font-size: 0.9rem;
+}
+"""
+def analyze_file(file, question, analysis_type):
+    """Main function to handle file analysis"""
+    if not file:
+        return "⚠️ Please upload a file first."
+    if analysis_type == "Extract PDF Text":
+        if not file.name.lower().endswith('.pdf'):
+            return "⚠️ This option only works with PDF files."
+        try:
+            text = extract_clean_pdf_text(file.name)
+            return f"📄 **Extracted Text:**\n\n{text}"
+        except Exception as e:
+            return f"❌ Error extracting text: {e}"
+    elif analysis_type == "AI Analysis":
+        if not os.getenv('GEMINI_API_KEY'):
+            return "⚠️ Gemini API key not configured. Please set GEMINI_API_KEY environment variable."
+        result = gemini_explain_file(file, question)
+        return result
+    else:
+        return "⚠️ Please select an analysis type."
+# Create the Gradio interface
+with gr.Blocks(css=css, title="Science File Analyzer", theme=gr.themes.Soft()) as demo:
+    gr.HTML("""
+        <div class="main-header">
+            <h1>Science File Analyzer</h1>
+            <p>Upload scientific documents and images for AI-powered analysis</p>
+        </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            # File upload
+            file_input = gr.File(
+                label="📁 Upload File",
+                file_types=[".pdf", ".png", ".jpg", ".jpeg"],
+                file_count="single",
+                elem_classes=["upload-area"]
+            )
+            # Analysis type selection
+            analysis_type = gr.Radio(
+                choices=["AI Analysis", "Extract PDF Text"],
+                value="AI Analysis",
+                label="🔍 Analysis Type",
+                info="Choose how to process your file"
+            )
+            # Question input (optional)
+            question_input = gr.Textbox(
+                label="❓ Optional Question",
+                placeholder="Ask a specific question about the content...",
+                lines=2,
+                info="Leave empty for general analysis"
+            )
+            # Analyze button
+            analyze_btn = gr.Button(
+                "🚀 Analyze File",
+                variant="primary",
+                size="lg"
+            )
+        with gr.Column(scale=1):
+            # Output area (separate component)
+            output_box = gr.Markdown(
+                label="📊 Analysis Results",
+                value="Upload a file and click 'Analyze File' to get started.",
+                elem_classes=["output-box"]
+            )
+    # Footer
+    gr.HTML("""
+        <div class="footer">
+            <p>Powered by Google Gemini AI • Built with Gradio</p>
+        </div>
+    """)
+    # Event handlers
+    analyze_btn.click(
+        fn=analyze_file,
+        inputs=[file_input, question_input, analysis_type],
+        outputs=output_box
+    )
+    # Auto-analyze when file is uploaded
+    file_input.change(
+        fn=lambda: "File uploaded! Click 'Analyze File' to process.",
+        outputs=output_box
+    )
+if __name__ == "__main__":
+    demo.launch(show_error=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+phidata
+openai
+gemini-ai
+pdfplumber
+Pillow
+python-dotenv
+google-genai
+gradio>=3.50.2