Arivara committed on
Commit
2d9bf5d
·
verified ·
1 Parent(s): b33629e

Update RAG_AGENT.py

Browse files
Files changed (1) hide show
  1. RAG_AGENT.py +82 -82
RAG_AGENT.py CHANGED
@@ -1,82 +1,82 @@
1
- from typing import Optional
2
- from PIL import Image
3
- import pdfplumber
4
- import re
5
- import os
6
- from dotenv import load_dotenv
7
- from google import genai
8
- from google.genai import types
9
-
10
- # Load environment variables
11
- load_dotenv()
12
-
13
- # Get API key and model name from environment variables
14
- GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
15
- GEMINI_MODEL_NAME = os.getenv('GEMINI_MODEL_NAME', 'gemini-2.5-flash')
16
-
17
- # Configure Gemini
18
- if GEMINI_API_KEY:
19
- client = genai.Client(api_key=GEMINI_API_KEY)
20
- else:
21
- client = None
22
-
23
- # Constants
24
- PDF_TEXT_LIMIT = 10000 # Limit PDF text to 10k characters
25
-
26
- # Initialize Gemini model (you'll need to set up your API key)
27
- # from google.generativeai import GenerativeModel
28
- # gemini_model = GenerativeModel('gemini-pro-vision')
29
-
30
- def extract_clean_pdf_text(pdf_path: str) -> str:
31
- """
32
- Extracts and cleans text from a PDF file.
33
- Args:
34
- pdf_path (str): Path to the PDF file.
35
- Returns:
36
- str: Cleaned text extracted from the PDF.
37
- """
38
- text = []
39
- with pdfplumber.open(pdf_path) as pdf:
40
- for page in pdf.pages:
41
- page_text = page.extract_text() or ""
42
- text.append(page_text)
43
- full_text = "\n".join(text)
44
- # Clean up: remove excessive whitespace and newlines
45
- cleaned_text = re.sub(r'\s+', ' ', full_text).strip()
46
- return cleaned_text
47
-
48
- def gemini_explain_file(file, question: Optional[str] = None) -> str:
49
- if not file: return "⚠️ No file uploaded."
50
- if not client:
51
- return "⚠️ Gemini API not configured. Please set GEMINI_API_KEY environment variable."
52
-
53
- try:
54
- file_path = file if isinstance(file, str) else file.name
55
-
56
- if file_path.lower().endswith((".png", ".jpg", ".jpeg")):
57
- img = Image.open(file_path)
58
- prompt = f"Explain the science in this image. If there's a specific question, address it: {question}" if question else "Explain the science in this image."
59
- response = client.models.generate_content(
60
- model=GEMINI_MODEL_NAME,
61
- contents=[prompt, img],
62
- config=types.GenerateContentConfig(
63
- thinking_config=types.ThinkingConfig(thinking_budget=0)
64
- )
65
- )
66
- return response.text or "No response generated"
67
- elif file_path.lower().endswith(".pdf"):
68
- with pdfplumber.open(file_path) as pdf:
69
- text = "\n".join(page.extract_text() or "" for page in pdf.pages)
70
- prompt = f"Explain the science in this PDF, focusing on this question: {question}\n\nPDF Content:\n{text[:PDF_TEXT_LIMIT]}" if question else f"Summarize and explain the science in this PDF:\n\n{text[:PDF_TEXT_LIMIT]}"
71
- response = client.models.generate_content(
72
- model=GEMINI_MODEL_NAME,
73
- contents=prompt,
74
- config=types.GenerateContentConfig(
75
- thinking_config=types.ThinkingConfig(thinking_budget=0)
76
- )
77
- )
78
- return response.text or "No response generated"
79
- else:
80
- return "⚠️ Unsupported file type."
81
- except Exception as e:
82
- return f"❌ Gemini Error: {e}"
 
1
+ from typing import Optional
2
+ from PIL import Image
3
+ import pdfplumber
4
+ import re
5
+ import os
6
+ from dotenv import load_dotenv
7
+ from google import genai
8
+ from google.genai import types
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+
13
+ # Get API key and model name from environment variables
14
+ GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
15
+ GEMINI_MODEL_NAME = 'gemini-2.5-flash'
16
+
17
+ # Configure Gemini
18
+ if GEMINI_API_KEY:
19
+ client = genai.Client(api_key=GEMINI_API_KEY)
20
+ else:
21
+ client = None
22
+
23
+ # Constants
24
+ PDF_TEXT_LIMIT = 10000 # Limit PDF text to 10k characters
25
+
26
+ # Initialize Gemini model (you'll need to set up your API key)
27
+ # from google.generativeai import GenerativeModel
28
+ # gemini_model = GenerativeModel('gemini-pro-vision')
29
+
30
+ def extract_clean_pdf_text(pdf_path: str) -> str:
31
+ """
32
+ Extracts and cleans text from a PDF file.
33
+ Args:
34
+ pdf_path (str): Path to the PDF file.
35
+ Returns:
36
+ str: Cleaned text extracted from the PDF.
37
+ """
38
+ text = []
39
+ with pdfplumber.open(pdf_path) as pdf:
40
+ for page in pdf.pages:
41
+ page_text = page.extract_text() or ""
42
+ text.append(page_text)
43
+ full_text = "\n".join(text)
44
+ # Clean up: remove excessive whitespace and newlines
45
+ cleaned_text = re.sub(r'\s+', ' ', full_text).strip()
46
+ return cleaned_text
47
+
48
+ def gemini_explain_file(file, question: Optional[str] = None) -> str:
49
+ if not file: return "⚠️ No file uploaded."
50
+ if not client:
51
+ return "⚠️ Gemini API not configured. Please set GEMINI_API_KEY environment variable."
52
+
53
+ try:
54
+ file_path = file if isinstance(file, str) else file.name
55
+
56
+ if file_path.lower().endswith((".png", ".jpg", ".jpeg")):
57
+ img = Image.open(file_path)
58
+ prompt = f"Explain the science in this image. If there's a specific question, address it: {question}" if question else "Explain the science in this image."
59
+ response = client.models.generate_content(
60
+ model=GEMINI_MODEL_NAME,
61
+ contents=[prompt, img],
62
+ config=types.GenerateContentConfig(
63
+ thinking_config=types.ThinkingConfig(thinking_budget=0)
64
+ )
65
+ )
66
+ return response.text or "No response generated"
67
+ elif file_path.lower().endswith(".pdf"):
68
+ with pdfplumber.open(file_path) as pdf:
69
+ text = "\n".join(page.extract_text() or "" for page in pdf.pages)
70
+ prompt = f"Explain the science in this PDF, focusing on this question: {question}\n\nPDF Content:\n{text[:PDF_TEXT_LIMIT]}" if question else f"Summarize and explain the science in this PDF:\n\n{text[:PDF_TEXT_LIMIT]}"
71
+ response = client.models.generate_content(
72
+ model=GEMINI_MODEL_NAME,
73
+ contents=prompt,
74
+ config=types.GenerateContentConfig(
75
+ thinking_config=types.ThinkingConfig(thinking_budget=0)
76
+ )
77
+ )
78
+ return response.text or "No response generated"
79
+ else:
80
+ return "⚠️ Unsupported file type."
81
+ except Exception as e:
82
+ return f"❌ Gemini Error: {e}"