# webchatbot / rag_pdf.py
# ctohereandnowai's picture
# initial commit
# 7cf4a0a verified
from openai import OpenAI
from dotenv import load_dotenv
import os
import requests
import PyPDF2
# Chat model and the OpenAI-compatible endpoint it is requested from.
model = "gemini-2.5-flash"
base_url = "https://generativelanguage.googleapis.com/v1beta/openai"

# load_dotenv() runs before the key is read so that a value supplied through
# a .env file is already in the process environment.
load_dotenv()
api_key = os.environ.get("GOOGLE_API_KEY")

# Single shared client used by every request made from this module.
client = OpenAI(api_key=api_key, base_url=base_url)
# Source PDF and the local path (next to this script) where it is stored.
url = "https://raw.githubusercontent.com/hereandnowai/rag-workshop/main/pdfs/About_HERE_AND_NOW_AI.pdf"
script_dir = os.path.dirname(os.path.abspath(__file__))
file_name = os.path.join(script_dir, "prospect-hereandnowai.pdf")

# Download the PDF. A timeout keeps a stalled connection from hanging the
# import forever, and raise_for_status() prevents an HTTP error page from
# being saved to disk as if it were the PDF.
response = None
try:
    response = requests.get(url, timeout=30)
    response.raise_for_status()
except requests.RequestException as e:
    # Best effort: report the failure and leave any previously downloaded
    # copy untouched instead of aborting the whole module.
    print(f"error downloading pdf {e}")
else:
    with open(file_name, "wb") as f:
        f.write(response.content)
# Build pdf_context: the text of every page of the downloaded PDF, joined
# with newlines. Any failure (missing file, unreadable PDF, ...) is reported
# and replaced by a placeholder so the module still loads.
try:
    with open(file_name, "rb") as pdf_file:
        # Extraction happens while the file is still open because the reader
        # pulls page data from the underlying stream.
        extracted_pages = [
            page.extract_text() for page in PyPDF2.PdfReader(pdf_file).pages
        ]
    # Skip pages that produced no text; trim surrounding whitespace on the rest.
    pdf_text_chunks = [text.strip() for text in extracted_pages if text]
    if pdf_text_chunks:
        pdf_context = "\n".join(pdf_text_chunks)
    else:
        pdf_context = " No text found in pdf"
except Exception as e:
    print(f"error no pdf text found {e}")
    pdf_context = "error no pdf text found"
# System message sent with every request: the PDF text followed by the
# instruction to answer only from that context.
system_prompt = (
    f"content from pdf {file_name}:\n {pdf_context}\n"
    "answer the questions based on the context\n"
    "if you cannot find the answer say so. dont give wrong information\n"
)
def get_response(usermessage, history):
    """Return the model's reply to ``usermessage`` given the prior turns.

    The request is the module-level ``system_prompt``, then ``history``,
    then the new user message, sent through ``client`` using ``model``.

    Args:
        usermessage: Text of the latest user turn.
        history: Earlier turns. The entries are forwarded to the API
            unchanged, so they must already be chat-completions message
            dicts (``{"role": ..., "content": ...}``). ``None`` is treated
            as an empty history.

    Returns:
        The text content of the first choice, or ``""`` when the response
        carries no text content.
    """
    messages = [{"role": "system", "content": system_prompt}]
    # Tolerate a missing history instead of raising TypeError in extend().
    messages.extend(history or [])
    messages.append({"role": "user", "content": usermessage})
    response = client.chat.completions.create(model=model, messages=messages)
    # message.content is optional in the API; never hand None to callers
    # that expect a string.
    return response.choices[0].message.content or ""
if __name__ == "__main__":
    # Manual smoke test: one question with no prior conversation.
    answer = get_response("who is the CTO", [])
    print(answer)