# VaccineBot / langchain_google_gemini_api.py
# NishantD: "Update langchain_google_gemini_api.py" (commit ca37b2a, verified)
from dotenv import load_dotenv
load_dotenv()
import warnings
import google.generativeai as genai
import os
from pathlib import Path as p
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
# Silence every warning category for the rest of the run.
# NOTE(review): this also hides library deprecation notices; narrow or remove
# the filter when debugging.
warnings.filterwarnings("ignore")
# Restart the Python kernel if there are issues with the langchain imports.

# Resolve the Gemini API key. The lowercase name is the one this script has
# always read; the uppercase name is accepted as a fallback because environment
# variable names are case-sensitive on non-Windows platforms.
_api_key = os.environ.get("google_api_key") or os.environ.get("GOOGLE_API_KEY")
if _api_key:
    # Re-export under the uppercase name (documented lookup name for
    # langchain_google_genai) so the chat and embedding wrappers created later
    # in this script resolve the same key. An existing value is never replaced.
    os.environ.setdefault("GOOGLE_API_KEY", _api_key)
genai.configure(api_key=_api_key)

# In-context information retrieval
# Chat model used to answer questions from the retrieved context.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
"""### Extract text from the PDF"""
# Location of the FAQ document. The default is the path this script was written
# against; set the FAQ_PDF_PATH environment variable to run against a copy
# stored elsewhere without editing the code.
_DEFAULT_PDF_PATH = r"C:\Users\DELL\Downloads\FAQ_on_Immunization_for_Health_Workers-English.pdf"
pdf_loader = PyPDFLoader(os.environ.get("FAQ_PDF_PATH", _DEFAULT_PDF_PATH))
# `pages` holds the loaded document pieces; their `page_content` is read later
# when the text is joined and re-split for embedding.
pages = pdf_loader.load_and_split()
# Instruction prompt for the question-answering chain. It has two placeholders:
# {context} receives the retrieved document text and {question} the user query.
# The model is told to reply "answer not available in context" when the context
# does not contain the answer.
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
not contained in the context, say "answer not available in context" \n\n
Context: \n {context}?\n
Question: \n {question} \n
Answer:
"""
_prompt_variables = ["context", "question"]
prompt = PromptTemplate(input_variables=_prompt_variables, template=prompt_template)

# "stuff" chain: the supplied documents are passed to the model through the
# prompt above in a single call.
stuff_chain = load_qa_chain(llm=model, chain_type="stuff", prompt=prompt)
"""### RAG Pipeline: Embedding + LLM"""
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Merge the content of every loaded page into one string, separated by blank
# lines, then cut it into chunks (chunk_size=10000, no overlap).
context = "\n\n".join(str(page.page_content) for page in pages)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=0)
texts = text_splitter.split_text(context)

# Embed the chunks into a Chroma vector store and expose it as a retriever.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
_vector_store = Chroma.from_texts(texts, embeddings)
vector_index = _vector_store.as_retriever()
# Input question here.
question = "What are vaccine hesitancy and vaccine confidence?"

# Fetch the chunks most relevant to the question. `.invoke()` replaces the
# deprecated `get_relevant_documents()` and returns the same documents.
docs = vector_index.invoke(question)

# Run the QA chain over the retrieved chunks. `.invoke()` replaces the
# deprecated direct call on the chain object; `return_only_outputs=True` keeps
# the result limited to the chain's outputs, as before.
stuff_answer = stuff_chain.invoke(
    {"input_documents": docs, "question": question}, return_only_outputs=True
)
# The chain result is a dict; the generated answer is stored under "output_text".
text = stuff_answer["output_text"]

# Collapse the answer into one paragraph: split it at line breaks and rejoin
# the pieces with single spaces.
_answer_lines = text.splitlines()
processed_output = " ".join(_answer_lines)

# Show the single-paragraph answer.
print(processed_output)