# Setup for a small retrieval-augmented QA demo: configure the Gemini client,
# load a PDF, build a "stuff" question-answering chain and a Chroma retriever
# over the PDF text, and define the question to ask.

from dotenv import load_dotenv

# Load variables from a local .env file before anything reads the environment.
load_dotenv()

import os
import warnings
from pathlib import Path as p

import google.generativeai as genai
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import (
    ChatGoogleGenerativeAI,
    GoogleGenerativeAIEmbeddings,
)

warnings.filterwarnings("ignore")
# Restart the Python kernel if there are issues with the langchain imports.

# Environment variable names are case-sensitive outside Windows, so accept the
# conventional upper-case spelling in addition to the original lower-case one.
# The lower-case name keeps priority so existing setups behave as before.
api_key = os.environ.get("google_api_key") or os.environ.get("GOOGLE_API_KEY")
if api_key:
    # The langchain_google_genai wrappers are documented to read the key from
    # GOOGLE_API_KEY; mirror it there (without overriding an existing value)
    # so they can find a key that was only supplied under the lower-case name.
    os.environ.setdefault("GOOGLE_API_KEY", api_key)
genai.configure(api_key=api_key)

# --- In-context information retrieval ---
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)

# --- Extract text from the PDF ---
# The source PDF can be overridden with the FAQ_PDF_PATH environment variable;
# the default is the original hard-coded location.
DEFAULT_PDF_PATH = (
    r"C:\Users\DELL\Downloads"
    r"\FAQ_on_Immunization_for_Health_Workers-English.pdf"
)
pdf_path = os.environ.get("FAQ_PDF_PATH", DEFAULT_PDF_PATH)
pdf_loader = PyPDFLoader(pdf_path)
pages = pdf_loader.load_and_split()

# Prompt that restricts the model to the supplied context.
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is not contained in the context, say "answer not available in context" \n\n Context: \n {context}?\n Question: \n {question} \n Answer: """

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# "stuff" chain: all input documents are placed into the single prompt above.
stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

# --- RAG pipeline: embedding + LLM ---
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=0)

# Merge the text of every loaded page, then re-split it into large chunks.
# The loop variable is named `page` so it does not shadow the `Path as p` alias.
context = "\n\n".join(str(page.page_content) for page in pages)
texts = text_splitter.split_text(context)

# Embed the chunks and expose the resulting vector store as a retriever.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_index = Chroma.from_texts(texts, embeddings).as_retriever()

# Input question goes here.
question = "What are vaccine hesitancy and vaccine confidence?"
# Retrieve the chunks relevant to the question. `invoke` is the supported
# retriever entry point; `get_relevant_documents` is deprecated in LangChain.
docs = vector_index.invoke(question)

# Run the QA chain over the retrieved documents. `invoke` replaces the
# deprecated direct call on the chain object; `return_only_outputs=True`
# keeps the result limited to the chain's outputs, as before.
stuff_answer = stuff_chain.invoke(
    {"input_documents": docs, "question": question},
    return_only_outputs=True,
)

# Access the text content from the result dictionary.
text = stuff_answer["output_text"]

# Format the text into a single paragraph by joining its lines with spaces.
processed_output = " ".join(text.splitlines())

# Print the formatted paragraph.
print(processed_output)