Spaces:
Sleeping
Sleeping
| from dotenv import load_dotenv | |
| load_dotenv() | |
| import warnings | |
| import google.generativeai as genai | |
| import os | |
| from pathlib import Path as p | |
| from langchain.prompts import PromptTemplate | |
| from langchain.chains.question_answering import load_qa_chain | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| warnings.filterwarnings("ignore") | |
# Restart the Python kernel if the langchain imports misbehave.

# Environment-variable names are case-sensitive everywhere except Windows, so
# accept both the lowercase spelling this script has always used and the
# conventional upper-case one.
_google_api_key = os.environ.get("google_api_key") or os.environ.get("GOOGLE_API_KEY")
if _google_api_key:
    # The langchain_google_genai wrappers created below are documented to read
    # the key from GOOGLE_API_KEY; mirror it there without overriding a value
    # that is already set.
    os.environ.setdefault("GOOGLE_API_KEY", _google_api_key)
genai.configure(api_key=_google_api_key)

# ### In-context information retrieval
# Chat model used to generate the final answer.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
# ### Extract text from the PDF
# The document location can be overridden with the FAQ_PDF_PATH environment
# variable; when it is unset the original machine-specific path is used, so
# existing behaviour is unchanged.
pdf_path = os.environ.get(
    "FAQ_PDF_PATH",
    r"C:\Users\DELL\Downloads\FAQ_on_Immunization_for_Health_Workers-English.pdf",
)
pdf_loader = PyPDFLoader(pdf_path)
# Each returned item exposes its text as `.page_content` (read further down
# when the RAG context is assembled).
pages = pdf_loader.load_and_split()
# Instruction template for the answer-generation step. {context} is filled
# with the retrieved document text and {question} with the user's query.
# NOTE: a stray "?" that used to follow {context} has been removed; it put a
# spurious question mark at the end of the retrieved text in every prompt.
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
not contained in the context, say "answer not available in context" \n\n
Context: \n {context}\n
Question: \n {question} \n
Answer:
"""
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)
# Question-answering chain that runs `model` with the prompt above.
stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
"""### RAG Pipeline: Embedding + LLM"""
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Merge the text of every loaded page into a single string (pages separated
# by a blank line), then re-split it with chunk_size=10000 and no overlap.
page_texts = [str(page.page_content) for page in pages]
context = "\n\n".join(page_texts)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=0)
texts = text_splitter.split_text(context)

# Embed the chunks into a Chroma store and expose it as a retriever.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_store = Chroma.from_texts(texts, embeddings)
vector_index = vector_store.as_retriever()
# Question to ask about the PDF.
question = "What are vaccine hesitancy and vaccine confidence?"
# `invoke` is the supported entry point for both retrievers and chains; it
# replaces the deprecated `get_relevant_documents()` and direct `chain(...)`
# call styles used here previously.
docs = vector_index.invoke(question)
stuff_answer = stuff_chain.invoke(
    {"input_documents": docs, "question": question},
    return_only_outputs=True,
)
# Pull the generated answer out of the chain's result dictionary.
text = stuff_answer["output_text"]
# Flatten the answer into one paragraph: break it at line boundaries and
# re-join the pieces with single spaces.
answer_lines = text.splitlines()
processed_output = " ".join(answer_lines)
# Show the single-paragraph answer.
print(processed_output)