Spaces:

NishantD
/

VaccineBot

Sleeping

App Files Files Community

VaccineBot / langchain_google_gemini_api.py

NishantD

Update langchain_google_gemini_api.py

ca37b2a verified over 1 year ago

raw

history blame contribute delete

2.39 kB



	from dotenv import load_dotenv
	load_dotenv()
	import warnings
	import google.generativeai as genai
	import os
	from pathlib import Path as p
	from langchain.prompts import PromptTemplate
	from langchain.chains.question_answering import load_qa_chain
	from langchain_community.document_loaders import PyPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.vectorstores import Chroma
	from langchain_google_genai import ChatGoogleGenerativeAI
	warnings.filterwarnings("ignore")

	# Restart the Python kernel if the langchain imports fail.

	# --- In-context information retrieval: credentials and chat model ---
	#
	# Look the API key up under the original lowercase name first, then under the
	# conventional upper-case name. Environment variable names are case-sensitive
	# on POSIX systems, so a key stored only as "google_api_key" is not visible
	# as "GOOGLE_API_KEY" there (on Windows both spellings resolve to the same
	# variable).
	_google_api_key = os.environ.get("google_api_key") or os.environ.get("GOOGLE_API_KEY")
	if _google_api_key:
	    # langchain_google_genai is documented to read GOOGLE_API_KEY from the
	    # environment; export the key under that name so the chat model (and any
	    # later Google embeddings client) authenticates with the same key that
	    # genai is configured with. setdefault never overwrites an existing value.
	    os.environ.setdefault("GOOGLE_API_KEY", _google_api_key)

	# Same call as before; api_key is None when neither variable is set.
	genai.configure(api_key=_google_api_key)

	# Gemini chat model used by the question-answering chain.
	model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)


	"""### Extract text from the PDF"""

	# Location of the immunization FAQ PDF. The original machine-specific path is
	# kept as the default so existing setups behave exactly as before; set the
	# VACCINEBOT_PDF_PATH environment variable to load the document from
	# somewhere else (e.g. when running on a different machine or OS).
	_DEFAULT_PDF_PATH = r"C:\Users\DELL\Downloads\FAQ_on_Immunization_for_Health_Workers-English.pdf"
	pdf_path = os.environ.get("VACCINEBOT_PDF_PATH") or _DEFAULT_PDF_PATH

	pdf_loader = PyPDFLoader(pdf_path)

	# Load the PDF into a sequence of document objects; the rest of the script
	# reads each entry's `.page_content`.
	pages = pdf_loader.load_and_split()

	# Prompt for the QA chain. It exposes two placeholders, {context} and
	# {question}, and tells the model to reply "answer not available in context"
	# when the context does not contain the answer.
	# NOTE(review): the "?" right after {context} looks like a typo; it is left
	# untouched because changing the prompt text changes what the model sees.
	prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
	not contained in the context, say "answer not available in context" \n\n
	Context: \n {context}?\n
	Question: \n {question} \n
	Answer:
	"""

	prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

	# Question-answering chain of type "stuff", driven by the chat model and the
	# prompt defined above.
	stuff_chain = load_qa_chain(model, prompt=prompt, chain_type="stuff")

	"""### RAG Pipeline: Embedding + LLM"""



	from langchain_google_genai import GoogleGenerativeAIEmbeddings

	# Merge the content of every loaded page into one string, separated by blank
	# lines, then cut that string into chunks of at most 10000 characters with
	# no overlap between neighbouring chunks.
	context = "\n\n".join(str(page.page_content) for page in pages)
	text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=10000)
	texts = text_splitter.split_text(context)

	# Embedding model used to index the chunks.
	embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

	# Index the chunks in a Chroma store and expose it as a retriever.
	vector_index = Chroma.from_texts(
	    texts,
	    embeddings,
	).as_retriever()

	# Question to ask; edit this string to query something else.
	question = "What are vaccine hesitancy and vaccine confidence?"

	# Fetch the indexed chunks the retriever considers relevant to the question.
	docs = vector_index.get_relevant_documents(question)

	# Run the QA chain over the retrieved chunks, keeping only the chain outputs.
	stuff_answer = stuff_chain(
	    {"input_documents": docs, "question": question},
	    return_only_outputs=True,
	)

	# The answer text is stored under the "output_text" key of the result.
	text = stuff_answer["output_text"]

	# Collapse the answer into a single paragraph: split on line boundaries and
	# re-join the pieces with single spaces.
	processed_output = " ".join(text.splitlines())

	# Show the one-paragraph answer.
	print(processed_output)