Spaces:

NishantD
/

VaccineBot

Sleeping

App Files Files Community

VaccineBot / app.py

NishantD

Upload 8 files

c8b7844 verified over 1 year ago

raw

history blame contribute delete

2.69 kB

	import chainlit as cl
	from dotenv import load_dotenv
	load_dotenv()
	import warnings
	import google.generativeai as genai
	import os
	from pathlib import Path as p
	from langchain.prompts import PromptTemplate
	from langchain.chains.question_answering import load_qa_chain
	from langchain_community.document_loaders import PyPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.vectorstores import Chroma
	from langchain_google_genai import ChatGoogleGenerativeAI
	warnings.filterwarnings("ignore")
	# restart python kernal if issues with langchain import.

	genai.configure(api_key=os.environ.get("google_api_key"))
	"""### In Context Information Retreival
	"""
	model = ChatGoogleGenerativeAI(model="gemini-pro", temperature = 0.5)

	"""### Extract text from the PDF"""

	pdf_loader = PyPDFLoader(r'data/data.pdf')

	pages = pdf_loader.load_and_split()

	prompt_template = """Your name is खोप_Bot, a knowledgeable and helpful medical assistant specializing in Immunization information. User can ask you any information related to Immunization in Nepal. Your goal is to provide accurate, clear, and supportive answers to vaccine-related questions. Replace other country name with Nepal.You'll be presented with a context and a question. Carefully examine the context to extract any relevant information that can help you answer the question precisely.Answer maynot always be in context, In that case answer relevent to the context\n\n
	Context: \n {context}?\n
	Question: \n {question} \n
	Answer:
	"""

	prompt = PromptTemplate(
	template=prompt_template, input_variables=["context", "question"]
	)

	stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

	"""### RAG Pipeline: Embedding + LLM"""

	from langchain_google_genai import GoogleGenerativeAIEmbeddings

	text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
	context = "\n\n".join(str(p.page_content) for p in pages)
	texts = text_splitter.split_text(context)

	# texts

	embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

	vector_index = Chroma.from_texts(texts, embeddings).as_retriever()


	@cl.on_message
	async def main(message: cl.Message):
	docs = vector_index.get_relevant_documents(message.content)

	stuff_answer = stuff_chain(
	{"input_documents": docs, "question": message.content}, return_only_outputs=True
	)

	# Access the text content from the dictionary
	text = stuff_answer['output_text']
	await cl.Message(
	content=text,
	).send()