# FAIS_RAG / app.py
# azaan34's picture
# Update app.py
# dfc6130 verified
import gradio as gr
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
import time
# Load environment variables (e.g. GROQ_API_KEY, GOOGLE_API_KEY) from a .env file.
load_dotenv()

# Read secrets from the environment instead of hard-coding them in source.
# NOTE(review): the original committed a live Google API key and left the Groq
# key empty; both must now be supplied via .env or the process environment.
groq_api_key = os.getenv("GROQ_API_KEY", "")
google_api_key = os.getenv("GOOGLE_API_KEY", "")

# Groq-hosted DeepSeek R1 distill model used to answer questions.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="deepseek-r1-distill-llama-70b",
)

# Prompt that constrains the model to answer from the retrieved context only.
prompt = ChatPromptTemplate.from_template("""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions:{input}
""")

# Embedding model — must match the one used when the FAISS index was built.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=google_api_key,
    model="models/embedding-001",
)

# Load the locally saved FAISS vector store.  allow_dangerous_deserialization
# is required for pickle-backed indexes; acceptable here only because "faiss"
# is a trusted local artifact, never user-supplied data.
vector_store = FAISS.load_local("faiss", embeddings, allow_dangerous_deserialization=True)
retriever = vector_store.as_retriever()
def _split_thinking(answer):
    """Split a raw model answer into (visible_answer, thinking).

    DeepSeek R1 models emit their chain-of-thought wrapped in
    <think>...</think> tags; this pulls the first such span out of the
    answer and returns it separately.  If the tags are absent or malformed
    (closing tag before opening tag), the answer is returned unchanged with
    empty thinking.
    """
    open_tag, close_tag = "<think>", "</think>"
    start = answer.find(open_tag)
    end = answer.find(close_tag)
    # Guard against missing tags and against a stray closing tag appearing
    # before the opening one — the original code would slice garbage there.
    if start == -1 or end == -1 or end < start:
        return answer, ""
    thinking = answer[start + len(open_tag):end].strip()
    # Stitch the text around the tag span back together; the outer strip()
    # removes the dangling space the original left when the answer began
    # with <think>.
    visible = (answer[:start].strip() + " " + answer[end + len(close_tag):].strip()).strip()
    return visible, thinking


def process_query(question):
    """Run the retrieval-augmented chain on *question*.

    Returns (answer, thinking, timing_message) matching the three Gradio
    output widgets.
    """
    document_chain = create_stuff_documents_chain(llm, prompt)
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    # perf_counter() measures wall-clock time; the original used
    # time.process_time(), which counts only CPU time and therefore reported
    # ~0s for a network-bound LLM call.
    start = time.perf_counter()
    response = retrieval_chain.invoke({'input': question})
    processing_time = time.perf_counter() - start
    answer, thinking = _split_thinking(response['answer'])
    return answer, thinking, f"Processing time: {processing_time:.2f}s"
# ---- Gradio UI ----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Gemma Model Document Q&A")

    # Row 1: the user's question.
    with gr.Row():
        user_question = gr.Textbox(label="Enter your question")

    # Row 2: submit button.
    with gr.Row():
        ask_button = gr.Button("Submit")

    # Row 3: the model's answer.
    with gr.Row():
        answer_box = gr.Textbox(label="Answer", lines=5)

    # Row 4: extracted chain-of-thought and request timing, side by side.
    with gr.Row():
        reasoning_box = gr.Textbox(label="Model's Thinking Process", lines=3)
        latency_box = gr.Textbox(label="Processing Time")

    # Wire the button to the RAG pipeline defined above.
    ask_button.click(
        fn=process_query,
        inputs=[user_question],
        outputs=[answer_box, reasoning_box, latency_box],
    )

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()