# FAIS_RAG / app.py
# azaan34's picture
# Update app.py
# dfc6130 verified
import gradio as gr
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
import time
# Load environment variables (e.g. GROQ_API_KEY, GOOGLE_API_KEY) from a .env file.
load_dotenv()

# Read secrets from the environment instead of hard-coding them in source.
# NOTE(review): the original committed a live Google API key and left the Groq
# key empty; both must now be supplied via .env or the process environment.
groq_api_key = os.getenv("GROQ_API_KEY", "")
google_api_key = os.getenv("GOOGLE_API_KEY", "")

# Groq-hosted DeepSeek R1 distill model used to answer questions.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="deepseek-r1-distill-llama-70b",
)

# Prompt that constrains the model to answer from the retrieved context only.
prompt = ChatPromptTemplate.from_template("""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions:{input}
""")

# Embedding model — must match the one used when the FAISS index was built.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=google_api_key,
    model="models/embedding-001",
)

# Load the locally saved FAISS vector store.  allow_dangerous_deserialization
# is required for pickle-backed indexes; acceptable here only because "faiss"
# is a trusted local artifact, never user-supplied data.
vector_store = FAISS.load_local("faiss", embeddings, allow_dangerous_deserialization=True)
retriever = vector_store.as_retriever()
def _split_thinking(answer):
    """Split a raw model answer into (visible_answer, thinking).

    DeepSeek R1 models emit their chain-of-thought wrapped in
    <think>...</think> tags; this pulls the first such span out of the
    answer and returns it separately.  If the tags are absent or malformed
    (closing tag before opening tag), the answer is returned unchanged with
    empty thinking.
    """
    open_tag, close_tag = "<think>", "</think>"
    start = answer.find(open_tag)
    end = answer.find(close_tag)
    # Guard against missing tags and against a stray closing tag appearing
    # before the opening one — the original code would slice garbage there.
    if start == -1 or end == -1 or end < start:
        return answer, ""
    thinking = answer[start + len(open_tag):end].strip()
    # Stitch the text around the tag span back together; the outer strip()
    # removes the dangling space the original left when the answer began
    # with <think>.
    visible = (answer[:start].strip() + " " + answer[end + len(close_tag):].strip()).strip()
    return visible, thinking


def process_query(question):
    """Run the retrieval-augmented chain on *question*.

    Returns (answer, thinking, timing_message) matching the three Gradio
    output widgets.
    """
    document_chain = create_stuff_documents_chain(llm, prompt)
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    # perf_counter() measures wall-clock time; the original used
    # time.process_time(), which counts only CPU time and therefore reported
    # ~0s for a network-bound LLM call.
    start = time.perf_counter()
    response = retrieval_chain.invoke({'input': question})
    processing_time = time.perf_counter() - start
    answer, thinking = _split_thinking(response['answer'])
    return answer, thinking, f"Processing time: {processing_time:.2f}s"
# ---- Gradio UI ----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Gemma Model Document Q&A")

    # Row 1: the user's question.
    with gr.Row():
        user_question = gr.Textbox(label="Enter your question")

    # Row 2: submit button.
    with gr.Row():
        ask_button = gr.Button("Submit")

    # Row 3: the model's answer.
    with gr.Row():
        answer_box = gr.Textbox(label="Answer", lines=5)

    # Row 4: extracted chain-of-thought and request timing, side by side.
    with gr.Row():
        reasoning_box = gr.Textbox(label="Model's Thinking Process", lines=3)
        latency_box = gr.Textbox(label="Processing Time")

    # Wire the button to the RAG pipeline defined above.
    ask_button.click(
        fn=process_query,
        inputs=[user_question],
        outputs=[answer_box, reasoning_box, latency_box],
    )

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()