| import gradio as gr |
| import os |
| from langchain_groq import ChatGroq |
| from langchain_core.prompts import ChatPromptTemplate |
| from langchain.chains.combine_documents import create_stuff_documents_chain |
| from langchain.chains import create_retrieval_chain |
| from langchain_community.vectorstores import FAISS |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings |
| from dotenv import load_dotenv |
| import time |
|
|
# Load environment variables from a local .env file (GROQ_API_KEY, GOOGLE_API_KEY).
load_dotenv()

# Read the Groq key from the environment instead of hard-coding it here.
# Previously this was a hard-coded empty string, so the client could never
# authenticate even though .env was loaded above.
groq_api_key = os.getenv("GROQ_API_KEY", "")

# DeepSeek-R1 distilled model served via Groq; its responses embed a
# <think>...</think> block that process_query() strips out for display.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="deepseek-r1-distill-llama-70b",
)
|
|
# Prompt that constrains the model to answer strictly from the retrieved
# documents: {context} is filled in by the stuff-documents chain and
# {input} comes from the dict passed to the retrieval chain's invoke().
prompt = ChatPromptTemplate.from_template("""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions:{input}
""")
|
|
| |
# SECURITY: the Google API key must come from the environment (.env) and must
# never be committed to source control. A key that was previously hard-coded
# on this line should be treated as leaked and rotated.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=os.getenv("GOOGLE_API_KEY"),
    model="models/embedding-001",
)

# Load the pre-built FAISS index from the local "faiss" directory.
# allow_dangerous_deserialization uses pickle under the hood -- acceptable
# only because the index is produced locally by this project, never taken
# from an untrusted source.
vector_store = FAISS.load_local("faiss", embeddings, allow_dangerous_deserialization=True)
retriever = vector_store.as_retriever()
|
|
def _split_thinking(answer: str) -> tuple[str, str]:
    """Split a model response into (visible_answer, thinking).

    DeepSeek-R1 models wrap their chain-of-thought in a single
    <think>...</think> block. When a complete block is present, it is
    removed from the answer and returned separately; otherwise the answer
    is returned unchanged with an empty thinking string.
    """
    open_tag, close_tag = "<think>", "</think>"
    if open_tag in answer and close_tag in answer:
        before, _, rest = answer.partition(open_tag)
        thinking, _, after = rest.partition(close_tag)
        return before.strip() + " " + after.strip(), thinking.strip()
    return answer, ""


def process_query(question):
    """Run the RAG pipeline for a single question.

    Returns a 3-tuple of (answer, model thinking text, timing string),
    matching the three Gradio output components wired to this function.
    """
    document_chain = create_stuff_documents_chain(llm, prompt)
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    # perf_counter (wall clock) rather than process_time: this call is
    # dominated by network I/O to the Groq API, which CPU time would not
    # count, so process_time drastically under-reported the latency.
    start = time.perf_counter()
    response = retrieval_chain.invoke({'input': question})
    elapsed = time.perf_counter() - start

    answer, thinking = _split_thinking(response['answer'])
    return answer, thinking, f"Processing time: {elapsed:.2f}s"
|
|
| |
# --- Gradio UI --------------------------------------------------------------
# Single-page layout: a question box, a submit button, and three read-only
# outputs that map positionally onto the 3-tuple returned by process_query().
# NOTE(review): the heading says "Gemma" but the LLM configured above is
# DeepSeek-R1 served via Groq -- confirm which title is intended.
with gr.Blocks() as demo:
    gr.Markdown("# Gemma Model Document Q&A")

    with gr.Row():
        question_input = gr.Textbox(label="Enter your question")

    with gr.Row():
        submit_btn = gr.Button("Submit")

    with gr.Row():
        answer_output = gr.Textbox(label="Answer", lines=5)

    # Thinking trace and latency share one row beneath the answer.
    with gr.Row():
        thinking_output = gr.Textbox(label="Model's Thinking Process", lines=3)
        time_output = gr.Textbox(label="Processing Time")

    # Outputs are positional: (answer, thinking, time) from process_query().
    submit_btn.click(
        process_query,
        inputs=[question_input],
        outputs=[answer_output, thinking_output, time_output]
    )


if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|
|
|
|
|