"""Gradio front end for a small retrieval-augmented (RAG) question-answering bot.

Flow for each question:
  1. embed the question with a SentenceTransformer model,
  2. look up the nearest entries in a prebuilt FAISS index,
  3. pass the matching text chunks plus the question to Gemini,
  4. show the generated answer in the Gradio UI.

Requires ``faiss.index`` and ``chunks.json`` in the working directory and the
``GEMINI_API_KEY`` environment variable.
"""

import json
import os
import time

import faiss
import fitz  # PyMuPDF
# NOTE: this legacy-SDK alias is rebound by ``from google import genai`` below;
# the import is kept only so the file's dependency set is unchanged.
import google.generativeai as genai
import gradio as gr
from sentence_transformers import SentenceTransformer
from tenacity import retry, stop_after_attempt, wait_fixed

# from google.colab import userdata
from google.genai import types
from google import genai  # must stay after the legacy import: this binding is the one used

api_key = os.getenv("GEMINI_API_KEY")

# Gemini client
client = genai.Client(api_key=api_key)
print("Gemini client ready")

embed_model = SentenceTransformer("all-MiniLM-L6-v2")
print("Embedding model loaded")

faiss_index = faiss.read_index("faiss.index")

with open("chunks.json", "r", encoding="utf-8") as f:
    # Indexed by FAISS result position below, so presumably a list of text
    # chunks in the same order the index was built -- confirm against the
    # script that produced both files.
    chunks = json.load(f)


def call_llm(context: str, user_question: str) -> str:
    """Ask Gemini to answer *user_question* using only *context*.

    Args:
        context: Retrieved document text supplied to the model.
        user_question: The question as typed by the user.

    Returns:
        The model's text answer, or an empty string when the response
        carries no text.
    """
    prompt = f"""
    Use only the provided context to answer.

    Context:
    {context}

    Question:
    {user_question}
    """

    response = client.models.generate_content(
        model="gemini-3-flash-preview",
        contents=prompt,
        config=types.GenerateContentConfig(
            system_instruction="""
            You are a helpful RAG assistant.
            Answer clearly using only the context.
            If answer not found, say not found in document.
            """,
            max_output_tokens=1024,
        ),
    )

    # ``response.text`` can be None when the response has no text parts;
    # normalise so callers always receive a string.
    return response.text or ""


def _retrieve_chunks(question: str, top_k: int) -> list:
    """Return the stored chunks for the *top_k* nearest neighbours of *question*."""
    q_embed = embed_model.encode([question])
    _distances, indices = faiss_index.search(q_embed, top_k)

    # FAISS pads the label row with -1 when it finds fewer than ``top_k``
    # neighbours; without this filter ``chunks[-1]`` would silently add the
    # last chunk. Out-of-range positive labels are left to raise so that an
    # index/chunks mismatch is not hidden.
    return [chunks[idx] for idx in indices[0] if idx >= 0]


def search_faiss(question: str, top_k: int = 3) -> str:
    """Retrieve context for *question* from the FAISS index and answer it.

    Args:
        question: The user's question.
        top_k: Number of nearest chunks to retrieve.

    Returns:
        The answer produced by :func:`call_llm`.
    """
    context = "\n".join(_retrieve_chunks(question, top_k))
    return call_llm(context, question)


def ask_question(question: str) -> str:
    """Gradio callback: answer *question*, or prompt for input when it is blank."""
    if not question or not question.strip():
        # Skip the embedding and LLM round trip for empty submissions.
        return "Please enter a question."
    return search_faiss(question)


iface = gr.Interface(
    fn=ask_question,
    inputs=gr.Textbox(lines=2, placeholder="Ask something..."),
    outputs=gr.Textbox(lines=2, placeholder="Answer will appear here..."),
    title="Nexon EV Bot",
)

if __name__ == "__main__":
    iface.launch()