"""Gradio chat front-end that streams answers produced by the RAG helpers in ``rag_func``.

On import the module loads ``.env``, prepares the RAG resources through
``prepare_RAG`` and builds the Gradio ``demo`` app. Running the file as a
script launches the app.
"""

import os
import queue
import time
from threading import Thread

from dotenv import load_dotenv

# The environment is loaded before ``rag_func`` is imported; the original
# statement order is kept on purpose.
# NOTE(review): presumably rag_func reads environment variables at import
# time -- confirm before reordering these imports.
print("Start loading .env")
load_dotenv()
print("Finish loading .env")

from langchain.callbacks.base import BaseCallbackHandler

print("Start importing from rag_func")
from rag_func import prepare_RAG, retrieve_RAG, generate_RAG
print("Finish importing from rag_func")

import gradio as gr

# ----------------- Context Setup -----------------
# For local execution, where the user can select the directory
# user_input = input("Enter a subfolder inside 'context' (press Enter for full 'context'): ").strip()
# if user_input:
#     user_dir = os.path.join("context", user_input)
# else:
#     user_dir = "context"
# print(f"[Info] Using context directory: {user_dir}")

# For gradio deploy. The user cannot choose the directory
user_dir = "context"

pinecone_API = os.getenv("PINECONE_API")
index_name = "regulatorik"
#llm_model = os.getenv("MODELNAME")
llm_model = "gpt-5-nano"

index, pc, llm = prepare_RAG(pinecone_API, index_name, llm_model=llm_model, dir_name=user_dir)

# Unique end-of-stream marker. An object (compared by identity) cannot be
# confused with a real token, unlike an in-band string marker.
_END_OF_STREAM = object()


# ----------------- Chat Functions -----------------
def add_user_message(message, history):
    """Append the user's message to the conversation history.

    Args:
        message: Text submitted by the user.
        history: List of ``{"role": ..., "content": ...}`` dicts; a falsy
            value (``None`` or an empty list) starts a new list.

    Returns:
        A tuple ``("", history, history)``. The submit handler wires these to
        the textbox (cleared), the chatbot and the session state.
    """
    history = history or []
    history.append({"role": "user", "content": message})
    return "", history, history


# ----------------- Streaming Handler -----------------
class StreamHandler(BaseCallbackHandler):
    """Callback handler that forwards streamed tokens into a queue and records timings.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, q: queue.Queue):
        super().__init__()
        self.q = q
        self.first_token_received = False
        # Initialised here so the callbacks never hit a missing attribute;
        # callers may overwrite it to restart the clock.
        self.start_time = time.time()
        self.ttft = None  # time to first token, in seconds
        self.total_time = None  # seconds until on_llm_end, None until then

    def on_llm_new_token(self, token: str, **kwargs):
        """Record the time to first token (once) and enqueue the token."""
        if not self.first_token_received:
            self.ttft = time.time() - self.start_time
            self.first_token_received = True
        self.q.put(token)

    def on_llm_end(self, *args, **kwargs):
        """Record the total duration and signal the end of the stream."""
        self.total_time = time.time() - self.start_time
        self.q.put(_END_OF_STREAM)


def _format_seconds(value):
    """Format a duration as ``'1.234 s'``, or ``'n/a'`` when it was never measured."""
    return "n/a" if value is None else f"{value:.3f} s"


# ----------------- Chat Functions with timing -----------------
def generate_bot_response(history):
    """Stream the assistant's answer for the last user message.

    Retrieves context with ``retrieve_RAG``, runs ``generate_RAG`` on a worker
    thread with a streaming copy of the LLM, and yields the growing history
    after every received token.

    Args:
        history: List of ``{"role": ..., "content": ...}`` dicts. It is
            mutated in place: an assistant entry is appended and filled.

    Yields:
        ``(history, history)`` tuples, consumed as the chatbot value and the
        session state.

    Raises:
        Exception: Whatever ``generate_RAG`` raised on the worker thread is
            re-raised here once the stream has ended.
    """
    if not history or history[-1]["role"] != "user":
        yield history, history
        return

    user_msg = history[-1]["content"]
    retrieved_chunks = retrieve_RAG(user_msg, pc, index)

    q = queue.Queue()
    handler = StreamHandler(q)
    # Start the clock after retrieval so the timings cover generation only.
    handler.start_time = time.time()

    model_name = getattr(llm, "model_name", getattr(llm, "model", None))
    # NOTE(review): only the model name is carried over to the streaming
    # instance; any other settings on `llm` are not copied -- confirm intended.
    streaming_llm = llm.__class__(model=model_name, streaming=True, callbacks=[handler])

    failures = []

    def run_llm():
        try:
            generate_RAG(user_msg, streaming_llm, retrieved_chunks)
        except Exception as exc:  # thread boundary: handed to the consumer below
            failures.append(exc)
        finally:
            # Safety net: guarantees the consumer loop terminates even when
            # on_llm_end was never reached.
            q.put(_END_OF_STREAM)

    Thread(target=run_llm, daemon=True).start()

    partial = ""
    history.append({"role": "assistant", "content": ""})
    while True:
        token = q.get()
        if token is _END_OF_STREAM:
            break
        partial += token
        history[-1]["content"] = partial
        yield history, history

    if failures:
        if not partial:
            # Nothing was streamed: drop the empty assistant placeholder so it
            # does not linger in the conversation.
            history.pop()
        raise failures[0]

    total_time = handler.total_time
    if total_time is None:
        # on_llm_end was not reached; fall back to the elapsed wall time.
        total_time = time.time() - handler.start_time
    print(f"[Timing] TTFT: {_format_seconds(handler.ttft)}, Total: {_format_seconds(total_time)}")


# ----------------- Simplified CSS for Default Gradio Font -----------------
custom_css = """
:root {
  --brand-blue: #17428f;
  --brand-orange: #f39719;
  --text-dark: #111827; /* very dark grey (near black) */
  --text-gray: #4B5563; /* medium grey for messages */
  color-scheme: only light;
}

body, .gradio-container {
  /* Default Gradio font will be used */
  background: linear-gradient(135deg, var(--brand-blue) 0%, var(--brand-orange) 100%);
  min-height: 100vh;
  color: var(--text-dark);
}

/* Top bar transparent */
#topbar {
  background: transparent !important;
}

/* Header text over gradient */
#header h1, #header h2, #header h3, #header h4, #header h5, #header h6, #header p {
  color: #ffffff;
  text-align: center;
}

/* Chatbox container */
#chatbot {
  height: 600px;
  border-radius: 14px;
  border: 2px solid var(--brand-blue);
  background-color: #ffffff;
  padding: 8px;
  overflow-y: auto;
}

/* Chat text */
#chatbot, #chatbot * {
  color: var(--text-gray) !important;
}

/* Bubble styling */
#chatbot .message.user {
  background: #fff4e1;
  border-radius: 10px;
  padding: 6px 12px;
  color: var(--text-gray) !important;
  text-align: right;
}
#chatbot .message.bot {
  background: #f0f0f0;
  border-radius: 10px;
  padding: 6px 12px;
  color: var(--text-gray) !important;
  text-align: left;
}

/* Fallback selectors for other Gradio versions */
#chatbot [data-testid*="message"] {
  border-radius: 10px;
  padding: 6px 12px;
}
#chatbot [data-testid="user-message"] {
  background: #fff4e1;
  color: var(--text-gray) !important;
  text-align: right;
}
#chatbot [data-testid="assistant-message"] {
  background: #f0f0f0;
  color: var(--text-gray) !important;
  text-align: left;
}

/* Inputs */
input[type="text"], textarea, .gr-text-input input, .gr-textbox textarea {
  border-radius: 10px;
  padding: 10px;
  font-size: 16px;
  border: 2px solid var(--brand-orange);
}
input:focus, textarea:focus, .gr-text-input input:focus, .gr-textbox textarea:focus {
  border-color: var(--brand-blue);
  outline: none;
  box-shadow: 0 0 6px rgba(23, 66, 143, 0.5);
}

/* Buttons (global gradient) */
.gr-button, button {
  border-radius: 10px;
  font-weight: 600;
  background: linear-gradient(90deg, var(--brand-blue), var(--brand-orange));
  color: white;
  border: none;
}
.gr-button:hover, button:hover {
  transform: translateY(-2px);
  box-shadow: 0 4px 8px rgba(0,0,0,0.2);
}

/* Chat area: icon-only buttons */
#chatbot {
  --icon-light: #9CA3AF;
  --icon-hover: #6B7280;
}

/* Tint SVG icons */
#chatbot button svg,
#chatbot [role="button"] svg,
#chatbot .icon svg,
#chatbot [class*="icon"] svg,
#chatbot [data-testid*="icon"] svg,
#chatbot [data-testid*="message"] .tools svg,
#chatbot .message-tools svg {
  color: var(--icon-light) !important;
  fill: var(--icon-light) !important;
  stroke: var(--icon-light) !important;
  opacity: 0.95;
}

/* Remove gradient background only on small icon-only buttons */
#chatbot :is(button,[role="button"]):is([aria-label],[title], :has(> svg)):not(.keep-gradient) {
  background: transparent !important;
  background-image: none !important;
  border: none !important;
  box-shadow: none !important;
  padding: 6px !important;
  border-radius: 8px !important;
  color: var(--icon-light) !important;
}

/* Hover/focus/active states */
#chatbot :is(button,[role="button"]):is([aria-label],[title], :has(> svg)):not(.keep-gradient):hover {
  background-color: rgba(0,0,0,0.05) !important;
}
#chatbot :is(button,[role="button"]):is([aria-label],[title], :has(> svg)):not(.keep-gradient):focus-visible {
  outline: none !important;
  box-shadow: 0 0 0 2px rgba(23, 66, 143, 0.35) !important;
  background-color: rgba(0,0,0,0.06) !important;
}
#chatbot :is(button,[role="button"]):is([aria-label],[title], :has(> svg)):not(.keep-gradient):active {
  background-color: rgba(0,0,0,0.08) !important;
}

/* Optional 'danger' icons */
#chatbot .danger svg {
  color: var(--icon-light) !important;
  fill: var(--icon-light) !important;
  stroke: var(--icon-light) !important;
}
#chatbot .danger:hover svg {
  color: #ef4444 !important;
  fill: #ef4444 !important;
  stroke: #ef4444 !important;
}

#topbar .gr-button.keep-gradient,
#topbar .gr-button:not(:has(svg)) {
  background: linear-gradient(90deg, var(--brand-blue), var(--brand-orange)) !important;
  color: #fff !important;
}

/* Icon-only buttons in topbar: transparent */
#topbar :is(button,[role="button"]):is([aria-label],[title], :has(> svg)):not(.keep-gradient) {
  background: transparent !important;
  border: none !important;
  box-shadow: none !important;
  padding: 6px !important;
  border-radius: 8px !important;
  color: var(--icon-light) !important;
}

/* Tint SVGs in topbar */
#topbar :is(button,[role="button"]):has(> svg) > svg {
  color: var(--icon-light) !important;
  fill: var(--icon-light) !important;
  stroke: var(--icon-light) !important;
  opacity: 0.95;
}

/* Hover/focus/active for topbar icons */
#topbar :is(button,[role="button"]):is([aria-label],[title], :has(> svg)):not(.keep-gradient):hover {
  background-color: rgba(0,0,0,0.05) !important;
}
#topbar :is(button,[role="button"]):is([aria-label],[title], :has(> svg)):not(.keep-gradient):focus-visible {
  outline: none !important;
  box-shadow: 0 0 0 2px rgba(23, 66, 143, 0.35) !important;
  background-color: rgba(0,0,0,0.06) !important;
}
#topbar :is(button,[role="button"]):is([aria-label],[title], :has(> svg)):not(.keep-gradient):active {
  background-color: rgba(0,0,0,0.08) !important;
}
"""

# Redirects to the same URL with `__theme=light` whenever that query
# parameter is not already set to 'light'.
js_force_light = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'light') {
        url.searchParams.set('__theme', 'light');
        window.location.replace(url);
    }
}
"""

# ----------------- Gradio App -----------------
with gr.Blocks(css=custom_css, fill_height=True, js=js_force_light) as demo:
    gr.Markdown(
        "\nAsk questions and get accurate answers from your documents.\n",
        elem_id="header",
    )

    chatbot = gr.Chatbot(type="messages", label="Conversation", elem_id="chatbot", height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your question here...")
    state = gr.State([])

    # First record the user's message, then stream the assistant's reply.
    msg.submit(add_user_message, inputs=[msg, state], outputs=[msg, chatbot, state]) \
        .then(generate_bot_response, inputs=[state], outputs=[chatbot, state])

# Launch
if __name__ == "__main__":
    demo.launch(share=True)