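# NOTE: the next three lines appear to be page-scrape residue (file-viewer
# header), kept as comments so that the module parses as Python:
#   AnnieVin's picture
#   Update app.py
#   49d6788 verified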
import os
from huggingface_hub import login
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
import gradio as gr
import faiss
import numpy as np
from langdetect import detect
# === Hugging Face authentication (skipped when HF_TOKEN is unset or empty) ===
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    # Only log in when a token is configured; otherwise continue without one.
    login(hf_token)
# === Seq2seq checkpoints: answer generator (LaMini) and translator (NLLB) ===
def _load_seq2seq(checkpoint):
    """Return the ``(tokenizer, model)`` pair for a seq2seq checkpoint name.

    The tokenizer is loaded before the model, matching the order in which the
    two were loaded when this was written inline.
    """
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


# Lightweight, demo-friendly model; `generator` is what guvi_chatbot calls
# with the assembled RAG prompt.
model_name = "MBZUAI/LaMini-Flan-T5-783M"
tokenizer, model = _load_seq2seq(model_name)
generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

# NLLB translator; source/target languages are supplied per call via the
# `src_lang` / `tgt_lang` keyword arguments.
nllb_model = "facebook/nllb-200-distilled-600M"
nllb_tokenizer, nllb = _load_seq2seq(nllb_model)
translator = pipeline("translation", model=nllb, tokenizer=nllb_tokenizer)
# === Language codes ===
# Maps the language code returned by `langdetect.detect` (keys) to the language
# tag passed to the NLLB translator as `src_lang` / `tgt_lang` (values).
# Codes that are not listed here fall back to "eng_Latn" at the lookup site.
lang_code_map = {
    # Indian languages
    "ta": "tam_Taml",
    "hi": "hin_Deva",
    "te": "tel_Telu",
    "ml": "mal_Mlym",
    "kn": "kan_Knda",
    "bn": "ben_Beng",
    "mr": "mar_Deva",
    # European languages
    "fr": "fra_Latn",
    "de": "deu_Latn",
    # East Asian languages
    "ko": "kor_Hang",
    "zh": "zho_Hans",
    "zh-cn": "zho_Hans",
    # langdetect reports Traditional Chinese as "zh-tw"; without this entry
    # such input was silently treated as English.
    "zh-tw": "zho_Hant",
    "ja": "jpn_Jpan",
    "en": "eng_Latn",
}
# === FAISS + Embeddings ===
# Sentence encoder used to embed the (English) question before the FAISS lookup.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Load the vector index and its chunk texts together. If either step fails,
# both are reset so the chatbot can report that it is not ready.
try:
    index = faiss.read_index("guvi_faiss.index")
    with open("chunks.txt", encoding="utf-8") as chunk_file:
        # One chunk per line; blank lines are kept (as "") so that list
        # positions stay in step with the line numbers of the file.
        chunks = [raw_line.strip() for raw_line in chunk_file]
except Exception as load_error:
    print("⚠️ Index/chunks load failed:", load_error)
    index = None
    chunks = []
# === Prompt Builder ===
def build_rag_prompt(context_chunks, question):
    """Assemble the retrieval-augmented prompt sent to the generator.

    Args:
        context_chunks: Iterable of strings; joined with newlines to form the
            "Context" section.
        question: The user's question, placed in the "Question" section.

    Returns:
        A single prompt string: instruction line, context, question, and a
        trailing "### Answer:" marker for the model to continue from.
    """
    sections = (
        "You are a multilingual chatbot for GUVI. The user may ask questions "
        "in any language. Answer in English, and only based on the context given.",
        "### Context:",
        "\n".join(context_chunks),
        "### Question:",
        f"{question}",
        "### Answer:",
    )
    return "\n".join(sections)
# === Chatbot Logic ===
def guvi_chatbot(user_input, history, user_lang):
    """Answer one user message with retrieval-augmented generation.

    Pipeline: detect the input language -> translate to English when the
    language is supported and not English -> embed the question and fetch the
    3 nearest chunks from the FAISS index -> generate an answer from the RAG
    prompt -> translate the answer back to the reply language.

    Args:
        user_input: The raw text typed by the user.
        history: List of ``[user_message, bot_message]`` pairs. ``None`` is
            treated as an empty history.
        user_lang: Reply-language tag in `lang_code_map` value form (e.g.
            ``"tam_Taml"``). When falsy, the detected input language is used.

    Returns:
        The conversation as a list of ``[user_message, bot_message]`` pairs
        with the new exchange at the end. On success the pair is appended to
        ``history`` in place and ``history`` itself is returned; on every
        error path a new list is returned and ``history`` is left untouched.
    """
    if history is None:
        history = []

    # langdetect cannot classify empty text, so answer blank input directly
    # instead of letting it surface as a generic "unexpected error".
    if not user_input or not user_input.strip():
        return history + [[user_input, "⚠️ Please enter a question."]]

    if not chunks or index is None:
        return history + [[user_input, "⚠️ System not ready. Try again later."]]

    try:
        lang = detect(user_input)
        src_lang = lang_code_map.get(lang, "eng_Latn")
        if not user_lang:
            user_lang = src_lang
        original_input = user_input  # Preserve for display

        # Translate only when the detected language maps to a non-English tag.
        # An unmapped language falls back to "eng_Latn"; translating that
        # "from English to English" would mislabel the source text, so the
        # input is passed through unchanged instead.
        if src_lang != "eng_Latn":
            try:
                translated_input = translator(
                    user_input, src_lang=src_lang, tgt_lang="eng_Latn"
                )[0]["translation_text"]
            except Exception as err:
                print("⚠️ Input translation failed:", err)
                return history + [[original_input, "⚠️ Translation failed. Try in English."]]
        else:
            translated_input = user_input

        # FAISS expects float32 query vectors.
        q_embed = np.array(embedder.encode([translated_input]), dtype="float32")
        _, neighbor_ids = index.search(q_embed, 3)
        # FAISS pads missing neighbours with -1 (which would silently select
        # the LAST chunk via negative indexing); also skip ids beyond the
        # chunk list in case the index and chunks.txt are out of step.
        context_chunks = [
            chunks[i] for i in neighbor_ids[0] if 0 <= i < len(chunks)
        ]
        prompt = build_rag_prompt(context_chunks, translated_input)

        try:
            result = generator(prompt, max_new_tokens=200)[0]["generated_text"]
            answer = result.strip()
        except Exception as err:
            print("⚠️ Generation failed:", err)
            answer = "⚠️ Sorry, generation failed."

        if user_lang != "eng_Latn":
            try:
                answer = translator(
                    answer, src_lang="eng_Latn", tgt_lang=user_lang
                )[0]["translation_text"]
            except Exception as err:
                # Best effort: keep the English answer and flag the failure.
                print("⚠️ Back translation failed:", err)
                answer += "\n(⚠️ Back translation failed.)"

        # Appended in place: a caller that holds on to the same list object
        # sees the new exchange without reassigning the return value.
        history.append([original_input, answer])
        return history
    except Exception as err:
        # Request-level boundary: report the cause, keep the UI responsive.
        print("⚠️ Chatbot request failed:", err)
        return history + [[user_input, "⚠️ Unexpected error. Try again."]]
# === Gradio UI ===
def _greeting():
    """Return a fresh copy of the initial chat history (the welcome message)."""
    return [["🤖", "**Hello! I’m your GUVI Assistant.**\nHow can I help you today?"]]


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Column():
        gr.Markdown(
            "\n"
            "# 🤖 GUVI Multilingual Chatbot\n"
            '<span style="font-size:16px;">Ask me anything about GUVI in **any language** 🌍</span>\n',
            elem_id="title",
        )
        chatbot = gr.Chatbot(value=_greeting(), height=400)
        with gr.Row():
            user_input = gr.Textbox(
                show_label=False,
                placeholder="Type your question here... 💡",
                scale=4,
            )
            send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
        with gr.Row():
            clear_btn = gr.Button("🗑️ Clear Chat", variant="stop")

        # `state` is no longer wired to the send handler (see below); it is
        # kept, and still reset on clear, so the name remains available.
        state = gr.State([])
        user_lang = gr.State(None)

        # Button actions
        # The Chatbot component is both the history input and the output.
        # Previously the history came from `state`, which was never an output:
        # the conversation only persisted through in-place mutation, and the
        # greeting plus any error reply disappeared from the chat window.
        send_btn.click(
            fn=guvi_chatbot,
            inputs=[user_input, chatbot, user_lang],
            outputs=chatbot,
        )
        send_btn.click(fn=lambda _: "", inputs=user_input, outputs=user_input)

        # Pressing Enter in the textbox behaves exactly like the Send button.
        user_input.submit(
            fn=guvi_chatbot,
            inputs=[user_input, chatbot, user_lang],
            outputs=chatbot,
        )
        user_input.submit(fn=lambda _: "", inputs=user_input, outputs=user_input)

        clear_btn.click(_greeting, outputs=chatbot)
        clear_btn.click(lambda: [], outputs=state)

        gr.Markdown(
            "\n"
            "---\n"
            "⚡ *Tip: You can ask in Tamil, Hindi, French, Chinese, or English – I’ll handle the rest!*\n",
            elem_id="footer",
        )

demo.launch()