Spaces:
Sleeping
Sleeping
| import os | |
| from huggingface_hub import login | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM | |
| from sentence_transformers import SentenceTransformer | |
| import gradio as gr | |
| import faiss | |
| import numpy as np | |
| from langdetect import detect | |
| # === Login with Hugging Face Token (safe check) === | |
| hf_token = os.getenv("HF_TOKEN", None) | |
| if hf_token: | |
| login(hf_token) | |
| # === Load Lightweight Model (Demo-Friendly) === | |
| model_name = "MBZUAI/LaMini-Flan-T5-783M" | |
| tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| model = AutoModelForSeq2SeqLM.from_pretrained(model_name) | |
| generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer) | |
| # === Translator (NLLB) === | |
| nllb_model = "facebook/nllb-200-distilled-600M" | |
| nllb_tokenizer = AutoTokenizer.from_pretrained(nllb_model) | |
| nllb = AutoModelForSeq2SeqLM.from_pretrained(nllb_model) | |
| translator = pipeline("translation", model=nllb, tokenizer=nllb_tokenizer) | |
| # === Language codes === | |
| lang_code_map = { | |
| "ta": "tam_Taml", "hi": "hin_Deva", "te": "tel_Telu", "ml": "mal_Mlym", "kn": "kan_Knda", | |
| "bn": "ben_Beng", "mr": "mar_Deva", "fr": "fra_Latn", "de": "deu_Latn", "ko": "kor_Hang", | |
| "zh": "zho_Hans", "zh-cn": "zho_Hans", "ja": "jpn_Jpan", "en": "eng_Latn" | |
| } | |
| # === FAISS + Embeddings === | |
| embedder = SentenceTransformer("all-MiniLM-L6-v2") | |
| try: | |
| index = faiss.read_index("guvi_faiss.index") | |
| with open("chunks.txt", "r", encoding="utf-8") as f: | |
| chunks = [line.strip() for line in f.readlines()] | |
| except Exception as e: | |
| print("⚠️ Index/chunks load failed:", e) | |
| index = None | |
| chunks = [] | |
| # === Prompt Builder === | |
| def build_rag_prompt(context_chunks, question): | |
| context_text = "\n".join(context_chunks) | |
| return f"""You are a multilingual chatbot for GUVI. The user may ask questions in any language. Answer in English, and only based on the context given. | |
| ### Context: | |
| {context_text} | |
| ### Question: | |
| {question} | |
| ### Answer:""" | |
| # === Chatbot Logic === | |
| def guvi_chatbot(user_input, history, user_lang): | |
| if not chunks or not index: | |
| return history + [[user_input, "⚠️ System not ready. Try again later."]] | |
| try: | |
| lang = detect(user_input) | |
| src_lang = lang_code_map.get(lang, "eng_Latn") | |
| if not user_lang: | |
| user_lang = src_lang | |
| original_input = user_input # Preserve for display | |
| if lang != "en": | |
| try: | |
| translated_input = translator(user_input, src_lang=src_lang, tgt_lang="eng_Latn")[0]['translation_text'] | |
| except: | |
| return history + [[original_input, "⚠️ Translation failed. Try in English."]] | |
| else: | |
| translated_input = user_input | |
| q_embed = embedder.encode([translated_input]) | |
| D, I = index.search(np.array(q_embed), 3) | |
| context_chunks = [chunks[i] for i in I[0]] | |
| prompt = build_rag_prompt(context_chunks, translated_input) | |
| try: | |
| result = generator(prompt, max_new_tokens=200)[0]["generated_text"] | |
| answer = result.strip() | |
| except: | |
| answer = "⚠️ Sorry, generation failed." | |
| if user_lang != "eng_Latn": | |
| try: | |
| answer = translator(answer, src_lang="eng_Latn", tgt_lang=user_lang)[0]['translation_text'] | |
| except: | |
| answer += "\n(⚠️ Back translation failed.)" | |
| history.append([original_input, answer]) | |
| return history | |
| except Exception: | |
| return history + [[user_input, "⚠️ Unexpected error. Try again."]] | |
| # === Gradio UI === | |
| with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| with gr.Column(): | |
| gr.Markdown( | |
| """ | |
| # 🤖 GUVI Multilingual Chatbot | |
| <span style="font-size:16px;">Ask me anything about GUVI in **any language** 🌍</span> | |
| """, | |
| elem_id="title" | |
| ) | |
| chatbot = gr.Chatbot( | |
| value=[["🤖", "**Hello! I’m your GUVI Assistant.**\nHow can I help you today?"]], | |
| height=400 | |
| ) | |
| with gr.Row(): | |
| user_input = gr.Textbox( | |
| show_label=False, | |
| placeholder="Type your question here... 💡", | |
| scale=4 | |
| ) | |
| send_btn = gr.Button("🚀 Send", variant="primary", scale=1) | |
| with gr.Row(): | |
| clear_btn = gr.Button("🗑️ Clear Chat", variant="stop") | |
| state = gr.State([]) | |
| user_lang = gr.State(None) | |
| # Button actions | |
| send_btn.click(fn=guvi_chatbot, inputs=[user_input, state, user_lang], outputs=chatbot) | |
| send_btn.click(fn=lambda _: "", inputs=user_input, outputs=user_input) | |
| clear_btn.click(lambda: [["🤖", "**Hello! I’m your GUVI Assistant.**\nHow can I help you today?"]], outputs=chatbot) | |
| clear_btn.click(lambda: [], outputs=state) | |
| gr.Markdown( | |
| """ | |
| --- | |
| ⚡ *Tip: You can ask in Tamil, Hindi, French, Chinese, or English – I’ll handle the rest!* | |
| """, | |
| elem_id="footer" | |
| ) | |
| demo.launch() | |