""" Hugging Face Space: Multilingual Document Assistant Run this as a Gradio app on Hugging Face Spaces. Set HF_MODEL_ID to your Hub model (e.g. your-username/multilingual-doc-assistant). """ import os import torch import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # Model: Hub id (e.g. your-username/multilingual-doc-assistant) or local path. # On Spaces set HF_MODEL_ID in Settings → Variables. Local: use trained folder if present. _SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) _LOCAL_MODEL = os.path.join(_SCRIPT_DIR, "multilingual-doc-model") HF_MODEL_ID = os.environ.get("HF_MODEL_ID") or (_LOCAL_MODEL if os.path.isdir(_LOCAL_MODEL) else "bigscience/bloom-560m") def load_pipeline(): tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_ID) model = AutoModelForCausalLM.from_pretrained(HF_MODEL_ID) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token device = 0 if torch.cuda.is_available() else -1 return pipeline( "text-generation", model=model, tokenizer=tokenizer, device=device, ) # Load once at startup (Spaces will cache) pipe = load_pipeline() def _get_text(content): """Extract plain text from Gradio message content (str or list of parts).""" if isinstance(content, str): return content if isinstance(content, list): for part in content: if isinstance(part, dict) and part.get("type") == "text": return part.get("text", "") if isinstance(part, str): return part return "" def build_prompt(history, message): parts = [] for turn in history: if isinstance(turn, (list, tuple)) and len(turn) >= 2: user_msg, assistant_msg = str(turn[0] or ""), str(turn[1] or "") elif isinstance(turn, dict): role = turn.get("role", "") content = _get_text(turn.get("content", "")) if role == "user": user_msg, assistant_msg = content, "" else: user_msg, assistant_msg = "", content if not user_msg and not assistant_msg: continue else: continue if user_msg: parts.append(f"User: {user_msg}\nAssistant: {assistant_msg}") parts.append(f"User: {message}\nAssistant:") return "\n".join(parts) def chat(message, history): if not message.strip(): return "" prompt = build_prompt(history, message) out = pipe( prompt, max_new_tokens=150, do_sample=True, temperature=0.7, pad_token_id=pipe.tokenizer.pad_token_id, ) full = out[0]["generated_text"] # Return only the new Assistant part (after the last "Assistant:") if "Assistant:" in full: reply = full.split("Assistant:")[-1].strip() else: reply = full[len(prompt):].strip() # Stop at next "User:" or double newline for stop in ["\nUser:", "\n\nUser:"]: if stop in reply: reply = reply.split(stop)[0].strip() return reply with gr.Blocks( title="Multilingual Document Assistant", theme=gr.themes.Soft(), ) as demo: gr.Markdown(""" # Multilingual Document Assistant **Supports:** Spanish · Chinese · Vietnamese · Portuguese Ask about documents, get explanations, or chat. *(Agent-style responses)* """) gr.ChatInterface( fn=chat, type="messages", examples=[ ["Explícame este documento: La IA mejora la productividad."], ["总结这段文字: 人工智能正在改变世界。"], ["Giải thích đoạn này: Công nghệ giúp cuộc sống dễ dàng hơn."], ], retry_btn="Retry", undo_btn="Undo", clear_btn="Clear", ) gr.Markdown(f"*Model: `{HF_MODEL_ID}`*") if __name__ == "__main__": demo.launch()