| import os |
| import torch |
| import gradio as gr |
| import spaces |
| from threading import Thread |
| from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer |
| from huggingface_hub import login |
|
|
| |
| |
| |
# Fine-tuned Iraqi-Arabic Gemma-3 checkpoint this Space serves.
MODEL_ID = "anaspro/gemma3-iraqi"


# The system prompt lives in its own file so it can be edited without
# touching the application code.
with open("system_prompt.txt", "r", encoding="utf-8") as f:
    SYSTEM_PROMPT = f.read()


# Authenticate against the Hugging Face Hub when a token is configured
# (required if the model repo is gated or private).
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
    print("🔐 Logged in to Hugging Face")


# Lazily initialized inside chat(); kept at module scope so the loaded
# model survives across requests within the same worker.
model = None
tokenizer = None
|
|
| |
| |
| |
@spaces.GPU(duration=120)
def chat(message, history):
    """Stream a model reply for the Gradio ChatInterface.

    Args:
        message: The latest user message (plain string).
        history: Prior conversation turns. Gradio may deliver these either
            as ``{"role": ..., "content": ...}`` dicts (``type="messages"``)
            or as legacy ``(user, assistant)`` pairs; both are handled.

    Yields:
        The progressively accumulated assistant reply, one chunk at a time,
        so the UI renders a live token stream.
    """
    global model, tokenizer

    # Lazy-load on the first GPU invocation so the weights are paid for once
    # and reused by subsequent requests in the same worker.
    if model is None:
        print("🔄 Loading model...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            dtype=torch.bfloat16,
            device_map="auto",
        )
        model.eval()
        print("✅ Model loaded!")
    else:
        print("♻️ Reusing already loaded model in memory.")

    # Rebuild the full conversation, starting from the fixed system prompt.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: keep only well-formed role/content entries.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # Legacy tuple format: (user text, assistant text).
            messages.append({"role": "user", "content": turn[0]})
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    # Tokenize with the model's chat template and move to the model device.
    input_ids = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to(model.device)

    # Streamer yields decoded text chunks as generate() produces tokens.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": 1024,
        "temperature": 0.85,
        "top_p": 0.9,
        "top_k": 50,
        "do_sample": True,
        "repetition_penalty": 1.1,
        # BUG FIX: the original passed "eos_token_id": None, which explicitly
        # disables the stop token inside generate() and forces every reply to
        # run the full 1024 tokens. Omitting the key lets generate() fall back
        # to the model's configured EOS so replies end naturally.
    }

    # generate() blocks, so run it on a worker thread and consume the
    # streamer on this (generator) side.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        print(new_text, end="", flush=True)  # mirror the stream to server logs
        yield partial_text

    # Make sure the generation thread has fully finished before returning.
    thread.join()
|
|
|
|
| |
| |
| |
# UI copy and canned example prompts, pulled out as named values so the
# ChatInterface construction below reads as pure wiring.
_DESCRIPTION = (
    "**مساعد ذكي لخدمة الدعم الفني في شبكة النور - NB TEL**\n\n"
    "تحدث معه كأنك زبون: اشرح مشكلتك، اسأل عن الباقات، أو اطلب تذكرة دعم."
)

_EXAMPLES = [
    ["الإنترنت عندي مقطوع من الصبح، شنو السبب؟"],
    ["أريد أرقّي الباقة إلى 50 ميج."],
    ["ضوء الـ LOS في جهاز الفايبر أحمر، شنو معناها؟"],
]

# Chat UI wired to the streaming chat() generator above.
demo = gr.ChatInterface(
    fn=chat,
    type="messages",
    title="📞 دعم فني - NB TEL Internet Assistant",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    theme=gr.themes.Soft(),
    cache_examples=False,
)


if __name__ == "__main__":
    demo.launch()
|
|