# chatbox / app.py
# Provenance: Hugging Face Space file viewer — "anaspro", Update app.py,
# commit 3f28b8d (verified), 4.33 kB.
import os
import torch
import gradio as gr
import spaces
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from huggingface_hub import login
# ======================================================
# Settings
# ======================================================
MODEL_ID = "anaspro/gemma3-iraqi"

# Load the system prompt from an external file so the persona can be edited
# without touching the code. A missing file raises FileNotFoundError, which
# is the desired fail-fast behavior for a misconfigured Space.
with open("system_prompt.txt", "r", encoding="utf-8") as f:
    SYSTEM_PROMPT = f.read()

# Login to Hugging Face (required for gated/private model access).
# Read the token once instead of calling os.getenv twice.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)
    print("🔐 Logged in to Hugging Face")

# Global model handles — populated lazily on the first chat() call so the
# slow model load happens inside the GPU-allocated context (see chat()).
model = None
tokenizer = None
# ======================================================
# Chat function (ZeroGPU)
# ======================================================
@spaces.GPU(duration=120)
def chat(message, history):
    """Stream an assistant reply for the Gradio ChatInterface.

    Args:
        message: The current user message (str).
        history: Prior turns, either as ``{"role": ..., "content": ...}``
            dicts (Gradio ``type="messages"`` format) or as legacy
            ``(user, assistant)`` pairs. Both forms are accepted.

    Yields:
        str: The accumulated assistant text, growing as tokens stream in
        (Gradio renders each yield as the current reply).
    """
    global model, tokenizer

    # Lazy-load the model on first call so the download/instantiation runs
    # inside the ZeroGPU-allocated context; reuse the handles afterwards.
    if model is None:
        print("🔄 Loading model...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            dtype=torch.bfloat16,
            device_map="auto",
        )
        model.eval()
        print("✅ Model loaded!")
    else:
        print("♻️ Reusing already loaded model in memory.")

    # ======================================================
    # Build conversation: system prompt, then history, then the new turn.
    # ======================================================
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # Legacy tuple format: (user_message, assistant_reply).
            messages.append({"role": "user", "content": turn[0]})
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    # ======================================================
    # Tokenize input using the model's chat template.
    # ======================================================
    input_ids = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to(model.device)

    # ======================================================
    # Setup text streamer
    # ======================================================
    # The timeout keeps the consumer loop below from blocking forever if
    # model.generate crashes in the background thread (queue.Empty is
    # raised instead of a silent hang in the UI).
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
        timeout=120.0,
    )
    generation_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": 1024,
        "temperature": 0.85,
        "top_p": 0.9,
        "top_k": 50,
        "do_sample": True,
        "repetition_penalty": 1.1,
        # Deliberate: disable EOS so generation does not stop early
        # (original comment: "مهم حتى لا يتوقف مبكراً").
        "eos_token_id": None,
    }

    # ======================================================
    # Generate in a background thread while streaming tokens out.
    # daemon=True so a stuck generation cannot keep the process alive.
    # ======================================================
    thread = Thread(target=model.generate, kwargs=generation_kwargs, daemon=True)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        print(new_text, end="", flush=True)  # mirror the stream to the logs
        yield partial_text
    thread.join()
# ======================================================
# Gradio Interface
# ======================================================
# Chat UI in "messages" mode: history reaches chat() as role/content
# dicts, which chat() accepts (alongside legacy tuple pairs).
demo = gr.ChatInterface(
# Streaming generator defined above; each yield updates the reply.
fn=chat,
type="messages",
# Arabic-facing copy for the NB TEL technical-support assistant.
title="📞 دعم فني - NB TEL Internet Assistant",
description=(
"**مساعد ذكي لخدمة الدعم الفني في شبكة النور - NB TEL**\n\n"
"تحدث معه كأنك زبون: اشرح مشكلتك، اسأل عن الباقات، أو اطلب تذكرة دعم."
),
# Sample customer queries: outage report, plan upgrade, red LOS light.
examples=[
["الإنترنت عندي مقطوع من الصبح، شنو السبب؟"],
["أريد أرقّي الباقة إلى 50 ميج."],
["ضوء الـ LOS في جهاز الفايبر أحمر، شنو معناها؟"],
],
theme=gr.themes.Soft(),
# Running the examples would trigger GPU generation at build time; skip.
cache_examples=False,
)
# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()