anaspro committed on
Commit
0f96efd
·
verified ·
1 Parent(s): cf07170

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -43
app.py CHANGED
@@ -1,23 +1,35 @@
1
  import os
2
  os.system("pip install -q accelerate")
3
 
4
- import gradio as gr
5
  import torch
6
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
7
- from threading import Thread
8
  import spaces
 
 
 
 
 
 
 
9
 
10
- MODEL_NAME = "anaspro/iraqi-kashif-2b"
11
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
12
 
 
 
 
13
  @spaces.GPU
14
  def load_model():
15
- print("🔄 Loading model and tokenizer...")
16
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
17
  model = AutoModelForCausalLM.from_pretrained(
18
- MODEL_NAME,
19
- dtype=torch.float16,
20
  device_map="auto",
 
21
  )
22
  model.eval()
23
  print("✅ Model loaded successfully!")
@@ -25,57 +37,86 @@ def load_model():
25
 
26
  tokenizer, model = load_model()
27
 
28
- @spaces.GPU
29
- def respond(
30
- message,
31
- history: list[dict[str, str]],
32
- system_message,
33
- max_tokens,
34
- temperature,
35
- top_p,
36
- ):
37
- messages = [{"role": "system", "content": system_message}]
38
- messages.extend(history)
 
 
39
  messages.append({"role": "user", "content": message})
40
 
41
- prompt = tokenizer.apply_chat_template(
 
42
  messages,
43
- tokenize=False,
44
- add_generation_prompt=True,
45
- )
46
 
47
- inputs = tokenizer(prompt, return_tensors="pt").to(device)
48
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
 
 
 
49
 
50
  generation_kwargs = dict(
51
- **inputs,
52
  streamer=streamer,
53
- max_new_tokens=max_tokens,
54
- temperature=temperature,
55
- top_p=top_p,
56
  do_sample=True,
 
57
  )
58
 
 
59
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
60
  thread.start()
61
 
62
- response = ""
63
  for new_text in streamer:
64
- response += new_text
65
- yield response
 
 
66
 
 
 
 
 
 
 
 
 
67
 
68
- # لا نستخدم gr.get_state() إطلاقًا، فقط Gradio ChatInterface العادي
69
- chatbot = gr.ChatInterface(
70
- fn=respond,
71
- type="messages",
72
- additional_inputs=[
73
- gr.Textbox(value="أنت مساعد ذكي باللهجة العراقية.", label="System message"),
74
- gr.Slider(minimum=32, maximum=1024, value=256, step=8, label="Max tokens"),
75
- gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
76
- gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
 
 
 
 
 
 
77
  ],
 
78
  )
79
 
 
 
 
80
  if __name__ == "__main__":
81
- chatbot.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
2
  os.system("pip install -q accelerate")
3
 
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
  import torch
6
+ import gradio as gr
 
7
  import spaces
8
+ from threading import Thread
9
+
10
# ======================================================
# Model configuration
# ======================================================
MODEL_ID = "anaspro/iraqi-kashif-2b"  # fine-tuned model repository
HF_TOKEN = os.getenv("HF_TOKEN")  # provided via the Space's Secrets; None if unset

# Announce auth status up front so download failures are easy to diagnose.
if HF_TOKEN:
    print("🔐 HF_TOKEN detected, using authenticated download...")
else:
    print("⚠️ Warning: HF_TOKEN not found. Public model access only (may fail if private).")
20
 
21
# ======================================================
# Load the model inside the ZeroGPU worker (avoids CUDA init in the main process)
# ======================================================
@spaces.GPU
def load_model():
    """Download and initialize the tokenizer and model.

    Runs on a ZeroGPU worker (the ``@spaces.GPU`` decorator) so CUDA is
    only initialized where a GPU is actually available.

    Returns:
        tuple: ``(tokenizer, model)`` — the model is in eval mode and
        device-mapped automatically, loaded in bfloat16 to reduce VRAM use.
    """
    print("🔄 Loading Iraqi Kashif 2B model...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        dtype=torch.bfloat16,
        device_map="auto",
        token=HF_TOKEN,
    )
    model.eval()
    print("✅ Model loaded successfully!")
    # Explicit return — the module-level unpack below depends on it.
    return tokenizer, model


tokenizer, model = load_model()
39
 
40
# ======================================================
# Streaming chat handler
# ======================================================
@spaces.GPU(duration=60)
def chat(message, history, max_new_tokens=512, temperature=0.7, top_p=0.9):
    """Stream a reply from Iraqi Kashif 2B (Iraqi dialect + MSA).

    Args:
        message: Latest user message (plain text).
        history: Prior turns as ``(user, assistant)`` pairs (Gradio
            tuple-style chat history).
        max_new_tokens: Generation length cap (default matches the
            previously hard-coded 512).
        temperature: Sampling temperature (default 0.7, as before).
        top_p: Nucleus-sampling cutoff (default 0.9, as before).

    Yields:
        str: The accumulated response text, growing token by token.
    """
    # Rebuild the conversation context in chat-template message format.
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Apply the repo's chat_template.jinja and move tensors to the model's device.
    input_ids = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to(model.device)

    # Streamer yields decoded text incrementally while generation runs.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        repetition_penalty=1.1,
    )

    # model.generate blocks, so run it on a worker thread while this
    # (generator) function consumes the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # Make sure generation has fully finished before returning.
    thread.join()
89
 
90
# ======================================================
# Gradio UI
# ======================================================
_DESCRIPTION = """
**نموذج Iraqi Kashif 2B**

🧠 **مميزات النموذج:**
- مدرب على اللهجة العراقية + العربية الفصحى
- يدعم المحادثة الطبيعية والفهم السياقي
- مناسب لمشاريع الـ Chatbots و Call Centers

⚙️ **ملاحظات:**
- يعمل على GPU باستخدام ZeroGPU
- يستخدم `bfloat16` لتقليل استهلاك VRAM
"""

# Sample prompts shown under the chat box (Iraqi-dialect examples).
_EXAMPLES = [
    ["شلونك اليوم؟"],
    ["اشتعني كلمة دلع؟"],
    ["احجيلي نكتة عراقية"],
    ["ما الفرق بين الإنترنت والأنتربيت؟"],
    ["الشركة تفتح من 9 للـ 5، شنو أوقات الدوام؟"],
]

# Standard streaming chat UI around the `chat` generator above.
demo = gr.ChatInterface(
    fn=chat,
    title="🇮🇶 Iraqi Kashif 2B - Chat Demo",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    theme=gr.themes.Soft(),
)
117
 
118
# ======================================================
# Launch the app
# ======================================================
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (the standard HF Spaces port).
    demo.launch(server_name="0.0.0.0", server_port=7860)