anaspro committed
Commit 087baa7 · verified · 1 Parent(s): cd190e7

Update app.py

Files changed (1)
  1. app.py +13 -16
app.py CHANGED
@@ -1,16 +1,9 @@
-from transformers import AutoTokenizer, TextIteratorStreamer
+from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM
 import torch
 import gradio as gr
 import spaces
 from threading import Thread
 
-# Try to use Unsloth if it is available
-try:
-    from unsloth import FastLanguageModel
-    HAS_UNSLOTH = True
-except ImportError:
-    HAS_UNSLOTH = False
-
 # ======================================================
 # Model settings
 # ======================================================
@@ -23,10 +16,16 @@ SYSTEM_PROMPT = (
 )
 
 # ======================================================
-# Load the model (with support for Unsloth or standard Transformers)
+# Load the model (with Unsloth support if available)
 # ======================================================
 print("🔄 Loading model:", MODEL_ID)
 
+try:
+    from unsloth import FastLanguageModel
+    HAS_UNSLOTH = True
+except ImportError:
+    HAS_UNSLOTH = False
+
 if HAS_UNSLOTH:
     print("🚀 Using Unsloth FastLanguageModel backend")
     model, tokenizer = FastLanguageModel.from_pretrained(
@@ -37,7 +36,6 @@ if HAS_UNSLOTH:
     )
 else:
     print("⚙️ Using standard Transformers backend")
-    from transformers import AutoModelForCausalLM
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         torch_dtype=torch.bfloat16,
@@ -52,9 +50,8 @@ print("✅ Model ready!\n")
 # ======================================================
 # Chat function
 # ======================================================
-@spaces.GPU(duration=60)
 def chat(message, history):
-    # Convert the chat history to messages format
+    # Convert the chat history into the messages format
     messages = []
     for msg in history:
         if msg["role"] == "user":
@@ -65,14 +62,14 @@ def chat(message, history):
     # Add the current question together with the system prompt
    messages.append({"role": "user", "content": f"{SYSTEM_PROMPT}\n\nالسؤال: {message}"})
 
-    # Prepare the input via the chat template
+    # Prepare the input via chat template
     input_ids = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt",
         add_generation_prompt=True
     ).to(model.device)
 
-    # Set up the Streamer for live streaming
+    # Streamer for live streaming of the generated text
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
     generation_kwargs = dict(
@@ -85,7 +82,7 @@ def chat(message, history):
         repetition_penalty=1.15,
     )
 
-    # Run generation in a separate Thread
+    # Run generation in a separate Thread for live streaming
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
 
@@ -122,4 +119,4 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    demo.launch()
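
For context beyond the hunks shown above, the file relies on the standard Transformers streaming pattern: model.generate runs in a background Thread while the Gradio chat handler drains the TextIteratorStreamer and yields partial text. Below is a minimal, self-contained sketch of that pattern; the placeholder MODEL_ID, the sampling values other than repetition_penalty, and the final yield loop are assumptions for illustration, not code taken from this commit.

from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = "org/model-name"  # hypothetical placeholder; app.py defines the real ID

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

def chat(message, history):
    # history arrives as a list of {"role", "content"} dicts; append the new user turn
    messages = list(history) + [{"role": "user", "content": message}]

    input_ids = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to(model.device)

    # The streamer decodes generated tokens into text chunks as they are produced
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=512,       # assumed value
        do_sample=True,
        temperature=0.7,          # assumed value
        repetition_penalty=1.15,
    )

    # generate() blocks, so it runs in a worker thread while this generator
    # iterates the streamer and yields the growing answer back to Gradio
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial

A gr.ChatInterface(chat, type="messages") consumes such a generator directly, rendering each yielded string as the progressively updated assistant reply.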