anaspro committed
Commit d11fb30 · verified · 1 Parent(s): de72440

Update app.py

Files changed (1)
  1. app.py +62 -54
app.py CHANGED
@@ -7,32 +7,33 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from huggingface_hub import login
 
 # ======================================================
-# General settings
+# Settings
 # ======================================================
 MODEL_ID = "anaspro/gemma3-iraqi"
-
 SYSTEM_PROMPT = (
-    "أنت مساعد ذكي تفهم اللهجة العراقية والعربية الفصحى. "
-    "جاوب على الأسئلة بإيجاز ووضوح، بنفس لغة المستخدم. "
-    "لا تستخدم مقدمات مثل (مرحباً أو بالتأكيد)، فقط الجواب المباشر."
+    "أنت مساعد ذكي يفهم اللهجة العراقية. "
+    "جاوب بشكل مباشر وواضح بنفس لغة المستخدم."
 )
 
+# Login to HF
 if os.getenv("HF_TOKEN"):
     login(token=os.getenv("HF_TOKEN"))
-    print("🔐 Logged in to Hugging Face Hub")
-
-print("✅ App initialized. Model will load on first GPU request.\n")
+    print("🔐 Logged in to Hugging Face")
 
+# Global variables
+model = None
+tokenizer = None
 
 # ======================================================
-# Load the model inside the ZeroGPU context
+# Chat function with ZeroGPU
 # ======================================================
-@spaces.GPU(duration=60)
+@spaces.GPU(duration=120)  # increased the duration
 def chat(message, history):
     global model, tokenizer
-
-    if "model" not in globals():
-        print("🔄 Loading model on GPU...")
+
+    # Load model once
+    if model is None:
+        print("🔄 Loading model...")
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
@@ -40,68 +41,75 @@ def chat(message, history):
             device_map="auto",
         )
         model.eval()
-        print("✅ Model loaded successfully on GPU!")
-
-    # Prepare the conversation history
-    messages = []
-    for msg in history:
-        messages.append(msg)
-
-    messages.append({"role": "user", "content": f"{SYSTEM_PROMPT}\n\nالسؤال: {message}"})
-
+        print("✅ Model loaded!")
+
+    # Build conversation
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
+    # Add history (type="messages" passes a list of {"role", "content"} dicts)
+    for msg in history:
+        if msg["role"] in ("user", "assistant"):
+            messages.append({"role": msg["role"], "content": msg["content"]})
+
+    # Add current message
+    messages.append({"role": "user", "content": message})
+
+    # Tokenize
     input_ids = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt",
         add_generation_prompt=True
     ).to(model.device)
-
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-    generation_kwargs = dict(
-        input_ids=input_ids,
-        streamer=streamer,
-        max_new_tokens=512,
-        temperature=0.8,
-        top_p=0.95,
-        do_sample=True,
-        repetition_penalty=1.05,
+
+    # Setup streamer
+    streamer = TextIteratorStreamer(
+        tokenizer,
+        skip_prompt=True,
+        skip_special_tokens=True
     )
-
+
+    generation_kwargs = {
+        "input_ids": input_ids,
+        "streamer": streamer,
+        "max_new_tokens": 512,
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "top_k": 50,
+        "do_sample": True,
+        "repetition_penalty": 1.1,
+    }
+
+    # Start generation in thread
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
-
+
+    # Stream response
     partial_text = ""
     for new_text in streamer:
         partial_text += new_text
-        yield partial_text.strip()
-
+        yield partial_text
+
     thread.join()
 
-
 # ======================================================
-# Gradio interface
+# Gradio Interface
 # ======================================================
 demo = gr.ChatInterface(
     fn=chat,
     type="messages",
-    title="🇮🇶 Gemma 3 Iraqi Chat – ZeroGPU Edition",
-    description="""
-    **نموذج Gemma 3 Iraqi 🇮🇶**
-    يدعم اللهجة العراقية والعربية الفصحى.
-    يعمل على ZeroGPU ويُحمّل الموديل فقط عند أول طلب.
-
-    🧠 أمثلة:
-    - "شلونك اليوم؟"
-    - "اشرحلي شنو يعني تصعيد إداري"
-    - "وضحلي الفرق بين الدليل والعبرة"
-    """,
+    title="🇮🇶 Gemma 3 Iraqi Assistant",
+    description=(
+        "**نموذج Gemma 3 مدرب على اللهجة العراقية**\n\n"
+        "اسأل أي سؤال باللهجة العراقية أو العربية الفصحى!"
+    ),
     examples=[
-        ["شلونك اليوم؟"],
-        ["اشرحلي شنو يعني تصعيد إداري"],
-        ["وضحلي الفرق بين الدليل والعبرة"],
+        ["شلونك؟"],
+        ["شنو الفرق بين البرمجة والذكاء الاصطناعي؟"],
+        ["علمني اسوي دولمة"],
    ],
     theme=gr.themes.Soft(),
+    cache_examples=False,
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
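
For context on the rebuilt history handling above: `gr.ChatInterface(type="messages")` passes `history` to the chat function as a list of role/content dicts (OpenAI-style messages), and `chat()` folds those into the `messages` list that goes to `apply_chat_template`. A minimal sketch of those shapes; the turns shown are illustrative and not part of the commit:

    # Sketch only: the data shapes chat() works with, not part of app.py.
    # history as supplied by gr.ChatInterface(type="messages"):
    history = [
        {"role": "user", "content": "شلونك؟"},
        {"role": "assistant", "content": "..."},
    ]

    # messages as rebuilt inside chat() before apply_chat_template():
    messages = [{"role": "system", "content": "..."}]  # SYSTEM_PROMPT goes here
    for msg in history:
        if msg["role"] in ("user", "assistant"):
            messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": "علمني اسوي دولمة"})  # current turn

The resulting list is then rendered by `tokenizer.apply_chat_template(..., add_generation_prompt=True)` into the chat format of the underlying Gemma 3 tokenizer before generation.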