anaspro committed
Commit 5f4ed60 · verified · 1 Parent(s): 7d54994

Update app.py

Files changed (1):
  1. app.py  +50 -33
app.py CHANGED
@@ -10,63 +10,78 @@ from huggingface_hub import login
 # Settings
 # ======================================================
 MODEL_ID = "anaspro/gemma3-iraqi"
+
 # Load system prompt from external file
 with open("system_prompt.txt", "r", encoding="utf-8") as f:
     SYSTEM_PROMPT = f.read()
 
-# Login to HF
+# Login to Hugging Face
 if os.getenv("HF_TOKEN"):
     login(token=os.getenv("HF_TOKEN"))
     print("🔐 Logged in to Hugging Face")
 
-# Global variables
+# Global model variables
 model = None
 tokenizer = None
 
 # ======================================================
-# Chat function with ZeroGPU
+# Chat function (ZeroGPU)
 # ======================================================
-@spaces.GPU(duration=120)  # increased the duration
+@spaces.GPU(duration=120)
 def chat(message, history):
     global model, tokenizer
-
+
     # Load model once
     if model is None:
         print("🔄 Loading model...")
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
-            torch_dtype=torch.bfloat16,
+            dtype=torch.bfloat16,
             device_map="auto",
         )
         model.eval()
         print("✅ Model loaded!")
-
+    else:
+        print("♻️ Reusing already loaded model in memory.")
+
+    # ======================================================
     # Build conversation
+    # ======================================================
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-
-    # Add history
-    for user_msg, bot_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": bot_msg})
-
-    # Add current message
+
+    # Add conversation history
+    for turn in history:
+        if isinstance(turn, dict):
+            role = turn.get("role")
+            content = turn.get("content")
+            if role and content:
+                messages.append({"role": role, "content": content})
+        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
+            messages.append({"role": "user", "content": turn[0]})
+            messages.append({"role": "assistant", "content": turn[1]})
+
+    # Add current user message
     messages.append({"role": "user", "content": message})
-
-    # Tokenize
+
+    # ======================================================
+    # Tokenize input
+    # ======================================================
     input_ids = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt",
         add_generation_prompt=True
     ).to(model.device)
-
-    # Setup streamer
+
+    # ======================================================
+    # Setup text streamer
+    # ======================================================
     streamer = TextIteratorStreamer(
-        tokenizer,
-        skip_prompt=True,
+        tokenizer,
+        skip_prompt=True,
         skip_special_tokens=True
     )
-
+
     generation_kwargs = {
         "input_ids": input_ids,
         "streamer": streamer,
@@ -77,38 +92,40 @@ def chat(message, history):
         "do_sample": True,
         "repetition_penalty": 1.1,
     }
-
-    # Start generation in thread
+
+    # ======================================================
+    # Generate output in a separate thread
+    # ======================================================
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
-
-    # Stream response
+
     partial_text = ""
     for new_text in streamer:
         partial_text += new_text
        yield partial_text
-
+
     thread.join()
 
+
 # ======================================================
 # Gradio Interface
 # ======================================================
 demo = gr.ChatInterface(
     fn=chat,
     type="messages",
-    title="🇮🇶 Gemma 3 Iraqi Assistant",
+    title="📞 دعم فني - NB TEL Internet Assistant",  # "📞 Technical Support - NB TEL Internet Assistant"
     description=(
-        "**نموذج Gemma 3 مدرب على اللهجة العراقية**\n\n"  # "Gemma 3 model trained on the Iraqi dialect"
-        "اسأل أي سؤال باللهجة العراقية أو العربية الفصحى!"  # "Ask anything in Iraqi dialect or Modern Standard Arabic!"
+        "**مساعد ذكي لخدمة الدعم الفني في شبكة النور - NB TEL**\n\n"  # "Smart assistant for NB TEL (Al-Noor Network) technical support"
+        "تحدث معه كأنك زبون: اشرح مشكلتك، اسأل عن الباقات، أو اطلب تذكرة دعم."  # "Talk to it like a customer: describe your problem, ask about plans, or request a support ticket."
     ),
     examples=[
-        ["شلونك؟"],  # "How are you?"
-        ["شنو الفرق بين البرمجة والذكاء الاصطناعي؟"],  # "What is the difference between programming and artificial intelligence?"
-        ["علمني اسوي دولمة"],  # "Teach me how to make dolma"
+        ["الإنترنت عندي مقطوع من الصبح، شنو السبب؟"],  # "My internet has been down since this morning, what's the cause?"
+        ["أريد أرقّي الباقة إلى 50 ميج."],  # "I want to upgrade my plan to 50 Mbps."
+        ["ضوء الـ LOS في جهاز الفايبر أحمر، شنو معناها؟"],  # "The LOS light on my fiber unit is red, what does that mean?"
     ],
     theme=gr.themes.Soft(),
     cache_examples=False,
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
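
The substantive fixes in this commit, besides retargeting the UI copy to the NB TEL support scenario, are the `torch_dtype=` → `dtype=` rename (`dtype` is the argument name recent `transformers` releases prefer for `from_pretrained`) and the new history loop. With `gr.ChatInterface(type="messages")`, Gradio passes `history` as a list of `{"role": ..., "content": ...}` dicts, so the old `for user_msg, bot_msg in history:` silently unpacked each dict into its two keys, `"role"` and `"content"`, instead of the turn texts. A minimal sketch of the same normalization, with `normalize_history` as a hypothetical helper name (app.py inlines this loop):

```python
def normalize_history(history):
    """Accept both Gradio 'messages' dicts and legacy (user, bot) tuples."""
    messages = []
    for turn in history:
        if isinstance(turn, dict):  # type="messages" format
            role, content = turn.get("role"), turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:  # legacy tuple format
            messages.append({"role": "user", "content": turn[0]})
            messages.append({"role": "assistant", "content": turn[1]})
    return messages

# Both history shapes normalize to the same message list:
assert normalize_history([("hi", "hello!")]) == normalize_history(
    [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello!"}]
)
```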
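
Both versions build the prompt with `apply_chat_template`, which renders the message dicts through the tokenizer's chat template; `add_generation_prompt=True` appends the assistant header so the model continues as the assistant instead of extending the user turn. A small illustration, using `HuggingFaceTB/SmolLM2-135M-Instruct` purely as a stand-in instruct checkpoint (any tokenizer with a chat template behaves analogously):

```python
from transformers import AutoTokenizer

# Stand-in checkpoint; the Space uses its own fine-tuned Gemma 3 model.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")

messages = [
    {"role": "system", "content": "You are a support agent."},
    {"role": "user", "content": "My internet is down."},
]

# add_generation_prompt=True appends the assistant header tokens so that
# generation continues as the assistant's reply.
input_ids = tokenizer.apply_chat_template(
    messages, return_tensors="pt", add_generation_prompt=True
)
print(tokenizer.decode(input_ids[0]))
```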
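
The streaming half is the standard `TextIteratorStreamer` pattern from `transformers`: `model.generate` blocks until generation finishes, so it runs in a worker thread while the generator function iterates the streamer and yields the growing text back to Gradio. A self-contained sketch of just that plumbing, with the tiny random-weight `sshleifer/tiny-gpt2` checkpoint standing in for the real model (its output is gibberish, but the mechanics are identical):

```python
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = "sshleifer/tiny-gpt2"  # stand-in; swap in any causal LM checkpoint

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

input_ids = tokenizer("Hello", return_tensors="pt").input_ids

# skip_prompt=True drops the echoed input tokens from the stream.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until done, so it runs in a worker thread ...
thread = Thread(
    target=model.generate,
    kwargs={"input_ids": input_ids, "streamer": streamer, "max_new_tokens": 20},
)
thread.start()

# ... while the main thread consumes text chunks as they are produced.
partial_text = ""
for new_text in streamer:
    partial_text += new_text
    print(partial_text)

thread.join()  # make sure the worker has finished before moving on
```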