anaspro committed on
Commit 151da18 · 1 Parent(s): 2f06f2b
Files changed (1)
  1. app.py +63 -25
app.py CHANGED
@@ -3,7 +3,7 @@
  import os
  import torch
  import transformers
- from transformers import pipeline
  import gradio as gr
  import spaces
 
@@ -22,21 +22,65 @@ model_path = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"
  # If there is an HF_TOKEN in the environment
  hf_token = os.getenv("HF_TOKEN")
 
- # Initialize pipeline for chat
- pipeline_model = pipeline(
-     "text-generation",
-     model=model_path,
-     device_map="auto",
      token=hf_token,
      trust_remote_code=True
  )
 
  def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.0):
      """Generate response using the pipeline with messages format"""
      # Apply chat template
      try:
-         prompt = pipeline_model.tokenizer.apply_chat_template(
-             messages,
              tokenize=False,
              add_generation_prompt=True
          )
@@ -44,14 +88,12 @@ def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=
          print(f"Template application error: {template_error}")
          # Fallback: manually format messages
          prompt = ""
-         for msg in messages:
-             if msg['role'] == 'system':
-                 prompt += f"System: {msg['content']}\n\n"
-             elif msg['role'] == 'user':
-                 prompt += f"User: {msg['content']}\n"
-             elif msg['role'] == 'assistant':
-                 prompt += f"Assistant: {msg['content']}\n"
-         prompt += "Assistant: "
 
      # Debug: print final prompt
      print(f"Final prompt preview: {prompt[:200]}...")
@@ -80,21 +122,17 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
      max_new_tokens, temperature, top_p, top_k, repetition_penalty: Generation parameters
      """
      try:
-         # Build messages list - Gemma template expects alternating user/assistant after system
          messages = []
 
-         # Add system message first (will be handled specially by the template)
          messages.append({"role": "system", "content": DEFAULT_SYSTEM_PROMPT})
 
-         # Add conversation history (ensure alternating user/assistant)
          if history:
              for msg in history:
                  if isinstance(msg, dict) and 'role' in msg and 'content' in msg:
-                     # Convert 'assistant' role to 'model' for Gemma template if needed
-                     role = msg['role']
-                     if role == 'assistant':
-                         role = 'assistant'  # Keep as assistant, template converts to 'model'
-                     messages.append({"role": role, "content": msg['content']})
 
          # Add current user message
          if isinstance(message, dict):
@@ -159,7 +197,7 @@ demo = gr.ChatInterface(
      - 🔧 Technical support and troubleshooting
      - 📋 Service information and guidance
      - 🧠 **Remembers the previous conversation** - you can refer back to earlier topics
-     - 🎯 Powered by the Unsloth Meta-Llama-3.1-8B-Instruct-bnb-4bit model
 
      Chat with Alex to solve your technical problems, ask about services, or get product information.""",
      fill_height=True,
 
  import os
  import torch
  import transformers
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
  import gradio as gr
  import spaces
 
 
  # If there is an HF_TOKEN in the environment
  hf_token = os.getenv("HF_TOKEN")
 
+ # Initialize model and tokenizer separately for better control
+ print("Loading model and tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(
+     model_path,
      token=hf_token,
      trust_remote_code=True
  )
 
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     device_map="auto",
+     token=hf_token,
+     trust_remote_code=True,
+     torch_dtype=torch.bfloat16,
+     low_cpu_mem_usage=True,
+     quantization_config={
+         "load_in_4bit": True,
+         "bnb_4bit_use_double_quant": True,
+         "bnb_4bit_quant_type": "nf4",
+         "bnb_4bit_compute_dtype": torch.bfloat16
+     }
+ )
+
+ # Create pipeline with the loaded model
+ pipeline_model = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     device_map="auto"
+ )
+
+ print("Model loaded successfully!")
+
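The dict form of quantization_config above should work on recent transformers versions, but the more commonly documented form is an explicit BitsAndBytesConfig object. A minimal sketch with the same settings (assumes the bitsandbytes package is available; an unsloth *-bnb-4bit checkpoint typically already ships pre-quantized 4-bit weights, so this mainly restates the intent):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Same 4-bit settings as the dict above, expressed as a BitsAndBytesConfig.
# model_path and hf_token are the variables already defined in app.py.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    token=hf_token,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    quantization_config=bnb_config,
)
```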
  def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.0):
      """Generate response using the pipeline with messages format"""
+     # Gemma expects messages in format: [{"role": "user", "content": "..."}, {"role": "model", "content": "..."}]
+     # Convert 'assistant' to 'model' for Gemma
+     gemma_messages = []
+
+     for msg in messages:
+         role = msg['role']
+         # Gemma uses 'model' instead of 'assistant'
+         if role == 'assistant':
+             role = 'model'
+         # Gemma doesn't use system role in the same way - prepend to first user message
+         if role == 'system':
+             continue  # We'll handle system prompt differently
+         gemma_messages.append({"role": role, "content": msg['content']})
+
+     # If there's a system prompt, prepend it to the first user message
+     if messages and messages[0]['role'] == 'system' and gemma_messages:
+         system_content = messages[0]['content']
+         if gemma_messages[0]['role'] == 'user':
+             gemma_messages[0]['content'] = f"{system_content}\n\n{gemma_messages[0]['content']}"
+
      # Apply chat template
      try:
+         prompt = tokenizer.apply_chat_template(
+             gemma_messages,
              tokenize=False,
              add_generation_prompt=True
          )
 
          print(f"Template application error: {template_error}")
          # Fallback: manually format messages
          prompt = ""
+         for msg in gemma_messages:
+             if msg['role'] == 'user':
+                 prompt += f"<start_of_turn>user\n{msg['content']}<end_of_turn>\n"
+             elif msg['role'] == 'model':
+                 prompt += f"<start_of_turn>model\n{msg['content']}<end_of_turn>\n"
+         prompt += "<start_of_turn>model\n"
 
      # Debug: print final prompt
      print(f"Final prompt preview: {prompt[:200]}...")
 
      max_new_tokens, temperature, top_p, top_k, repetition_penalty: Generation parameters
      """
      try:
+         # Build messages list - Gemma template expects alternating user/model
          messages = []
 
+         # Add system message first (will be prepended to first user message)
          messages.append({"role": "system", "content": DEFAULT_SYSTEM_PROMPT})
 
+         # Add conversation history
          if history:
              for msg in history:
                  if isinstance(msg, dict) and 'role' in msg and 'content' in msg:
+                     messages.append({"role": msg['role'], "content": msg['content']})
 
          # Add current user message
          if isinstance(message, dict):
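The pass-through above assumes Gradio is supplying history in messages format (gr.ChatInterface(type="messages")), i.e. a list of {"role": ..., "content": ...} dicts. A small sketch of the list generate_response would hand to generate_with_pipeline under that assumption (the sample turns are invented; DEFAULT_SYSTEM_PROMPT is defined elsewhere in app.py):

```python
# Illustrative only: what `messages` looks like after the steps above,
# assuming gr.ChatInterface(type="messages") and a two-turn history.
history = [
    {"role": "user", "content": "Do you have fiber plans?"},
    {"role": "assistant", "content": "Yes, 100 Mbps and 500 Mbps."},
]
message = "How much is the 500 Mbps plan?"

messages = [{"role": "system", "content": DEFAULT_SYSTEM_PROMPT}]
messages += history                                     # prior turns, roles kept as-is
messages.append({"role": "user", "content": message})   # newest user message last
```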
 
      - 🔧 Technical support and troubleshooting
      - 📋 Service information and guidance
      - 🧠 **Remembers the previous conversation** - you can refer back to earlier topics
+     - 🎯 Powered by the Gemma-3-4B-IT model
 
      Chat with Alex to solve your technical problems, ask about services, or get product information.""",
      fill_height=True,
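The generation call itself sits in the unchanged portion of app.py and does not appear in this diff; with the pipeline built above it would typically look something like the sketch below. The parameter names mirror the generate_with_pipeline signature; do_sample and return_full_text are common additions, not taken from the file:

```python
# Hypothetical call; the real one lives in the unchanged portion of app.py.
outputs = pipeline_model(
    prompt,
    max_new_tokens=max_new_tokens,
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    repetition_penalty=repetition_penalty,
    do_sample=True,          # sample so temperature/top_p/top_k take effect
    return_full_text=False,  # return only the newly generated text
)
reply = outputs[0]["generated_text"]
```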