anaspro committed on
Commit eef2265 · 1 Parent(s): 40db06d

Implement official Harmony format with openai-harmony package


Major improvements:
- Use the openai-harmony package for the proper GPT-OSS Harmony format
- Parse reasoning effort levels (low/medium/high) from the system prompt
- Separate the thinking process into a collapsible UI section
- Use the pipeline API instead of manual model loading
- Parse and display chain-of-thought reasoning
- Support System and Developer role messages
- Add an Arabic interface with a thinking-process display
- Update examples to showcase reasoning capabilities
- Simplify the code by using the official OpenAI Harmony encoding (see the sketch below)

This enables:
✅ Proper Harmony response format
✅ Adjustable reasoning levels
✅ Visible thinking process (chain-of-thought)
✅ Better Arabic support
✅ Cleaner, more maintainable code
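
For illustration, a minimal, self-contained sketch of the prompt-construction flow these changes adopt. It uses only the openai-harmony calls that appear in the app.py diff below; the prompt string and user message are invented:

    import re
    from openai_harmony import (
        load_harmony_encoding, HarmonyEncodingName, Role, Message,
        Conversation, SystemContent, DeveloperContent, ReasoningEffort,
    )

    # Extract "Reasoning: <level>" from the system prompt, defaulting to medium
    prompt = "You are an NB TEL support agent. Reasoning: high"  # hypothetical prompt
    m = re.search(r'(?i)Reasoning:\s*(low|medium|high)', prompt)
    level = m.group(1).lower() if m else "medium"
    effort = {"low": ReasoningEffort.LOW,
              "medium": ReasoningEffort.MEDIUM,
              "high": ReasoningEffort.HIGH}[level]
    instructions = re.sub(r'(?i)Reasoning:\s*(low|medium|high)', '', prompt).strip()

    # System message carries the reasoning effort; developer message carries instructions
    convo = Conversation.from_messages([
        Message.from_role_and_content(Role.SYSTEM, SystemContent.new().with_reasoning_effort(effort)),
        Message.from_role_and_content(Role.DEVELOPER, DeveloperContent.new().with_instructions(instructions)),
        Message.from_role_and_content(Role.USER, "My internet is slow."),
    ])

    # Render to token ids, ready for the assistant to complete
    enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    tokens = enc.render_conversation_for_completion(convo, Role.ASSISTANT)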

Files changed (2)
  1. app.py +216 -280
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,325 +1,261 @@
  import os
- import torch
  import gradio as gr
  import spaces
- import json
- import time
  from threading import Thread
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, BitsAndBytesConfig
  from huggingface_hub import login
  import logging

  # Setup logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- # ======================================================
- # Load Configuration
- # ======================================================
- def load_config():
-     """Load configuration from config.json"""
-     try:
-         with open("config.json", "r", encoding="utf-8") as f:
-             return json.load(f)
-     except FileNotFoundError:
-         logger.warning("config.json not found, using default settings")
-         return {
-             "model": {"model_id": "unsloth/gpt-oss-20b-GGUF"},
-             "generation": {
-                 "max_new_tokens": 1024,
-                 "temperature": 1,
-                 "top_p": 0.95,
-                 "top_k": 64,
-                 "do_sample": True,
-                 "repetition_penalty": 1.1,
-                 "timeout_seconds": 60
-             },
-             "interface": {"max_context_length": 4096}
-         }

- config = load_config()

  # ======================================================
- # Settings
  # ======================================================
- MODEL_ID = config["model"].get("model_id", "anaspro/Lahja-iraqi-4B")
-
- # Load system prompt from external file
  try:
      with open("system_prompt.txt", "r", encoding="utf-8") as f:
-         SYSTEM_PROMPT = f.read()
  except FileNotFoundError:
      logger.warning("system_prompt.txt not found, using default prompt")
-     SYSTEM_PROMPT = "أنت مساعد ذكي مفيد. تحدث بالعربية وساعد المستخدم في استفساراته."

- # Login to Hugging Face
- if os.getenv("HF_TOKEN"):
-     login(token=os.getenv("HF_TOKEN"))
-     logger.info("🔐 Logged in to Hugging Face")
-
- # Global model variables
- model = None
- tokenizer = None
- model_lock = False

  # ======================================================
- # Model loading function
  # ======================================================
- def load_model():
-     """Load the model and tokenizer with proper error handling"""
-     global model, tokenizer, model_lock
-
-     if model_lock:
-         logger.info("Model loading already in progress...")
-         return False
-
-     model_lock = True
-     try:
-         logger.info("🔄 Loading model...")
-
-         # Load tokenizer first
-         tokenizer = AutoTokenizer.from_pretrained(
-             MODEL_ID,
-             trust_remote_code=True,
-             use_fast=True
-         )
-
-         # Add padding token if missing
-         if tokenizer.pad_token is None:
-             tokenizer.pad_token = tokenizer.eos_token
-
-         # Configure 4-bit quantization
-         if config["model"].get("load_in_4bit", False):
-             quantization_config = BitsAndBytesConfig(
-                 load_in_4bit=True,
-                 bnb_4bit_compute_dtype=torch.float16,
-                 bnb_4bit_use_double_quant=True,
-                 bnb_4bit_quant_type="nf4"
-             )
-         else:
-             quantization_config = None
-
-         # Load model with optimized settings
-         model = AutoModelForCausalLM.from_pretrained(
-             MODEL_ID,
-             torch_dtype=config["model"].get("torch_dtype", "auto"),
-             device_map=config["model"].get("device_map", "auto"),
-             trust_remote_code=config["model"].get("trust_remote_code", True),
-             low_cpu_mem_usage=config["model"].get("low_cpu_mem_usage", True),
-             quantization_config=quantization_config
-         )
-
-         model.eval()
-
-         # Clear cache to free memory
-         if torch.cuda.is_available():
-             torch.cuda.empty_cache()
-
-         logger.info("✅ Model loaded successfully!")
-         return True
-
-     except Exception as e:
-         logger.error(f"❌ Error loading model: {str(e)}")
-         return False
-     finally:
-         model_lock = False

  # ======================================================
- # Chat function (ZeroGPU)
  # ======================================================
- @spaces.GPU(duration=120)
- def chat(message, history):
-     """Main chat function with improved error handling and conversation management"""
-     global model, tokenizer

-     # Check if model is loaded
-     if model is None or tokenizer is None:
-         return "❌ عذراً، النموذج لم يتم تحميله بعد. يرجى الانتظار قليلاً والمحاولة مرة أخرى."

-     try:
-         # ======================================================
-         # Build conversation properly
-         # ======================================================
-         messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-
-         # Process conversation history correctly
-         if history:
-             for exchange in history:
-                 if isinstance(exchange, dict):
-                     # Handle message format from Gradio
-                     if exchange.get("role") == "user":
-                         messages.append({"role": "user", "content": exchange.get("content", "")})
-                     elif exchange.get("role") == "assistant":
-                         messages.append({"role": "assistant", "content": exchange.get("content", "")})
-                 elif isinstance(exchange, (list, tuple)) and len(exchange) >= 2:
-                     # Handle [user_msg, assistant_msg] format
-                     if exchange[0]:  # User message
-                         messages.append({"role": "user", "content": str(exchange[0])})
-                     if exchange[1]:  # Assistant message
-                         messages.append({"role": "assistant", "content": str(exchange[1])})
-
-         # Add current user message
-         if message and message.strip():
-             # Filter to make sure the topic is internet-related
-             internet_keywords = ["نت", "انترنت", "مودم", "wifi", "باقة", "سرعة", "كابل", "راوتر", "فايبر", "اتصال", "شبكة", "تحميل", "رفع", "ميجا", "جيجا"]
-             message_lower = message.lower()
-
-             # If the message contains internet-related keywords or is a short general question
-             has_internet_keywords = any(keyword in message_lower for keyword in internet_keywords)
-             is_short_question = len(message.strip()) < 50  # short questions are allowed
-
-             if has_internet_keywords or is_short_question:
-                 messages.append({"role": "user", "content": message.strip()})
-             else:
-                 return "آسف، انا هنا حتى اساعدك بمشاكل النت والباقات بس. شنو مشكلتك بالإنترنت؟"
-         else:
-             return "يرجى كتابة رسالة صحيحة."
-
-         # ======================================================
-         # Tokenize input with error handling
-         # ======================================================
-         try:
-             max_length = config.get("interface", {}).get("max_context_length", 4096)
-             input_ids = tokenizer.apply_chat_template(
-                 messages,
-                 return_tensors="pt",
-                 add_generation_prompt=True,
-                 truncation=True,
-                 max_length=max_length
-             ).to(model.device)
-         except Exception as e:
-             logger.error(f"Tokenization error: {e}")
-             return "❌ خطأ في معالجة الرسالة. يرجى المحاولة مرة أخرى."
-
-         # ======================================================
-         # Setup text streamer
-         # ======================================================
-         streamer = TextIteratorStreamer(
-             tokenizer,
-             skip_prompt=True,
-             skip_special_tokens=True,
-             clean_up_tokenization_spaces=True
-         )

-         generation_config = config.get("generation", {})
-         generation_kwargs = {
-             "input_ids": input_ids,
-             "streamer": streamer,
-             "max_new_tokens": generation_config.get("max_new_tokens", 800),  # reduced further to prevent hallucination
-             "min_new_tokens": 15,  # a reasonable minimum
-             "temperature": generation_config.get("temperature", 0.6),  # reduce randomness further
-             "top_p": generation_config.get("top_p", 0.85),  # reduce diversity for control
-             "top_k": generation_config.get("top_k", 30),  # tighten constraints
-             "do_sample": generation_config.get("do_sample", True),
-             "repetition_penalty": generation_config.get("repetition_penalty", 1.15),  # increase the repetition penalty
-             "no_repeat_ngram_size": 4,  # prevent repeating longer phrases
-             "early_stopping": True,  # stop early on completed sentences
-             "pad_token_id": tokenizer.pad_token_id,
-             "eos_token_id": tokenizer.eos_token_id,
-             "use_cache": True
-         }

-         # ======================================================
-         # Generate output in a separate thread with timeout
-         # ======================================================
-         thread = Thread(target=model.generate, kwargs=generation_kwargs)
-         thread.daemon = True
-         thread.start()
-
-         partial_text = ""
-         start_time = time.time()
-         timeout = config.get("generation", {}).get("timeout_seconds", 60)
-
-         # Words that indicate the start of a new dialogue
-         dialogue_indicators = ["👤", "🤖", "العميل:", "الزبون:", "المساعد:", "العضو:", "السؤال:", "الجواب:"]
-
-         try:
-             for new_text in streamer:
-                 if time.time() - start_time > timeout:
-                     logger.warning("Generation timeout reached")
-                     break
-
-                 partial_text += new_text
-
-                 # Stop generation if the model starts writing a dialogue
-                 for indicator in dialogue_indicators:
-                     if indicator in partial_text[50:]:  # ignore the first 50 characters
-                         logger.info("Stopping generation - dialogue detected")
-                         return partial_text[:partial_text.find(indicator, 50)].strip()
-
-                 yield partial_text
-         except Exception as e:
-             logger.error(f"Generation error: {e}")
-             yield "❌ حدث خطأ أثناء توليد الإجابة. يرجى المحاولة مرة أخرى."
-
-         thread.join(timeout=5)  # Give thread 5 seconds to finish
-
-         # Clear GPU cache after generation
-         if torch.cuda.is_available():
-             torch.cuda.empty_cache()
-
-     except Exception as e:
-         logger.error(f"Chat function error: {e}")
-         return f"❌ حدث خطأ غير متوقع: {str(e)}"

  # ======================================================
- # Gradio Interface with enhanced styling
  # ======================================================
- def create_interface():
-     """Create the Gradio interface with enhanced UI"""

-     # Custom CSS for better styling
-     custom_css = """
-     .gradio-container {
-         max-width: 1000px !important;
-         margin: auto !important;
-     }
-     .chat-message {
-         padding: 10px !important;
-         margin: 5px 0 !important;
-         border-radius: 10px !important;
-     }
-     .message {
-         font-size: 16px !important;
-         line-height: 1.5 !important;
      }
-     """

-     # Create a simpler interface for better compatibility
-     demo = gr.ChatInterface(
-         fn=chat,
-         type="messages",
-         title="📞 دعم فني - NB TEL مساعد عراقي",
-         description="**مساعد ذكي متقدم يعتمد على GPT-OSS-20B من OpenAI للدعم الفني بشبكة النور - NB TEL**\n\n✨ قدرات متقدمة: تفكير منطقي، حلول خطوة بخطوة، تحليل شامل\n\nاحجي معاه كأنك زبون: اشرح مشكلتك، اسأل عن الباقات، او اطلب تذكرة دعم.",
-         examples=[
-             ["النت عندي بطيء جداً رغم باقة 100 ميجا. شرحلي الأسباب المحتملة والحلول."],
-             ["أريد فهم ليش النت بطيء. شرحلي خطوة بخطوة الأسباب والحلول."],
-             ["كم سعر باقة 60 ميجا وما هي مزاياها؟"],
-             ["جهازي يظهر متصل بس المواقع ما تفتح. ساعدني أشخيص المشكلة."],
-             ["أنا صاحب مؤسسة، أي باقة تناسب 10 موظفين وكم التكلفة؟"],
-             ["شلون اغير كلمة مرور الواي فاي خطوة بخطوة؟"],
-             ["النت ينقطع فجأة ويعود. ما السبب وكيف أصلحه؟"]
-         ],
-         cache_examples=False,
-         theme=gr.themes.Soft(
-             primary_hue="blue",
-             secondary_hue="gray",
-             neutral_hue="slate"
-         ),
-         css=custom_css
-     )

-     return demo

  # ======================================================
- # Load model on startup (before creating interface)
  # ======================================================
- logger.info("🚀 Starting application - loading model...")
- load_model()

- # Create the interface
- demo = create_interface()

  if __name__ == "__main__":
      demo.launch()
 
  import os
  import gradio as gr
  import spaces
+ import re
  from threading import Thread
+ from transformers import pipeline, TextIteratorStreamer
  from huggingface_hub import login
  import logging
+ from openai_harmony import (
+     load_harmony_encoding,
+     HarmonyEncodingName,
+     Role,
+     Message,
+     Conversation,
+     SystemContent,
+     DeveloperContent,
+     ReasoningEffort,
+ )

  # Setup logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

+ # Login to Hugging Face
+ if os.getenv("HF_TOKEN"):
+     login(token=os.getenv("HF_TOKEN"))
+     logger.info("🔐 Logged in to Hugging Face")

+ # Regex config for parsing reasoning and output
+ RE_REASONING = re.compile(r'(?i)Reasoning:\s*(low|medium|high)')
+ RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
+ RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')

  # ======================================================
+ # Load System Prompt
  # ======================================================
  try:
      with open("system_prompt.txt", "r", encoding="utf-8") as f:
+         DEFAULT_SYSTEM_PROMPT = f.read()
  except FileNotFoundError:
      logger.warning("system_prompt.txt not found, using default prompt")
+     DEFAULT_SYSTEM_PROMPT = """أنت مساعد ذكي متقدم يعتمد على نموذج GPT-OSS-20B من OpenAI مع دعم فني لشركة NB TEL.
+ تحجي بالعراقي بأسلوب مهني ومحترف.
+
+ Reasoning: high - استخدم مستوى تفكير عالي للتحليل المتعمق والحلول المتقدمة."""

  # ======================================================
+ # Parse Reasoning Level from System Prompt
  # ======================================================
+ def parse_reasoning_and_instructions(system_prompt: str):
+     """Parse reasoning effort level from system prompt"""
+     instructions = system_prompt or "You are a helpful assistant."
+     match = RE_REASONING.search(instructions)
+     effort_key = match.group(1).lower() if match else 'medium'
+     effort = {
+         'low': ReasoningEffort.LOW,
+         'medium': ReasoningEffort.MEDIUM,
+         'high': ReasoningEffort.HIGH,
+     }.get(effort_key, ReasoningEffort.MEDIUM)
+     cleaned_instructions = RE_REASONING.sub('', instructions).strip()
+     return effort, cleaned_instructions

  # ======================================================
+ # Load Model and Harmony Encoding
  # ======================================================
+ logger.info("🚀 Loading GPT-OSS-20B model...")

+ model_id = "unsloth/gpt-oss-20b-unsloth-bnb-4bit"

+ pipe = pipeline(
+     "text-generation",
+     model=model_id,
+     torch_dtype="auto",
+     device_map="auto",
+     trust_remote_code=True,
+ )

+ enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)

+ logger.info("✅ Model and harmony encoding loaded successfully!")

+ # ======================================================
+ # Format Conversation History
+ # ======================================================
+ def format_conversation_history(chat_history):
+     """Format Gradio chat history to standard message format"""
+     messages = []
+     for item in chat_history:
+         role = item["role"]
+         content = item["content"]
+         if isinstance(content, list):
+             content = content[0]["text"] if content and "text" in content[0] else str(content)
+         messages.append({"role": role, "content": content})
+     return messages

  # ======================================================
+ # Generate Response with Harmony Format
  # ======================================================
+ @spaces.GPU(duration=120)
+ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
+     """Generate response using GPT-OSS with Harmony format"""

+     # Create new user message
+     new_message = {"role": "user", "content": input_data}
+     processed_history = format_conversation_history(chat_history)
+
+     # Parse reasoning effort from system prompt
+     effort, instructions = parse_reasoning_and_instructions(system_prompt)
+
+     # Build harmony messages with proper system and developer roles
+     system_content = SystemContent.new().with_reasoning_effort(effort)
+     developer_content = DeveloperContent.new().with_instructions(instructions)
+
+     harmony_messages = [
+         Message.from_role_and_content(Role.SYSTEM, system_content),
+         Message.from_role_and_content(Role.DEVELOPER, developer_content),
+     ]
+
+     # Add conversation history
+     for m in processed_history + [new_message]:
+         role = Role.USER if m["role"] == "user" else Role.ASSISTANT
+         harmony_messages.append(Message.from_role_and_content(role, m["content"]))
+
+     # Render conversation using harmony encoding
+     conversation = Conversation.from_messages(harmony_messages)
+     prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
+     prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)
+
+     # Setup streaming
+     streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+     generation_kwargs = {
+         "max_new_tokens": max_new_tokens,
+         "do_sample": True,
+         "temperature": temperature,
+         "top_p": top_p,
+         "top_k": top_k,
+         "repetition_penalty": repetition_penalty,
+         "streamer": streamer,
+         "return_full_text": False,
      }

+     # Generate in separate thread
+     thread = Thread(target=pipe, args=(prompt_text,), kwargs=generation_kwargs)
+     thread.start()

+     # Parse thinking process and final answer
+     thinking = ""
+     final = ""
+     started_final = False
+
+     for chunk in streamer:
+         if not started_final:
+             parts = RE_FINAL_MARKER.split(chunk, maxsplit=1)
+             thinking += parts[0]
+             if len(parts) > 1:
+                 final += parts[-1]
+                 started_final = True
+         else:
+             final += chunk
+
+     # Clean and format output
+     clean_thinking = RE_ANALYSIS_PREFIX.sub('', thinking).strip()
+     clean_final = final.strip()
+
+     # Format with collapsible thinking section
+     if clean_thinking:
+         formatted = f"<details open><summary>🧠 عرض عملية التفكير (Thinking Process)</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
+     else:
+         formatted = clean_final
+
+     yield formatted

  # ======================================================
+ # Create Gradio Interface
  # ======================================================
+ demo = gr.ChatInterface(
+     fn=generate_response,
+     additional_inputs=[
+         gr.Slider(
+             label="Max New Tokens",
+             minimum=64,
+             maximum=4096,
+             step=1,
+             value=2048
+         ),
+         gr.Textbox(
+             label="System Prompt",
+             value=DEFAULT_SYSTEM_PROMPT,
+             lines=6,
+             placeholder="يمكنك تعديل التعليمات والمستوى: Reasoning: low/medium/high"
+         ),
+         gr.Slider(
+             label="Temperature",
+             minimum=0.1,
+             maximum=2.0,
+             step=0.1,
+             value=0.7
+         ),
+         gr.Slider(
+             label="Top-p",
+             minimum=0.05,
+             maximum=1.0,
+             step=0.05,
+             value=0.9
+         ),
+         gr.Slider(
+             label="Top-k",
+             minimum=1,
+             maximum=100,
+             step=1,
+             value=50
+         ),
+         gr.Slider(
+             label="Repetition Penalty",
+             minimum=1.0,
+             maximum=2.0,
+             step=0.05,
+             value=1.0
+         )
+     ],
+     examples=[
+         [{"text": "النت عندي بطيء جداً رغم باقة 100 ميجا. شرحلي الأسباب المحتملة والحلول خطوة بخطوة."}],
+         [{"text": "أريد فهم ليش النت بطيء. حللها بالتفصيل وأعطني حلول مرقمة."}],
+         [{"text": "كم سعر باقة 60 ميجا وما هي مزاياها بالمقارنة مع الباقات الأخرى؟"}],
+         [{"text": "جهازي يظهر متصل بس المواقع ما تفتح. ساعدني أشخيص المشكلة بالتفصيل."}],
+         [{"text": "أنا صاحب مؤسسة، أي باقة تناسب 10 موظفين؟ حلل الاحتياجات والتكلفة."}],
+         [{"text": "شلون اغير كلمة مرور الواي فاي خطوة بخطوة؟"}],
+         [{"text": "النت ينقطع فجأة ويعود. حلل السبب واعطني حل شامل."}],
+     ],
+     cache_examples=False,
+     type="messages",
+     title="📞 مساعد GPT-OSS-20B للدعم الفني - NB TEL",
+     description="""**🤖 مساعد ذكي متقدم يعتمد على GPT-OSS-20B من OpenAI للدعم الفني بشبكة النور - NB TEL**
+
+ ✨ **قدرات متقدمة:**
+ - 🧠 تفكير منطقي عميق (Chain-of-Thought)
+ - 📊 حلول خطوة بخطوة مع التحليل
+ - 🎯 مستويات تفكير قابلة للتعديل (Reasoning: low/medium/high)
+ - 💬 دعم كامل للغة العربية العراقية
+ - 🔧 تشخيص وحلول متقدمة للمشاكل التقنية
+
+ **احجي معاه كأنك زبون:** اشرح مشكلتك، اسأل عن الباقات، او اطلب تذكرة دعم.

+ *يمكنك رؤية عملية التفكير (Thinking Process) عند النقر على السهم أعلى الإجابة.*""",
+     fill_height=True,
+     textbox=gr.Textbox(
+         label="رسالتك",
+         placeholder="اكتب مشكلتك أو سؤالك هنا..."
+     ),
+     stop_btn="إيقاف التوليد",
+     multimodal=False,
+     theme=gr.themes.Soft(
+         primary_hue="blue",
+         secondary_hue="gray",
+         neutral_hue="slate"
+     ),
+ )

  if __name__ == "__main__":
      demo.launch()
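
As a sanity check on the channel parsing above, a standalone sketch (no model needed) of how generate_response separates the analysis channel from the final answer; the regexes are copied verbatim from the diff, while the sample chunks are invented:

    import re

    # Regexes as defined in the new app.py
    RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
    RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')

    # Hypothetical streamed chunks: analysis text, then the final-answer marker
    chunks = ["analysis The user reports slow spee", "ds; check the router first.",
              "assistantfinalPlease restart your router and retest the speed."]

    thinking, final, started_final = "", "", False
    for chunk in chunks:
        if not started_final:
            parts = RE_FINAL_MARKER.split(chunk, maxsplit=1)
            thinking += parts[0]
            if len(parts) > 1:          # marker seen: everything after it is the answer
                final += parts[-1]
                started_final = True
        else:
            final += chunk

    print(RE_ANALYSIS_PREFIX.sub('', thinking).strip())  # -> The user reports slow speeds; check the router first.
    print(final.strip())  # -> Please restart your router and retest the speed.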
requirements.txt CHANGED
@@ -6,6 +6,7 @@ torch>=2.0.0
  bitsandbytes>=0.40.0
  huggingface_hub>=0.20.0
  hf_transfer>=0.1.4
+ openai-harmony
  xformers>=0.0.20
  triton>=2.0.0
  sentencepiece>=0.1.99
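
Once the new dependency is installed, a quick import check confirms it resolves; a minimal sketch using only calls that appear in the app.py diff (assumes the environment mirrors requirements.txt):

    # Sanity check that openai-harmony is importable and the encoding loads
    from openai_harmony import load_harmony_encoding, HarmonyEncodingName

    enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    print(type(enc).__name__)  # exact class name depends on the package version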