Opera8 committed on
Commit
b3d8755
·
verified ·
1 Parent(s): e4629bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -6
app.py CHANGED
@@ -1,8 +1,9 @@
1
- from transformers import pipeline, TextIteratorStreamer
 
2
  from threading import Thread
3
  import gradio as gr
4
  import spaces
5
- import re
6
  from openai_harmony import (
7
  load_harmony_encoding,
8
  HarmonyEncodingName,
@@ -14,12 +15,12 @@ from openai_harmony import (
14
  ReasoningEffort,
15
  )
16
 
17
- # regex config
18
  RE_REASONING = re.compile(r'(?i)Reasoning:\s*(low|medium|high)')
19
  RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
20
  RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')
21
 
22
- # I think for system prompt reasoning level OpenAI mentioned you should do parsing so here's
23
  def parse_reasoning_and_instructions(system_prompt: str):
24
  instructions = system_prompt or "You are a helpful assistant."
25
  match = RE_REASONING.search(instructions)
@@ -32,8 +33,10 @@ def parse_reasoning_and_instructions(system_prompt: str):
32
  cleaned_instructions = RE_REASONING.sub('', instructions).strip()
33
  return effort, cleaned_instructions
34
 
 
35
  model_id = "openai/gpt-oss-20b"
36
 
 
37
  pipe = pipeline(
38
  "text-generation",
39
  model=model_id,
@@ -41,24 +44,33 @@ pipe = pipeline(
41
  device_map="auto",
42
  trust_remote_code=True,
43
  )
 
 
44
  enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
 
45
  def format_conversation_history(chat_history):
46
  messages = []
47
  for item in chat_history:
48
  role = item["role"]
49
  content = item["content"]
50
  if isinstance(content, list):
 
51
  content = content[0]["text"] if content and "text" in content[0] else str(content)
52
  messages.append({"role": role, "content": content})
53
  return messages
54
 
55
  @spaces.GPU()
56
  def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
 
57
  new_message = {"role": "user", "content": input_data}
58
  processed_history = format_conversation_history(chat_history)
 
 
59
  effort, instructions = parse_reasoning_and_instructions(system_prompt)
60
  system_content = SystemContent.new().with_reasoning_effort(effort)
61
  developer_content = DeveloperContent.new().with_instructions(instructions)
 
 
62
  harmony_messages = [
63
  Message.from_role_and_content(Role.SYSTEM, system_content),
64
  Message.from_role_and_content(Role.DEVELOPER, developer_content),
@@ -67,8 +79,11 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
67
  for m in processed_history + [new_message]:
68
  role = Role.USER if m["role"] == "user" else Role.ASSISTANT
69
  harmony_messages.append(Message.from_role_and_content(role, m["content"]))
 
70
  conversation = Conversation.from_messages(harmony_messages)
71
  prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
 
 
72
  prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)
73
 
74
  streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
@@ -83,13 +98,16 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
83
  "streamer": streamer,
84
  "return_full_text": False,
85
  }
 
 
86
  thread = Thread(target=pipe, args=(prompt_text,), kwargs=generation_kwargs)
87
  thread.start()
88
 
89
- # parsing thinking
90
  thinking = ""
91
  final = ""
92
  started_final = False
 
93
  for chunk in streamer:
94
  if not started_final:
95
  parts = RE_FINAL_MARKER.split(chunk, maxsplit=1)
@@ -99,11 +117,15 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
99
  started_final = True
100
  else:
101
  final += chunk
 
102
  clean_thinking = RE_ANALYSIS_PREFIX.sub('', thinking).strip()
103
  clean_final = final.strip()
 
 
104
  formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
105
  yield formatted
106
 
 
107
  demo = gr.ChatInterface(
108
  fn=generate_response,
109
  additional_inputs=[
@@ -123,7 +145,6 @@ demo = gr.ChatInterface(
123
  [{"text": "Explain Newton laws clearly and concisely"}],
124
  [{"text": "What are the benefits of open weight AI models"}],
125
  [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
126
-
127
  ],
128
  cache_examples=False,
129
  type="messages",
 
1
+ import os
2
+ import re
3
  from threading import Thread
4
  import gradio as gr
5
  import spaces
6
+ from transformers import pipeline, TextIteratorStreamer
7
  from openai_harmony import (
8
  load_harmony_encoding,
9
  HarmonyEncodingName,
 
15
  ReasoningEffort,
16
  )
17
 
18
+ # --- Regex configuration ---
19
  RE_REASONING = re.compile(r'(?i)Reasoning:\s*(low|medium|high)')
20
  RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
21
  RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')
22
 
23
+ # Extract the reasoning level from the system prompt
24
  def parse_reasoning_and_instructions(system_prompt: str):
25
  instructions = system_prompt or "You are a helpful assistant."
26
  match = RE_REASONING.search(instructions)
 
33
  cleaned_instructions = RE_REASONING.sub('', instructions).strip()
34
  return effort, cleaned_instructions
35
 
36
+ # Model identifier
37
  model_id = "openai/gpt-oss-20b"
38
 
39
+ # Load the model and tokenizer
40
  pipe = pipeline(
41
  "text-generation",
42
  model=model_id,
 
44
  device_map="auto",
45
  trust_remote_code=True,
46
  )
47
+
48
+ # Load the Harmony encoding
49
  enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
50
+
51
  def format_conversation_history(chat_history):
52
  messages = []
53
  for item in chat_history:
54
  role = item["role"]
55
  content = item["content"]
56
  if isinstance(content, list):
57
+ # If the content is multimodal, extract its text
58
  content = content[0]["text"] if content and "text" in content[0] else str(content)
59
  messages.append({"role": role, "content": content})
60
  return messages
61
 
62
  @spaces.GPU()
63
  def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
64
+ # Build the new user message
65
  new_message = {"role": "user", "content": input_data}
66
  processed_history = format_conversation_history(chat_history)
67
+
68
+ # Parse the system prompt and reasoning level
69
  effort, instructions = parse_reasoning_and_instructions(system_prompt)
70
  system_content = SystemContent.new().with_reasoning_effort(effort)
71
  developer_content = DeveloperContent.new().with_instructions(instructions)
72
+
73
+ # Build the Harmony-format messages
74
  harmony_messages = [
75
  Message.from_role_and_content(Role.SYSTEM, system_content),
76
  Message.from_role_and_content(Role.DEVELOPER, developer_content),
 
79
  for m in processed_history + [new_message]:
80
  role = Role.USER if m["role"] == "user" else Role.ASSISTANT
81
  harmony_messages.append(Message.from_role_and_content(role, m["content"]))
82
+
83
  conversation = Conversation.from_messages(harmony_messages)
84
  prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
85
+
86
+ # Decode the tokens back to text to pass to the pipeline
87
  prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)
88
 
89
  streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
 
98
  "streamer": streamer,
99
  "return_full_text": False,
100
  }
101
+
102
+ # Run text generation in a separate thread
103
  thread = Thread(target=pipe, args=(prompt_text,), kwargs=generation_kwargs)
104
  thread.start()
105
 
106
+ # Process the streaming output
107
  thinking = ""
108
  final = ""
109
  started_final = False
110
+
111
  for chunk in streamer:
112
  if not started_final:
113
  parts = RE_FINAL_MARKER.split(chunk, maxsplit=1)
 
117
  started_final = True
118
  else:
119
  final += chunk
120
+
121
  clean_thinking = RE_ANALYSIS_PREFIX.sub('', thinking).strip()
122
  clean_final = final.strip()
123
+
124
+ # Format the output to display the thinking process
125
  formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
126
  yield formatted
127
 
128
+ # Gradio user interface
129
  demo = gr.ChatInterface(
130
  fn=generate_response,
131
  additional_inputs=[
 
145
  [{"text": "Explain Newton laws clearly and concisely"}],
146
  [{"text": "What are the benefits of open weight AI models"}],
147
  [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
 
148
  ],
149
  cache_examples=False,
150
  type="messages",