Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
from threading import Thread
|
| 3 |
import gradio as gr
|
| 4 |
import spaces
|
| 5 |
-
import
|
| 6 |
from openai_harmony import (
|
| 7 |
load_harmony_encoding,
|
| 8 |
HarmonyEncodingName,
|
|
@@ -14,12 +15,12 @@ from openai_harmony import (
|
|
| 14 |
ReasoningEffort,
|
| 15 |
)
|
| 16 |
|
| 17 |
-
#
|
| 18 |
RE_REASONING = re.compile(r'(?i)Reasoning:\s*(low|medium|high)')
|
| 19 |
RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
|
| 20 |
RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')
|
| 21 |
|
| 22 |
-
#
|
| 23 |
def parse_reasoning_and_instructions(system_prompt: str):
|
| 24 |
instructions = system_prompt or "You are a helpful assistant."
|
| 25 |
match = RE_REASONING.search(instructions)
|
|
@@ -32,8 +33,10 @@ def parse_reasoning_and_instructions(system_prompt: str):
|
|
| 32 |
cleaned_instructions = RE_REASONING.sub('', instructions).strip()
|
| 33 |
return effort, cleaned_instructions
|
| 34 |
|
|
|
|
| 35 |
model_id = "openai/gpt-oss-20b"
|
| 36 |
|
|
|
|
| 37 |
pipe = pipeline(
|
| 38 |
"text-generation",
|
| 39 |
model=model_id,
|
|
@@ -41,24 +44,33 @@ pipe = pipeline(
|
|
| 41 |
device_map="auto",
|
| 42 |
trust_remote_code=True,
|
| 43 |
)
|
|
|
|
|
|
|
| 44 |
enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
| 45 |
def format_conversation_history(chat_history):
|
| 46 |
messages = []
|
| 47 |
for item in chat_history:
|
| 48 |
role = item["role"]
|
| 49 |
content = item["content"]
|
| 50 |
if isinstance(content, list):
|
|
|
|
| 51 |
content = content[0]["text"] if content and "text" in content[0] else str(content)
|
| 52 |
messages.append({"role": role, "content": content})
|
| 53 |
return messages
|
| 54 |
|
| 55 |
@spaces.GPU()
|
| 56 |
def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
|
|
|
|
| 57 |
new_message = {"role": "user", "content": input_data}
|
| 58 |
processed_history = format_conversation_history(chat_history)
|
|
|
|
|
|
|
| 59 |
effort, instructions = parse_reasoning_and_instructions(system_prompt)
|
| 60 |
system_content = SystemContent.new().with_reasoning_effort(effort)
|
| 61 |
developer_content = DeveloperContent.new().with_instructions(instructions)
|
|
|
|
|
|
|
| 62 |
harmony_messages = [
|
| 63 |
Message.from_role_and_content(Role.SYSTEM, system_content),
|
| 64 |
Message.from_role_and_content(Role.DEVELOPER, developer_content),
|
|
@@ -67,8 +79,11 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
|
|
| 67 |
for m in processed_history + [new_message]:
|
| 68 |
role = Role.USER if m["role"] == "user" else Role.ASSISTANT
|
| 69 |
harmony_messages.append(Message.from_role_and_content(role, m["content"]))
|
|
|
|
| 70 |
conversation = Conversation.from_messages(harmony_messages)
|
| 71 |
prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
|
|
|
|
|
|
|
| 72 |
prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)
|
| 73 |
|
| 74 |
streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
|
@@ -83,13 +98,16 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
|
|
| 83 |
"streamer": streamer,
|
| 84 |
"return_full_text": False,
|
| 85 |
}
|
|
|
|
|
|
|
| 86 |
thread = Thread(target=pipe, args=(prompt_text,), kwargs=generation_kwargs)
|
| 87 |
thread.start()
|
| 88 |
|
| 89 |
-
#
|
| 90 |
thinking = ""
|
| 91 |
final = ""
|
| 92 |
started_final = False
|
|
|
|
| 93 |
for chunk in streamer:
|
| 94 |
if not started_final:
|
| 95 |
parts = RE_FINAL_MARKER.split(chunk, maxsplit=1)
|
|
@@ -99,11 +117,15 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
|
|
| 99 |
started_final = True
|
| 100 |
else:
|
| 101 |
final += chunk
|
|
|
|
| 102 |
clean_thinking = RE_ANALYSIS_PREFIX.sub('', thinking).strip()
|
| 103 |
clean_final = final.strip()
|
|
|
|
|
|
|
| 104 |
formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
|
| 105 |
yield formatted
|
| 106 |
|
|
|
|
| 107 |
demo = gr.ChatInterface(
|
| 108 |
fn=generate_response,
|
| 109 |
additional_inputs=[
|
|
@@ -123,7 +145,6 @@ demo = gr.ChatInterface(
|
|
| 123 |
[{"text": "Explain Newton laws clearly and concisely"}],
|
| 124 |
[{"text": "What are the benefits of open weight AI models"}],
|
| 125 |
[{"text": "Write a Python function to calculate the Fibonacci sequence"}],
|
| 126 |
-
|
| 127 |
],
|
| 128 |
cache_examples=False,
|
| 129 |
type="messages",
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
from threading import Thread
|
| 4 |
import gradio as gr
|
| 5 |
import spaces
|
| 6 |
+
from transformers import pipeline, TextIteratorStreamer
|
| 7 |
from openai_harmony import (
|
| 8 |
load_harmony_encoding,
|
| 9 |
HarmonyEncodingName,
|
|
|
|
| 15 |
ReasoningEffort,
|
| 16 |
)
|
| 17 |
|
| 18 |
+
# --- Regex settings ---
|
| 19 |
RE_REASONING = re.compile(r'(?i)Reasoning:\s*(low|medium|high)')
|
| 20 |
RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
|
| 21 |
RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')
|
| 22 |
|
| 23 |
+
# Extract the reasoning level from the system prompt
|
| 24 |
def parse_reasoning_and_instructions(system_prompt: str):
|
| 25 |
instructions = system_prompt or "You are a helpful assistant."
|
| 26 |
match = RE_REASONING.search(instructions)
|
|
|
|
| 33 |
cleaned_instructions = RE_REASONING.sub('', instructions).strip()
|
| 34 |
return effort, cleaned_instructions
|
| 35 |
|
| 36 |
+
# Model identifier
|
| 37 |
model_id = "openai/gpt-oss-20b"
|
| 38 |
|
| 39 |
+
# Load the model and tokenizer
|
| 40 |
pipe = pipeline(
|
| 41 |
"text-generation",
|
| 42 |
model=model_id,
|
|
|
|
| 44 |
device_map="auto",
|
| 45 |
trust_remote_code=True,
|
| 46 |
)
|
| 47 |
+
|
| 48 |
+
# Load the Harmony encoding
|
| 49 |
enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
| 50 |
+
|
| 51 |
def format_conversation_history(chat_history):
    """Normalize Gradio chat history into [{'role': ..., 'content': str}] dicts.

    Multimodal entries arrive with a list-valued ``content``; the first
    part's ``"text"`` field is used when present, otherwise the raw list
    is stringified so downstream code always sees a plain string.
    """
    def _as_text(content):
        # Non-list content is already a plain string; pass it through.
        if not isinstance(content, list):
            return content
        # If the content was multimodal, extract the text part.
        if content and "text" in content[0]:
            return content[0]["text"]
        return str(content)

    return [
        {"role": entry["role"], "content": _as_text(entry["content"])}
        for entry in chat_history
    ]
|
| 61 |
|
| 62 |
@spaces.GPU()
|
| 63 |
def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
|
| 64 |
+
# Build the new user message
|
| 65 |
new_message = {"role": "user", "content": input_data}
|
| 66 |
processed_history = format_conversation_history(chat_history)
|
| 67 |
+
|
| 68 |
+
# Process the system prompt and reasoning level
|
| 69 |
effort, instructions = parse_reasoning_and_instructions(system_prompt)
|
| 70 |
system_content = SystemContent.new().with_reasoning_effort(effort)
|
| 71 |
developer_content = DeveloperContent.new().with_instructions(instructions)
|
| 72 |
+
|
| 73 |
+
# Build the Harmony-format messages
|
| 74 |
harmony_messages = [
|
| 75 |
Message.from_role_and_content(Role.SYSTEM, system_content),
|
| 76 |
Message.from_role_and_content(Role.DEVELOPER, developer_content),
|
|
|
|
| 79 |
for m in processed_history + [new_message]:
|
| 80 |
role = Role.USER if m["role"] == "user" else Role.ASSISTANT
|
| 81 |
harmony_messages.append(Message.from_role_and_content(role, m["content"]))
|
| 82 |
+
|
| 83 |
conversation = Conversation.from_messages(harmony_messages)
|
| 84 |
prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
|
| 85 |
+
|
| 86 |
+
# Decode the tokens back to text to feed the pipeline
|
| 87 |
prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)
|
| 88 |
|
| 89 |
streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
|
|
|
| 98 |
"streamer": streamer,
|
| 99 |
"return_full_text": False,
|
| 100 |
}
|
| 101 |
+
|
| 102 |
+
# Run text generation in a separate thread
|
| 103 |
thread = Thread(target=pipe, args=(prompt_text,), kwargs=generation_kwargs)
|
| 104 |
thread.start()
|
| 105 |
|
| 106 |
+
# Process the streaming output
|
| 107 |
thinking = ""
|
| 108 |
final = ""
|
| 109 |
started_final = False
|
| 110 |
+
|
| 111 |
for chunk in streamer:
|
| 112 |
if not started_final:
|
| 113 |
parts = RE_FINAL_MARKER.split(chunk, maxsplit=1)
|
|
|
|
| 117 |
started_final = True
|
| 118 |
else:
|
| 119 |
final += chunk
|
| 120 |
+
|
| 121 |
clean_thinking = RE_ANALYSIS_PREFIX.sub('', thinking).strip()
|
| 122 |
clean_final = final.strip()
|
| 123 |
+
|
| 124 |
+
# Format the output to display the thinking process
|
| 125 |
formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
|
| 126 |
yield formatted
|
| 127 |
|
| 128 |
+
# Gradio user interface
|
| 129 |
demo = gr.ChatInterface(
|
| 130 |
fn=generate_response,
|
| 131 |
additional_inputs=[
|
|
|
|
| 145 |
[{"text": "Explain Newton laws clearly and concisely"}],
|
| 146 |
[{"text": "What are the benefits of open weight AI models"}],
|
| 147 |
[{"text": "Write a Python function to calculate the Fibonacci sequence"}],
|
|
|
|
| 148 |
],
|
| 149 |
cache_examples=False,
|
| 150 |
type="messages",
|