Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
from threading import Thread
|
| 3 |
import gradio as gr
|
| 4 |
import spaces
|
| 5 |
-
import
|
| 6 |
from openai_harmony import (
|
| 7 |
load_harmony_encoding,
|
| 8 |
HarmonyEncodingName,
|
|
@@ -14,12 +15,12 @@ from openai_harmony import (
|
|
| 14 |
ReasoningEffort,
|
| 15 |
)
|
| 16 |
|
| 17 |
-
#
|
| 18 |
RE_REASONING = re.compile(r'(?i)Reasoning:\s*(low|medium|high)')
|
| 19 |
RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
|
| 20 |
RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')
|
| 21 |
|
| 22 |
-
#
|
| 23 |
def parse_reasoning_and_instructions(system_prompt: str):
|
| 24 |
instructions = system_prompt or "You are a helpful assistant."
|
| 25 |
match = RE_REASONING.search(instructions)
|
|
@@ -32,8 +33,10 @@ def parse_reasoning_and_instructions(system_prompt: str):
|
|
| 32 |
cleaned_instructions = RE_REASONING.sub('', instructions).strip()
|
| 33 |
return effort, cleaned_instructions
|
| 34 |
|
|
|
|
| 35 |
model_id = "openai/gpt-oss-20b"
|
| 36 |
|
|
|
|
| 37 |
pipe = pipeline(
|
| 38 |
"text-generation",
|
| 39 |
model=model_id,
|
|
@@ -41,24 +44,33 @@ pipe = pipeline(
|
|
| 41 |
device_map="auto",
|
| 42 |
trust_remote_code=True,
|
| 43 |
)
|
|
|
|
|
|
|
| 44 |
enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
|
|
| 45 |
def format_conversation_history(chat_history):
|
| 46 |
messages = []
|
| 47 |
for item in chat_history:
|
| 48 |
role = item["role"]
|
| 49 |
content = item["content"]
|
| 50 |
if isinstance(content, list):
|
|
|
|
| 51 |
content = content[0]["text"] if content and "text" in content[0] else str(content)
|
| 52 |
messages.append({"role": role, "content": content})
|
| 53 |
return messages
|
| 54 |
|
| 55 |
@spaces.GPU()
|
| 56 |
def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
|
|
|
|
| 57 |
new_message = {"role": "user", "content": input_data}
|
| 58 |
processed_history = format_conversation_history(chat_history)
|
|
|
|
|
|
|
| 59 |
effort, instructions = parse_reasoning_and_instructions(system_prompt)
|
| 60 |
system_content = SystemContent.new().with_reasoning_effort(effort)
|
| 61 |
developer_content = DeveloperContent.new().with_instructions(instructions)
|
|
|
|
|
|
|
| 62 |
harmony_messages = [
|
| 63 |
Message.from_role_and_content(Role.SYSTEM, system_content),
|
| 64 |
Message.from_role_and_content(Role.DEVELOPER, developer_content),
|
|
@@ -67,8 +79,11 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
|
|
| 67 |
for m in processed_history + [new_message]:
|
| 68 |
role = Role.USER if m["role"] == "user" else Role.ASSISTANT
|
| 69 |
harmony_messages.append(Message.from_role_and_content(role, m["content"]))
|
|
|
|
| 70 |
conversation = Conversation.from_messages(harmony_messages)
|
| 71 |
prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
|
|
|
|
|
|
|
| 72 |
prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)
|
| 73 |
|
| 74 |
streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
|
@@ -83,13 +98,16 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
|
|
| 83 |
"streamer": streamer,
|
| 84 |
"return_full_text": False,
|
| 85 |
}
|
|
|
|
|
|
|
| 86 |
thread = Thread(target=pipe, args=(prompt_text,), kwargs=generation_kwargs)
|
| 87 |
thread.start()
|
| 88 |
|
| 89 |
-
#
|
| 90 |
thinking = ""
|
| 91 |
final = ""
|
| 92 |
started_final = False
|
|
|
|
| 93 |
for chunk in streamer:
|
| 94 |
if not started_final:
|
| 95 |
parts = RE_FINAL_MARKER.split(chunk, maxsplit=1)
|
|
@@ -99,11 +117,15 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
|
|
| 99 |
started_final = True
|
| 100 |
else:
|
| 101 |
final += chunk
|
|
|
|
| 102 |
clean_thinking = RE_ANALYSIS_PREFIX.sub('', thinking).strip()
|
| 103 |
clean_final = final.strip()
|
|
|
|
|
|
|
| 104 |
formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
|
| 105 |
yield formatted
|
| 106 |
|
|
|
|
| 107 |
demo = gr.ChatInterface(
|
| 108 |
fn=generate_response,
|
| 109 |
additional_inputs=[
|
|
@@ -123,7 +145,6 @@ demo = gr.ChatInterface(
|
|
| 123 |
[{"text": "Explain Newton laws clearly and concisely"}],
|
| 124 |
[{"text": "What are the benefits of open weight AI models"}],
|
| 125 |
[{"text": "Write a Python function to calculate the Fibonacci sequence"}],
|
| 126 |
-
|
| 127 |
],
|
| 128 |
cache_examples=False,
|
| 129 |
type="messages",
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
from threading import Thread
|
| 4 |
import gradio as gr
|
| 5 |
import spaces
|
| 6 |
+
from transformers import pipeline, TextIteratorStreamer
|
| 7 |
from openai_harmony import (
|
| 8 |
load_harmony_encoding,
|
| 9 |
HarmonyEncodingName,
|
|
|
|
| 15 |
ReasoningEffort,
|
| 16 |
)
|
| 17 |
|
| 18 |
+
# --- Regex settings ---
|
| 19 |
RE_REASONING = re.compile(r'(?i)Reasoning:\s*(low|medium|high)')
|
| 20 |
RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
|
| 21 |
RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')
|
| 22 |
|
| 23 |
+
# Extract the reasoning level from the system prompt
|
| 24 |
def parse_reasoning_and_instructions(system_prompt: str):
|
| 25 |
instructions = system_prompt or "You are a helpful assistant."
|
| 26 |
match = RE_REASONING.search(instructions)
|
|
|
|
| 33 |
cleaned_instructions = RE_REASONING.sub('', instructions).strip()
|
| 34 |
return effort, cleaned_instructions
|
| 35 |
|
| 36 |
+
# Model identifier
|
| 37 |
model_id = "openai/gpt-oss-20b"
|
| 38 |
|
| 39 |
+
# Load the model and tokenizer
|
| 40 |
pipe = pipeline(
|
| 41 |
"text-generation",
|
| 42 |
model=model_id,
|
|
|
|
| 44 |
device_map="auto",
|
| 45 |
trust_remote_code=True,
|
| 46 |
)
|
| 47 |
+
|
| 48 |
+
# Load the Harmony encoding
|
| 49 |
enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
| 50 |
+
|
| 51 |
def format_conversation_history(chat_history):
    """Normalize Gradio chat history into [{'role': ..., 'content': str}] dicts.

    Multimodal entries arrive with a list-valued ``content``; the first
    part's ``"text"`` field is used when present, otherwise the raw list
    is stringified so downstream code always sees a plain string.
    """
    def _as_text(content):
        # Non-list content is already a plain string; pass it through.
        if not isinstance(content, list):
            return content
        # If the content was multimodal, extract the text part.
        if content and "text" in content[0]:
            return content[0]["text"]
        return str(content)

    return [
        {"role": entry["role"], "content": _as_text(entry["content"])}
        for entry in chat_history
    ]
|
| 61 |
|
| 62 |
@spaces.GPU()
|
| 63 |
def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
|
| 64 |
+
# Build the new user message
|
| 65 |
new_message = {"role": "user", "content": input_data}
|
| 66 |
processed_history = format_conversation_history(chat_history)
|
| 67 |
+
|
| 68 |
+
# Process the system prompt and reasoning level
|
| 69 |
effort, instructions = parse_reasoning_and_instructions(system_prompt)
|
| 70 |
system_content = SystemContent.new().with_reasoning_effort(effort)
|
| 71 |
developer_content = DeveloperContent.new().with_instructions(instructions)
|
| 72 |
+
|
| 73 |
+
# Build the Harmony-format messages
|
| 74 |
harmony_messages = [
|
| 75 |
Message.from_role_and_content(Role.SYSTEM, system_content),
|
| 76 |
Message.from_role_and_content(Role.DEVELOPER, developer_content),
|
|
|
|
| 79 |
for m in processed_history + [new_message]:
|
| 80 |
role = Role.USER if m["role"] == "user" else Role.ASSISTANT
|
| 81 |
harmony_messages.append(Message.from_role_and_content(role, m["content"]))
|
| 82 |
+
|
| 83 |
conversation = Conversation.from_messages(harmony_messages)
|
| 84 |
prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
|
| 85 |
+
|
| 86 |
+
# Decode the tokens back to text to feed the pipeline
|
| 87 |
prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)
|
| 88 |
|
| 89 |
streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
|
|
|
| 98 |
"streamer": streamer,
|
| 99 |
"return_full_text": False,
|
| 100 |
}
|
| 101 |
+
|
| 102 |
+
# Run text generation in a separate thread
|
| 103 |
thread = Thread(target=pipe, args=(prompt_text,), kwargs=generation_kwargs)
|
| 104 |
thread.start()
|
| 105 |
|
| 106 |
+
# Process the streaming output
|
| 107 |
thinking = ""
|
| 108 |
final = ""
|
| 109 |
started_final = False
|
| 110 |
+
|
| 111 |
for chunk in streamer:
|
| 112 |
if not started_final:
|
| 113 |
parts = RE_FINAL_MARKER.split(chunk, maxsplit=1)
|
|
|
|
| 117 |
started_final = True
|
| 118 |
else:
|
| 119 |
final += chunk
|
| 120 |
+
|
| 121 |
clean_thinking = RE_ANALYSIS_PREFIX.sub('', thinking).strip()
|
| 122 |
clean_final = final.strip()
|
| 123 |
+
|
| 124 |
+
# Format the output to display the thinking process
|
| 125 |
formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
|
| 126 |
yield formatted
|
| 127 |
|
| 128 |
+
# Gradio user interface
|
| 129 |
demo = gr.ChatInterface(
|
| 130 |
fn=generate_response,
|
| 131 |
additional_inputs=[
|
|
|
|
| 145 |
[{"text": "Explain Newton laws clearly and concisely"}],
|
| 146 |
[{"text": "What are the benefits of open weight AI models"}],
|
| 147 |
[{"text": "Write a Python function to calculate the Fibonacci sequence"}],
|
|
|
|
| 148 |
],
|
| 149 |
cache_examples=False,
|
| 150 |
type="messages",
|