gpt-oss-20b-demo

Runtime error

File size: 6,142 Bytes

b3d8755
 
a4b21e5
 
 
b3d8755
ae0ab06
 
 
 
 
 
28d4ff3
 
 
ae0ab06
a4b21e5
b3d8755
28d4ff3
 
 
 
b3d8755
28d4ff3
 
 
 
 
 
 
 
 
 
 
 
b3d8755
a4b21e5
 
b3d8755
a4b21e5
 
 
 
 
ae0ab06
a4b21e5
b3d8755
 
ae0ab06
b3d8755
a4b21e5
 
 
 
 
 
b3d8755
a4b21e5
 
 
28d4ff3
ba54a13
a4b21e5
b3d8755
a4b21e5
 
b3d8755
 
28d4ff3
 
 
b3d8755
 
28d4ff3
 
 
 
 
 
 
 
b3d8755
28d4ff3
ae0ab06
b3d8755
 
ae0ab06
 
a4b21e5
ae0ab06
a4b21e5
 
 
 
 
 
6ce8b1e
ae0ab06
 
a4b21e5
b3d8755
 
ae0ab06
a4b21e5
ae0ab06
b3d8755
ba54a13
 
 
b3d8755
6ce8b1e
ba54a13
28d4ff3
 
 
 
ba54a13
6ce8b1e
ba54a13
b3d8755
28d4ff3
ba54a13
b3d8755
 
ba54a13
 
a4b21e5
b3d8755
a4b21e5
 
 
14d377a
a4b21e5
 
 
 
 
 
 
 
 
 
 
 
6ce8b1e
2870fe9
ae0ab06
a4b21e5
 
 
c438893
bdddd23
a4b21e5
 
 
 
 
 
 
 
 
 
 
ae0ab06

import os
import re
from threading import Thread
import gradio as gr
import spaces
from transformers import pipeline, TextIteratorStreamer
from openai_harmony import (
    load_harmony_encoding,
    HarmonyEncodingName,
    Role,
    Message,
    Conversation,
    SystemContent,
    DeveloperContent,
    ReasoningEffort,
)

# --- Regex settings ---
# Case-insensitive "Reasoning: low|medium|high" directive; group 1 captures the
# level word. Used to read the effort level from the system prompt and to strip
# the directive from it.
RE_REASONING = re.compile(r'(?i)Reasoning:\s*(low|medium|high)')
# Case-insensitive literal "assistantfinal"; generate_response splits the
# streamed text on it to separate the reasoning from the final answer.
# NOTE(review): presumably this is what remains of the channel-switch header
# once the streamer drops special tokens — confirm against real model output.
RE_FINAL_MARKER = re.compile(r'(?i)assistantfinal')
# Case-insensitive "analysis" at the very start of the text, plus any
# whitespace that follows it; removed before the reasoning is displayed.
RE_ANALYSIS_PREFIX = re.compile(r'(?i)^analysis\s*')

def parse_reasoning_and_instructions(system_prompt: str):
    """Split a system prompt into a reasoning effort and plain instructions.

    Args:
        system_prompt: Free-form prompt text. It may contain a
            "Reasoning: low|medium|high" directive (matched by RE_REASONING).
            A falsy value is replaced by "You are a helpful assistant.".

    Returns:
        A ``(effort, instructions)`` tuple: the ``ReasoningEffort`` member for
        the first directive found (``MEDIUM`` when there is none), and the
        prompt with every directive removed and surrounding whitespace
        stripped.
    """
    prompt_text = system_prompt if system_prompt else "You are a helpful assistant."

    effort_by_name = {
        'low': ReasoningEffort.LOW,
        'medium': ReasoningEffort.MEDIUM,
        'high': ReasoningEffort.HIGH,
    }

    directive = RE_REASONING.search(prompt_text)
    if directive is None:
        level_name = 'medium'
    else:
        level_name = directive.group(1).lower()
    level = effort_by_name.get(level_name, ReasoningEffort.MEDIUM)

    # Drop the directive(s) so only the actual instructions remain.
    without_directive = RE_REASONING.sub('', prompt_text)
    return level, without_directive.strip()

# Model identifier
model_id = "openai/gpt-oss-20b"

# Load the model and tokenizer as a text-generation pipeline (runs at import time).
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype="auto",
    device_map="auto",
    # NOTE(review): this flag permits code shipped in the model repository to
    # run locally — confirm it is actually required for this model.
    trust_remote_code=True,
)

# Load the Harmony encoding used to render conversations into prompt tokens.
enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)

# Matches the collapsible "thinking" wrapper that generate_response puts in
# front of every assistant reply it renders in the chat window.
_RE_THINKING_BLOCK = re.compile(
    r'^\s*<details open><summary>Click to view Thinking Process</summary>.*?</details>\s*',
    re.DOTALL,
)


def format_conversation_history(chat_history):
    """Normalise chat history into plain ``{"role", "content"}`` text messages.

    Args:
        chat_history: Iterable of dicts with "role" and "content" keys.
            "content" is either a string or a list of parts (dicts that may
            carry a "text" key).

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts where:

        * list content is reduced to the text of all its "text" parts joined
          with newlines (or ``str(content)`` when no part carries text);
        * assistant messages have the leading rendered thinking block removed,
          so that earlier reasoning and its HTML wrapper are not fed back to
          the model on the next turn.

    Raises:
        KeyError: If an item lacks a "role" or "content" key.
    """
    messages = []
    for item in chat_history:
        role = item["role"]
        content = item["content"]
        if isinstance(content, list):
            # Multimodal content: keep every text part, not just the first,
            # and tolerate parts that are not dicts.
            texts = [
                str(part["text"])
                for part in content
                if isinstance(part, dict) and "text" in part
            ]
            content = "\n".join(texts) if texts else str(content)
        if role == "assistant" and isinstance(content, str):
            # Only the final answer belongs in the history sent to the model.
            content = _RE_THINKING_BLOCK.sub("", content, count=1)
        messages.append({"role": role, "content": content})
    return messages

@spaces.GPU()
def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
    """Stream a model reply for the chat UI, separating reasoning from answer.

    Args:
        input_data: Text of the new user message.
        chat_history: Earlier turns as dicts with "role" and "content" keys.
        max_new_tokens: Upper bound on generated tokens (coerced to int).
        system_prompt: Instructions, optionally containing a
            "Reasoning: low|medium|high" directive.
        temperature: Sampling temperature.
        top_p: Nucleus sampling threshold.
        top_k: Top-k sampling cutoff (coerced to int).
        repetition_penalty: Repetition penalty passed to generation.

    Yields:
        The reply rendered so far: a collapsible block with the reasoning
        text followed by the final answer text.

    Raises:
        Exception: Whatever the generation pipeline raised in the worker
            thread is re-raised here once the stream has ended.
    """
    # Build the new user message and normalise the earlier turns.
    new_message = {"role": "user", "content": input_data}
    processed_history = format_conversation_history(chat_history)

    # Process the system prompt and the reasoning level it requests.
    effort, instructions = parse_reasoning_and_instructions(system_prompt)
    system_content = SystemContent.new().with_reasoning_effort(effort)
    developer_content = DeveloperContent.new().with_instructions(instructions)

    # Build the Harmony-format messages.
    harmony_messages = [
        Message.from_role_and_content(Role.SYSTEM, system_content),
        Message.from_role_and_content(Role.DEVELOPER, developer_content),
    ]

    for m in processed_history + [new_message]:
        # Anything that is not a user turn is treated as an assistant turn.
        role = Role.USER if m["role"] == "user" else Role.ASSISTANT
        harmony_messages.append(Message.from_role_and_content(role, m["content"]))

    conversation = Conversation.from_messages(harmony_messages)
    prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)

    # Decode the tokens back to text, because the pipeline is called with a string.
    prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)

    streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = {
        # Slider values may arrive as floats; generation expects integers here.
        "max_new_tokens": int(max_new_tokens),
        "do_sample": True,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": int(top_k),
        "repetition_penalty": repetition_penalty,
        "streamer": streamer,
        "return_full_text": False,
    }

    worker_errors = []

    def run_generation():
        # Thread boundary: capture any failure and hand it to the consumer.
        try:
            pipe(prompt_text, **generation_kwargs)
        except Exception as exc:
            worker_errors.append(exc)
            # Without an end-of-stream signal the loop below would block forever.
            streamer.on_finalized_text("", stream_end=True)

    # Run text generation in a separate thread so output can be streamed.
    thread = Thread(target=run_generation)
    thread.start()

    # Process the output stream.
    thinking = ""
    final = ""
    started_final = False
    # The marker may straddle two chunks, so each search re-reads this many
    # characters of already-received text (marker length minus one).
    marker_overlap = len("assistantfinal") - 1

    for chunk in streamer:
        if started_final:
            final += chunk
        else:
            scan_from = max(0, len(thinking) - marker_overlap)
            thinking += chunk
            marker = RE_FINAL_MARKER.search(thinking, scan_from)
            if marker:
                # Everything after the marker is the answer; the marker itself is dropped.
                final = thinking[marker.end():]
                thinking = thinking[:marker.start()]
                started_final = True

        clean_thinking = RE_ANALYSIS_PREFIX.sub('', thinking).strip()
        clean_final = final.strip()

        # Format the output so the thinking process is shown in a collapsible block.
        formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
        yield formatted

    thread.join()
    if worker_errors:
        # Surface the generation failure instead of silently ending the reply.
        raise worker_errors[0]

# Gradio user interface
# Extra controls, in the order generate_response receives them after the
# message and history: max_new_tokens, system_prompt, temperature, top_p,
# top_k, repetition_penalty.
_generation_controls = [
    gr.Slider(minimum=64, maximum=4096, value=2048, step=1, label="Max new tokens"),
    gr.Textbox(
        value="You are a helpful assistant. Reasoning: medium",
        label="System Prompt",
        placeholder="Change system prompt",
        lines=4,
    ),
    gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.05, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
    gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-k"),
    gr.Slider(minimum=1.0, maximum=2.0, value=1.0, step=0.05, label="Repetition Penalty"),
]

# Sample prompts offered in the chat window.
_example_prompts = [
    [{"text": "Explain Newton laws clearly and concisely"}],
    [{"text": "What are the benefits of open weight AI models"}],
    [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
]

# Input box for the user's message.
_query_box = gr.Textbox(placeholder="Type your prompt", label="Query Input")

demo = gr.ChatInterface(
    fn=generate_response,
    type="messages",
    multimodal=False,
    additional_inputs=_generation_controls,
    examples=_example_prompts,
    cache_examples=False,
    description="""# gpt-oss-20b Demo
Give it a couple of seconds to start. You can adjust reasoning level in the system prompt like "Reasoning: high." Click to view thinking process (default is on).""",
    textbox=_query_box,
    stop_btn="Stop Generation",
    fill_height=True,
    theme=gr.themes.Soft(),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()