Sleep

Sleeping

File size: 2,040 Bytes

be16a0b
 
 
 
 
 
 
1c325f0
be16a0b
 
 
 
 
 
 
 
 
 
2f54331
 
 
 
be16a0b
2f54331
ef2bd77
2f54331
ef2bd77
be16a0b
ef2bd77
be16a0b
 
 
ef2bd77
be16a0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5f63c4
be16a0b
 
f5f63c4
be16a0b
 
 
 
 
 
2f54331
be16a0b
 
 
 
 
2f54331

import spaces
import gradio as gr
from transformers import pipeline, TextIteratorStreamer
import torch
import threading

# Hub checkpoint served by this app.
model_name = "krish10/Qwen3_14B_16bit_Sleep"

# Build the text-generation pipeline once at import time so that every
# request reuses the same loaded weights (device index 0 is requested).
pipe = pipeline(task="text-generation", model=model_name, device=0)

# Expose the pipeline's components under their own names; the request
# handler talks to them directly instead of going through `pipe`.
model, tokenizer = pipe.model, pipe.tokenizer

# Sampling settings applied identically to every request.
TOP_P = 0.9
TEMPERATURE = 0.1
MAX_TOKENS = 3000

@spaces.GPU
def respond_stream(user_input):
    """Stream the model's reply to ``user_input`` as a growing string.

    This is a generator: each yielded value is the full response text
    produced so far, so the caller can simply overwrite its display with
    the latest value.

    Args:
        user_input: Raw text entered by the user. ``None`` or
            whitespace-only input is rejected.

    Yields:
        The cumulative response text after each decoded chunk, or a single
        error message when the input is empty.

    Raises:
        Exception: Re-raises whatever ``model.generate`` raised in the
            worker thread, once the stream has been drained.
    """
    cleaned = (user_input or "").strip()
    if not cleaned:
        # Because this function contains `yield`, a bare `return "<msg>"`
        # would be swallowed as the StopIteration value and never shown.
        # The message has to be yielded to reach the output component.
        yield "❌ Error: Input text is required."
        return

    # Use the entire input directly in the prompt
    prompt = f"Instruction: \n\n{cleaned}"

    # Wrap into message for chat template
    messages = [{"role": "user", "content": prompt}]
    prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Tokenize and prepare streamer
    inputs = tokenizer(prompt_text, return_tensors="pt").to("cuda")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        input_ids=inputs["input_ids"],
        # Forward the tokenizer's mask explicitly: pad_token_id is set to
        # the EOS id below, so generate() should not have to infer the
        # mask from the token ids.
        attention_mask=inputs.get("attention_mask"),
        streamer=streamer,
        max_new_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    worker_errors = []

    def _run_generation():
        # Runs in a background thread so this generator can consume the
        # streamer concurrently.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:  # thread boundary: re-raised by the consumer
            worker_errors.append(exc)
            # generate() did not finish, so it never signalled end-of-stream;
            # do it here, otherwise the loop below would block forever.
            streamer.end()

    thread = threading.Thread(target=_run_generation)
    thread.start()

    partial_text = ""
    try:
        for chunk in streamer:
            partial_text += chunk
            yield partial_text
    finally:
        # Do not leave the worker running unobserved once streaming ends.
        thread.join()

    if worker_errors:
        raise worker_errors[0]

# Single-page UI: a heading, one input box, one read-only output box, and a
# button that runs the streaming handler.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Sleep trained Qwen3-14b")

    with gr.Column():
        user_input = gr.Textbox(
            lines=15,
            label="Input Text",
            placeholder="Paste your full input here",
        )

    output_box = gr.Textbox(
        lines=15,
        label="Model Response",
        interactive=False,
    )
    generate_btn = gr.Button("Generate")

    # Clicking the button feeds the input box to the handler and writes
    # each value it produces into the output box.
    generate_btn.click(respond_stream, inputs=[user_input], outputs=[output_box])

# Start the server only when executed as a script.
if __name__ == "__main__":
    demo.launch()