File size: 2,040 Bytes
be16a0b 1c325f0 be16a0b 2f54331 be16a0b 2f54331 ef2bd77 2f54331 ef2bd77 be16a0b ef2bd77 be16a0b ef2bd77 be16a0b f5f63c4 be16a0b f5f63c4 be16a0b 2f54331 be16a0b 2f54331 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import spaces
import gradio as gr
from transformers import pipeline, TextIteratorStreamer
import torch
import threading
# Sampling settings shared by every generation request in this app.
MAX_TOKENS = 3000
TEMPERATURE = 0.1
TOP_P = 0.9

# Build the text-generation pipeline once at import time on GPU index 0, then
# expose its tokenizer and model so the handler can call them directly.
# NOTE(review): no dtype is passed here -- confirm the checkpoint loads in the
# intended precision.
model_name = "krish10/Qwen3_14B_16bit_Sleep"
pipe = pipeline(task="text-generation", model=model_name, device=0)
model = pipe.model
tokenizer = pipe.tokenizer
@spaces.GPU
def respond_stream(user_input):
    """Stream the model's reply to ``user_input`` as a growing string.

    The input is stripped, prefixed with ``"Instruction: \\n\\n"``, wrapped as
    a single user message, and rendered through the tokenizer's chat template.
    Generation runs on a background thread while this generator drains a
    ``TextIteratorStreamer`` and yields the text accumulated so far.

    Args:
        user_input: Raw text from the UI. ``None`` or whitespace-only input
            is rejected.

    Yields:
        The cumulative response text after each streamed chunk, or a single
        error message when the input is empty.
    """
    text = (user_input or "").strip()
    if not text:
        # This function is a generator, so ``return "<message>"`` would only
        # set StopIteration.value and the caller would never see it. The
        # message has to be yielded to show up in the output.
        yield "❌ Error: Input text is required."
        return

    # Use the entire input directly in the prompt.
    prompt = f"Instruction: \n\n{text}"

    # Wrap into a message list and render it with the chat template.
    messages = [{"role": "user", "content": prompt}]
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Tokenize and prepare the streamer; skip_prompt keeps the echoed prompt
    # out of the streamed text.
    inputs = tokenizer(prompt_text, return_tensors="pt").to("cuda")
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generation_kwargs = dict(
        input_ids=inputs["input_ids"],
        # Forward the tokenizer's mask: pad_token_id is set to the EOS id
        # below, so generate() cannot reliably infer the mask on its own.
        attention_mask=inputs.get("attention_mask"),
        streamer=streamer,
        max_new_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # generate() blocks until completion, so it runs on a worker thread while
    # this generator consumes the streamer.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for token in streamer:
        partial_text += token
        yield partial_text

    # The streamer is exhausted, so generation is finished; reap the worker.
    thread.join()
# Assemble the UI: a heading, then one column holding the input box, the
# read-only output box, and the button that triggers generation.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Sleep trained Qwen3-14b")

    with gr.Column():
        user_input = gr.Textbox(
            label="Input Text",
            lines=15,
            placeholder="Paste your full input here",
        )
        output_box = gr.Textbox(
            label="Model Response",
            lines=15,
            interactive=False,
        )
        generate_btn = gr.Button("Generate")

    # Each value yielded by respond_stream replaces the output box contents.
    generate_btn.click(respond_stream, inputs=[user_input], outputs=[output_box])

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|