tt
app.py CHANGED
@@ -1,46 +1,66 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import gradio as gr
 import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModelForCausalLM

-# Load tiny model from Hugging Face
 model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 )
+model.eval()

-[... 20 lines not shown ...]
+def generate_stream(sysA, sysB, wA, wB, user_message, max_new_tokens=100, temperature=1.0, top_p=0.9):
+    # One prompt per system persona; both share the same user message.
+    promptA = f"<|system|>{sysA}\n<|user|>{user_message}\n<|assistant|>"
+    promptB = f"<|system|>{sysB}\n<|user|>{user_message}\n<|assistant|>"
+
+    idsA = tokenizer(promptA, return_tensors="pt").input_ids.to(model.device)
+    idsB = tokenizer(promptB, return_tensors="pt").input_ids.to(model.device)
+
+    outA, outB = idsA.clone(), idsB.clone()
+    response = ""
+    yield response  # open the stream with an empty response
+
+    for _ in range(max_new_tokens):
+        # Two full forward passes per step (no KV cache), one per context.
+        with torch.no_grad():
+            logitsA = model(input_ids=outA).logits[:, -1, :]
+            logitsB = model(input_ids=outB).logits[:, -1, :]
+
+        # Weighted blend of the two next-token predictions, in logit space.
+        blended = wA * logitsA + wB * logitsB
+        blended = blended / (temperature if temperature > 0 else 1.0)
+
+        # Top-p (nucleus) sampling over the blended distribution.
+        probs = F.softmax(blended, dim=-1)
+        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
+        cum = torch.cumsum(sorted_probs, dim=-1)
+        # Shift the cutoff by one position so the most probable token always
+        # survives; otherwise a single token with probability > top_p would
+        # zero the whole row and the renormalization would divide by zero.
+        sorted_probs[cum - sorted_probs > top_p] = 0
+        sorted_probs = sorted_probs / sorted_probs.sum(dim=-1, keepdim=True)
+
+        token = sorted_idx[:, torch.multinomial(sorted_probs, 1)].squeeze()
+        # Append the sampled token to *both* contexts so they stay in sync.
+        outA = torch.cat([outA, token.unsqueeze(0).unsqueeze(0)], dim=1)
+        outB = torch.cat([outB, token.unsqueeze(0).unsqueeze(0)], dim=1)
+
+        token_str = tokenizer.decode(token)
+        response += token_str
+        yield response
+
+        if token.item() == tokenizer.eos_token_id:
+            break
+
+# gr.Interface maps one input component to each parameter of generate_stream;
+# since the fn is a generator, each yielded string streams into the output box.
+demo = gr.Interface(
+    fn=generate_stream,
     inputs=[
-        gr.Textbox(label="Prompt A"),
-        gr.Textbox(label="Prompt B"),
-        gr.Slider(minimum=-5, maximum=5, step=0.1,
-        gr.Slider(minimum=-5, maximum=5, step=0.1,
+        gr.Textbox(label="System Prompt A", value="You are assistant A."),
+        gr.Textbox(label="System Prompt B", value="You are assistant B."),
+        gr.Slider(label="Weight wA", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
+        gr.Slider(label="Weight wB", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
+        gr.Textbox(label="User Message"),
+        gr.Slider(label="Max New Tokens", minimum=1, maximum=200, step=1, value=100),
+        gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
+        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, step=0.05, value=0.9),
     ],
-[... 1 line not shown ...]
-    title="Tiny Prompt Blender (TinyLlama-1.1B)",
-    description="Enter two prompts and blend them using wa and wb (can be negative).",
+    outputs=gr.Textbox(label="Blended Response"),
+    title="Streaming Blended TinyLlama Chat",
 )

-# Launch app
 if __name__ == "__main__":
     demo.launch()
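The weight sliders accept negative values, which makes the blend more than an average: holding wA + wB near 1 while pushing wB negative steers generation away from prompt B. That is the same form as classifier-free guidance, zB + gamma*(zA - zB), which corresponds to wA = gamma, wB = 1 - gamma. A toy numeric check (the logit values here are made up purely for illustration):

```python
import torch

zA = torch.tensor([2.0, 0.0])  # toy next-token logits under prompt A
zB = torch.tensor([0.0, 2.0])  # toy next-token logits under prompt B

print(1.0 * zA + 1.0 * zB)   # tensor([2., 2.])   -- equal mix of both prompts
print(2.0 * zA - 1.0 * zB)   # tensor([ 4., -2.]) -- extrapolates past A, away from B
# wA=2, wB=-1 equals zB + 2*(zA - zB): the classifier-free-guidance form with gamma=2.
```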
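One detail worth calling out in the sampler: the nucleus mask is applied as cum - sorted_probs > top_p rather than cum > top_p, so the highest-probability token always survives the filter. With the unshifted mask, a token whose probability alone exceeds top_p would zero the entire row and the renormalization would divide by zero. A small demonstration with made-up probabilities:

```python
import torch

probs = torch.tensor([[0.95, 0.03, 0.02]])  # top token alone exceeds top_p=0.9
sorted_probs, sorted_idx = torch.sort(probs, descending=True)
cum = torch.cumsum(sorted_probs, dim=-1)

print(cum > 0.9)                 # tensor([[True, True, True]])  -- zeroes everything
print(cum - sorted_probs > 0.9)  # tensor([[False, True, True]]) -- keeps the top token
```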
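Because generate_stream is a plain generator, it can also be exercised without the Gradio UI, for example from a Python session with app.py's globals loaded. The prompts and settings below are only an example:

```python
# Watch the response grow one token at a time (prompts are illustrative).
for partial in generate_stream(
    "You are a pirate.", "You are a lawyer.",
    wA=1.0, wB=1.0, user_message="Introduce yourself.",
    max_new_tokens=40,
):
    print(partial)
```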