Spaces:

Amossofer
/

test2

Runtime error

App Files Files Community

Amossofer committed on Aug 3

Commit

592da02

1 Parent(s): 3483699

tt

Browse files

Files changed (1) hide show

app.py +34 -62

app.py CHANGED Viewed

@@ -1,67 +1,39 @@
 import gradio as gr
-import torch
-import torch.nn.functional as F
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# Tokenizer and model are created once at module level and reused by
-# generate_stream() on every call.
-MODEL_ID = "tiiuae/falcon-rw-1b"  # small model for local use
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-# The model is pinned to the CPU here; switching devices means editing this line.
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to("cpu")  # or "cuda" if available
-def generate_stream(sysA, sysB, wa, wb, user_input, max_new_tokens=50, temperature=1.0, top_p=0.95):
-    """Stream a reply sampled from a weighted blend of two system prompts.
-
-    The same model scores two prompts that differ only in their system text.
-    At each step the last-position logits of both prompts are combined as
-    ``wa * logitsA + wb * logitsB``, divided by the temperature, top-p
-    filtered and sampled; the sampled token is appended to both sequences.
-
-    Args:
-        sysA: System text placed in prompt A.
-        sysB: System text placed in prompt B.
-        wa: Multiplier applied to prompt A's logits.
-        wb: Multiplier applied to prompt B's logits.
-        user_input: User message inserted into both prompts.
-        max_new_tokens: Upper bound on the number of sampled tokens.
-        temperature: Divisor for the blended logits; values <= 0 fall back to 1.0.
-        top_p: Cumulative-probability cutoff used by the filter.
-
-    Yields:
-        The response text accumulated so far: an empty string first, then one
-        update per sampled token.
-    """
-    # Both prompts share the user message and differ only in the system text.
-    promptA = f"<|system|>{sysA}\n<|user|>{user_input}<|assistant|>"
-    promptB = f"<|system|>{sysB}\n<|user|>{user_input}<|assistant|>"
-    idsA = tokenizer(promptA, return_tensors="pt").input_ids.to(model.device)
-    idsB = tokenizer(promptB, return_tensors="pt").input_ids.to(model.device)
-    outA = idsA.clone()
-    outB = idsB.clone()
-    response = ""
-    yield response  # send initial blank to start stream
-    for _ in range(max_new_tokens):
-        # The full sequences are passed on every step (no cache argument), so
-        # each iteration re-runs the model over everything generated so far.
-        with torch.no_grad():
-            logitsA = model(input_ids=outA).logits[:, -1, :]
-            logitsB = model(input_ids=outB).logits[:, -1, :]
-        # Weighted sum of logits (the weights are not normalized)
-        logits = wa * logitsA + wb * logitsB
-        logits = logits / (temperature if temperature > 0 else 1.0)
-        probs = F.softmax(logits, dim=-1)
-        # Top-p filter: zero every token whose cumulative probability exceeds
-        # top_p, then renormalize what is left.
-        # NOTE(review): if the most likely token alone exceeds top_p, every
-        # entry is zeroed and the renormalization below divides 0 by 0 — confirm.
-        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
-        cumulative = torch.cumsum(sorted_probs, dim=-1)
-        sorted_probs[cumulative > top_p] = 0
-        sorted_probs = sorted_probs / sorted_probs.sum(dim=-1, keepdim=True)
-        # Sample a position in the sorted order, then map it back to a token id.
-        token = sorted_idx[:, torch.multinomial(sorted_probs, 1)].squeeze()
-        # Append the same token to both sequences so they stay in step.
-        outA = torch.cat([outA, token.unsqueeze(0).unsqueeze(0)], dim=1)
-        outB = torch.cat([outB, token.unsqueeze(0).unsqueeze(0)], dim=1)
-        token_str = tokenizer.decode(token)
-        response += token_str
-        yield response
-        # The token is decoded and yielded before this check, so the text of
-        # the end-of-sequence token is part of the final response.
-        if token.item() == tokenizer.eos_token_id:
-            break
-# ✅ Define the demo interface correctly
-# The eight components below are listed in the same order as the parameters
-# of generate_stream (sysA, sysB, wa, wb, user_input, max_new_tokens,
-# temperature, top_p).
-# NOTE(review): gr.ChatInterface documents `additional_inputs` rather than
-# `inputs`, and calls fn as fn(message, history, *additional_inputs) — confirm
-# this call against the installed Gradio version.
-demo = gr.ChatInterface(
-    fn=generate_stream,
-    inputs=[
-        gr.Textbox(label="System Prompt A", value="You are assistant A."),
-        gr.Textbox(label="System Prompt B", value="You are assistant B."),
-        gr.Slider(label="Weight wA", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
-        gr.Slider(label="Weight wB", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
-        gr.Textbox(label="User Message", placeholder="Enter your message..."),
-        gr.Slider(label="Max new tokens", minimum=1, maximum=200, step=1, value=50),
-        gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
-        gr.Slider(label="Top‑p", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
-    ],
-    title="Two-System Weighted Blending Chat",
-    description="Combines two system prompts using weighted logit blending: response = wA⋅modelA + wB⋅modelB.",
-)
 # Launch the app only when this file is run as a script, not when imported.
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+def generate(sysA: str, sysB: str, wa: float, wb: float, user_input: str) -> str:
+    """Return a placeholder response that echoes the inputs.
+
+    No model is called here: the function only formats its arguments into a
+    fixed text template so the UI wiring can be exercised end to end.
+
+    Args:
+        sysA: Text of system prompt A.
+        sysB: Text of system prompt B.
+        wa: Weight associated with system prompt A.
+        wb: Weight associated with system prompt B.
+        user_input: The user's message.
+
+    Returns:
+        A multi-line string listing both prompts, both weights and the user
+        message, followed by a fixed "=== Response ===" section.
+    """
+    # Example blending logic — replace with your actual model call.
+    # The last two pieces have no placeholders, so they are plain literals.
+    return (
+        f"System Prompt A: {sysA}\n"
+        f"System Prompt B: {sysB}\n"
+        f"Weight A: {wa}\n"
+        f"Weight B: {wb}\n"
+        f"User message: {user_input}\n\n"
+        "=== Response ===\n"
+        "Blended response based on weights."
+    )
+# Demo UI: two system-prompt boxes, two weight sliders, a message box and a
+# response box that is filled by generate() when "Send" is clicked.
+with gr.Blocks() as demo:
+    gr.Markdown("# Multi-System Prompt Chat Demo")
+    # First row: the two system prompts.
+    with gr.Row():
+        sysA = gr.Textbox(
+            value="You are assistant A.",
+            label="System Prompt A",
+            lines=2,
+        )
+        sysB = gr.Textbox(
+            value="You are assistant B.",
+            label="System Prompt B",
+            lines=2,
+        )
+    # Second row: one weight per system prompt.
+    with gr.Row():
+        wa = gr.Slider(minimum=-5.0, maximum=5.0, step=0.1, value=1.0, label="Weight wA")
+        wb = gr.Slider(minimum=-5.0, maximum=5.0, step=0.1, value=1.0, label="Weight wB")
+    user_input = gr.Textbox(placeholder="Type your message here...", label="User Message")
+    output = gr.Textbox(lines=10, label="Model Response")
+    submit_btn = gr.Button("Send")
+    # The input list follows the parameter order of generate().
+    submit_btn.click(generate, [sysA, sysB, wa, wb, user_input], output)
 # Launch the app only when this file is run as a script, not when imported.
 if __name__ == "__main__":
     demo.launch()