Amossofer committed
Commit 2ef1e0a · 1 Parent(s): 7f2e44d
Files changed (1)
  1. app.py +43 -47
app.py CHANGED
@@ -1,50 +1,46 @@
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-
-# Load the TinyLlama model and tokenizer
-model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
-
-# Initialize the text generation pipeline
-generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
-
-
-def generate(sysA, sysB, wa, wb, user_input):
-    # Construct the system prompts with weights
-    prompt_a = f"System A: {sysA}\n" * int(wa)
-    prompt_b = f"System B: {sysB}\n" * int(wb)
-
-    # Combine prompts and user input
-    full_prompt = prompt_a + prompt_b + f"User: {user_input}\nAssistant:"
-
-    # Generate the response using the model
-    response = generator(full_prompt, max_length=512, num_return_sequences=1)[0]['generated_text']
-
-    return response
-
-
-with gr.Blocks() as demo:
-    gr.Markdown("# Multi-System Prompt Chat Demo")
-
-    with gr.Row():
-        sysA = gr.Textbox(label="System Prompt A", value="You are assistant A.", lines=2)
-        sysB = gr.Textbox(label="System Prompt B", value="You are assistant B.", lines=2)
-
-    with gr.Row():
-        wa = gr.Slider(-5.0, 5.0, value=1.0, step=0.1, label="Weight wA")
-        wb = gr.Slider(-5.0, 5.0, value=1.0, step=0.1, label="Weight wB")
-
-    user_input = gr.Textbox(label="User Message", lines=2)
-    output = gr.Textbox(label="Model Response", lines=10)
-
-    submit_btn = gr.Button("Send")
-
-    submit_btn.click(
-        fn=generate,
-        inputs=[sysA, sysB, wa, wb, user_input],
-        outputs=output
-    )
-
+import torch
+
+# Load tiny model from Hugging Face
+model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+)
+
+# Use text-generation pipeline (without `device=0`)
+generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+# Function to blend two prompts with weights (wa and wb)
+def blend_and_generate(prompt_a, prompt_b, wa, wb):
+    # Normalize weights even if negative
+    total = abs(wa) + abs(wb)
+    if total == 0:
+        return "Error: Both weights cannot be zero."
+    norm_wa = wa / total
+    norm_wb = wb / total
+
+    # Create blended prompt
+    blended_prompt = f"{norm_wa:.2f} * ({prompt_a}) + {norm_wb:.2f} * ({prompt_b})"
+    generated = generator(blended_prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
+    return generated[0]["generated_text"]
+
+# Gradio UI
+demo = gr.Interface(
+    fn=blend_and_generate,
+    inputs=[
+        gr.Textbox(label="Prompt A"),
+        gr.Textbox(label="Prompt B"),
+        gr.Slider(minimum=-5, maximum=5, step=0.1, label="Weight A (wa)"),
+        gr.Slider(minimum=-5, maximum=5, step=0.1, label="Weight B (wb)"),
+    ],
+    outputs=gr.Textbox(label="Generated Output"),
+    title="Tiny Prompt Blender (TinyLlama-1.1B)",
+    description="Enter two prompts and blend them using wa and wb (can be negative).",
+)
+
+# Launch app
 if __name__ == "__main__":
     demo.launch()
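
For anyone reviewing the weight handling without downloading the model: a minimal standalone sketch of the normalization and f-string templating that this commit introduces in blend_and_generate. The blend helper name and the sample prompts are hypothetical, and it raises an exception where app.py returns the UI error string.

# Sketch of the commit's weight handling, isolated from the model call.
def blend(prompt_a: str, prompt_b: str, wa: float, wb: float) -> str:
    # Same normalization as app.py: magnitudes sum to 1, signs preserved.
    total = abs(wa) + abs(wb)
    if total == 0:
        raise ValueError("Both weights cannot be zero.")  # app.py returns a string here
    norm_wa = wa / total
    norm_wb = wb / total
    # Same template the commit passes to the generator.
    return f"{norm_wa:.2f} * ({prompt_a}) + {norm_wb:.2f} * ({prompt_b})"

print(blend("be formal", "be funny", 1.0, 3.0))   # 0.25 * (be formal) + 0.75 * (be funny)
print(blend("be formal", "be funny", -2.0, 2.0))  # -0.50 * (be formal) + 0.50 * (be funny)

Note that the blend is purely textual: the weighted expression is handed to TinyLlama as a literal prompt string, not interpolated in embedding space. Dropping device=0 also matters on CPU-only Spaces, since requesting GPU 0 typically fails when CUDA is unavailable, whereas passing the pre-loaded model lets the pipeline run on whatever device the model already occupies.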