Ultronprime committed
Commit 4b6929f · verified · 1 Parent(s): 017c2f0

Update app.py

Files changed (1):
  app.py +124 -42
app.py CHANGED
@@ -1,46 +1,128 @@
 import gradio as gr
 import spaces
-from transformers import pipeline
-
-@spaces.GPU()
-def generate_text(input_text, history):
-    # Initialize the pipeline with specified parameters
-    pipe = pipeline("text-generation", model="ngxson/MiniThinky-v2-1B-Llama-3.2", max_new_tokens=1024, temperature=0.7, do_sample=True)
-
-    # Prepare the system message
-    system_message = "You are MiniThinky, a helpful AI assistant. You always think before giving the answer. Use <|thinking|> before thinking and <|answer|> before giving the answer."
-
-    # Format the input messages
-    messages = [
-        {"role": "system", "content": system_message},
-        {"role": "user", "content": input_text}
-    ]
-
-    # Generate response
-    response = pipe(messages, max_new_tokens=1024, temperature=0.7, do_sample=True)
-
-    # Extract the generated text
-    response_text = response[0]["generated_text"]
-
-    # Append user and assistant messages to history
-    history.append({"role": "user", "content": input_text})
-    history.append({"role": "assistant", "content": response_text})
-
-    # Return updated history
-    return history
-
-# Define the Gradio interface
-demo = gr.Interface(
-    fn=generate_text,
-    inputs=[
-        gr.Textbox(label="Enter your text"),
-        gr.JSON(value=[], visible=False)
-    ],
-    outputs=[
-        gr.Chatbot(type='messages')
-    ],
-    title="MiniThinky Text Generator"
-)
-
-# Launch the interface
-demo.launch()
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# Add CSS for footer hiding and styling
+css = """
+footer {
+    visibility: hidden;
+}
+.container {max-width: 850px; margin: auto; padding: 20px}
+.title {text-align: center; margin-bottom: 20px}
+"""
+
+# Model initialization
+model_name = "ngxson/MiniThinky-v2-1B-Llama-3.2"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,
+        device_map="auto"
+    )
+except Exception as e:
+    print(f"Error loading model: {e}")
+    raise gr.Error("Failed to load model. Please try again later.")
+
+SYSTEM_MESSAGE = "You are MiniThinky, a helpful AI assistant. You always think before giving the answer. Use <|thinking|> before thinking and <|answer|> before giving the answer."
+
+def format_chat_prompt(messages):
+    formatted_messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
+    formatted_messages.extend(messages)
+    return tokenizer.apply_chat_template(
+        formatted_messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
+@spaces.GPU(duration=60)
+def generate_response(message, history, progress=gr.Progress(track_tqdm=True)):
+    if not message.strip():
+        return ""
+
+    try:
+        # Format messages including history
+        messages = []
+        for user_msg, assistant_msg in history:
+            messages.append({"role": "user", "content": user_msg})
+            messages.append({"role": "assistant", "content": assistant_msg})
+        messages.append({"role": "user", "content": message})
+
+        # Format prompt
+        prompt = format_chat_prompt(messages)
+
+        # Tokenize
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)
+
+        # Generate
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=512,
+            temperature=0.7,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Extract response after the last user message
+        response = response.split(message)[-1].strip()
+
+        # Clear GPU memory
+        torch.cuda.empty_cache()
+
+        return response
+
+    except Exception as e:
+        print(f"Error during generation: {e}")
+        return "[Error: Generation failed. Please try again.]"
+
+def respond(message, chat_history):
+    try:
+        bot_message = generate_response(message, chat_history)
+        chat_history.append((message, bot_message))
+        return "", chat_history
+    except Exception as e:
+        raise gr.Error(str(e))
+
+# Gradio Interface
+with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
+    gr.HTML(
+        """
+        <div class="title">
+            <h1>MiniThinky Chat Assistant</h1>
+            <p>A helpful AI assistant that thinks before answering</p>
+        </div>
+        """
+    )
+
+    with gr.Column(elem_id="col-container"):
+        chatbot = gr.Chatbot(height=400)
+        with gr.Row():
+            msg = gr.Textbox(
+                placeholder="Type your message here...",
+                container=False,
+                scale=4
+            )
+            submit = gr.Button("Submit", scale=1)
+
+        clear = gr.ClearButton([msg, chatbot], value="🗑️ Clear Chat")
+
+        with gr.Accordion("Examples", open=False):
+            gr.Examples(
+                examples=[
+                    "What is the capital of France?",
+                    "Explain quantum computing in simple terms",
+                    "Write a short poem about AI",
+                ],
+                inputs=msg
+            )
+
+    msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=True)
+    submit.click(respond, [msg, chatbot], [msg, chatbot], queue=True)
+
+if __name__ == "__main__":
+    demo.queue(max_size=20, api_open=False).launch()
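
A note for reviewers on the new prompt-building path: format_chat_prompt delegates to the tokenizer's chat template, so the exact special tokens in the rendered prompt come from the checkpoint's tokenizer config, not from this file. A minimal sketch of how to inspect that rendering offline, assuming only that the repo ships a chat template (which Llama-3.2 derivatives normally do):

    from transformers import AutoTokenizer

    # Inspect the rendered prompt without loading the model weights.
    tokenizer = AutoTokenizer.from_pretrained("ngxson/MiniThinky-v2-1B-Llama-3.2")

    messages = [
        {"role": "system", "content": "You are MiniThinky, a helpful AI assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]

    # tokenize=False returns the prompt as a string; add_generation_prompt=True
    # appends the assistant header so generation continues as the assistant.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    print(prompt)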
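
One design note on the extraction step: response.split(message)[-1] can misfire if the model echoes the user message inside its answer. A common alternative, sketched here with the same variable names as the diff (a suggestion, not what this commit does), is to decode only the newly generated token ids:

    # Decode only the tokens produced after the prompt (sketch, not in this commit).
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()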