Javedalam committed on
Commit
8fca131
·
verified ·
1 Parent(s): 30745cf

Deploy Gradio app with multiple files

Browse files
Files changed (2) hide show
  1. app.py +128 -0
  2. requirements.txt +22 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import spaces
5
+
6
+ # Model configuration
7
+ MODEL_ID = "WeiboAI/VibeThinker-1.5B"
8
+ SYSTEM_PROMPT = "You are a concise solver. Respond briefly."
9
+
10
+ # Load model and tokenizer
11
def load_model():
    """Load the causal language model and tokenizer named by ``MODEL_ID``.

    Returns:
        tuple: ``(model, tokenizer)``, in that order.

    Raises:
        Exception: Any failure while loading is printed and then re-raised
            unchanged so the caller can decide how to react.
    """
    # Half-precision weights; device placement is delegated to device_map="auto".
    load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
    try:
        print(f"Loading model: {MODEL_ID}")
        tok = AutoTokenizer.from_pretrained(MODEL_ID)
        lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_options)
        print("Model loaded successfully!")
        return lm, tok
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
26
+
27
# Best-effort model initialisation at import time: start from "not loaded"
# and only overwrite the globals when loading succeeds, so the app can still
# start and report the problem from chat_response.
model = None
tokenizer = None
try:
    model, tokenizer = load_model()
except Exception as e:
    print(f"Failed to load model: {e}")
34
+
35
def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content message dicts.

    Accepts both history shapes Gradio may supply: ``(user, assistant)`` pairs
    and role/content dicts. Missing (``None``) turns are skipped so they never
    reach the chat template.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Dict-style history already carries its role; keep plain-text
            # user/assistant turns only.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str):
                converted.append({"role": role, "content": content})
            continue

        user_msg, assistant_msg = turn
        if user_msg is not None:
            converted.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            converted.append({"role": "assistant", "content": assistant_msg})
    return converted


@spaces.GPU
def chat_response(message, history):
    """
    Generate the assistant reply for the chat interface.

    Args:
        message (str): Current user message.
        history (list): Previous turns, either as ``(user_msg, assistant_msg)``
            pairs or as ``{"role": ..., "content": ...}`` dicts. ``None`` or an
            empty list means there is no prior conversation.

    Returns:
        str: The generated reply with surrounding whitespace stripped, or a
        human-readable error string when the model is unavailable or
        generation fails (errors are reported, never raised).
    """
    if model is None or tokenizer is None:
        return "Model not loaded. Please check the model configuration."

    try:
        # System prompt first, then prior turns, then the new user message.
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        messages.extend(_history_to_messages(history))
        messages.append({"role": "user", "content": message})

        # Render the conversation to text, ending with the assistant prompt.
        prompt_text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        model_inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)

        with torch.no_grad():
            output_ids = model.generate(
                **model_inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id,
            )

        # generate() returns prompt + continuation; keep only the new tokens.
        completion_ids = [
            full_ids[len(prompt_ids):]
            for prompt_ids, full_ids in zip(model_inputs.input_ids, output_ids)
        ]

        response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
        return response.strip()

    except Exception as e:
        # Top-level UI boundary: log and show the failure instead of crashing.
        print(f"Error generating response: {e}")
        return f"Sorry, I encountered an error: {str(e)}"
102
+
103
def create_demo():
    """Create the Gradio chat interface wired to ``chat_response``.

    Some constructor options used here (``show_progress`` and the
    ``retry_btn`` / ``undo_btn`` / ``clear_btn`` labels) are not accepted by
    every Gradio release, and an unknown keyword makes ``gr.ChatInterface``
    raise ``TypeError``. Those options are therefore passed only when the
    installed constructor declares them.

    Returns:
        gr.ChatInterface: The configured interface, not yet launched.
    """
    import inspect  # local import: only needed to probe the constructor

    version_dependent = {
        "show_progress": "minimal",
        "retry_btn": "🔄 Retry",
        "undo_btn": "↩️ Undo",
        "clear_btn": "🗑️ Clear",
    }

    accepted = inspect.signature(gr.ChatInterface.__init__).parameters
    takes_any_kwarg = any(
        param.kind is inspect.Parameter.VAR_KEYWORD for param in accepted.values()
    )
    supported = {
        name: value
        for name, value in version_dependent.items()
        if takes_any_kwarg or name in accepted
    }

    demo = gr.ChatInterface(
        fn=chat_response,
        title="VibeThinker-1.5B Chat",
        description=f"Chat with {MODEL_ID}. {SYSTEM_PROMPT}",
        examples=[
            "What is 2+2?",
            "Explain quantum physics briefly",
            "Write a short poem",
            "How do I make good decisions?",
        ],
        theme=gr.themes.Soft(),
        **supported,
    )

    return demo
125
+
126
if __name__ == "__main__":
    # Script entry point: build the chat interface, then start serving it.
    demo = create_demo()
    # NOTE(review): share=False presumably disables Gradio's public share
    # link — confirm against the Gradio launch() documentation.
    demo.launch(share=False)
requirements.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.7.1
2
+ transformers==4.36.0
3
+ accelerate==0.25.0
4
+ torch>=2.0.0
5
+ spaces==0.19.4
6
+ title: VibeThinker-1.5B Chat
7
+ emoji: 🤖
8
+ colorFrom: blue
9
+ colorTo: pink
10
+ sdk: gradio
11
+ sdk_version: 4.7.1
12
+ app_port: 7860
13
+ hardware: zero-gpu
14
+ Simple chat interface for the VibeThinker-1.5B model.
15
+ ZeroGPU hardware support
16
+ Interactive chat interface
17
+ Built with Gradio
18
+ Model runs server-side on Hugging Face ZeroGPU hardware; only the chat UI runs in the browser
19
+ What is 2+2?
20
+ Explain quantum physics briefly
21
+ Write a short poem
22
+ How do I make good decisions?