kainatq committed · Commit 5f5bc69 (verified) · Parent(s): 92c792d

Create app.py

Files changed (1): app.py (+254, -0)
app.py ADDED
import gradio as gr
from llama_cpp import Llama
import time
import os

# Configuration
MODEL_REPO = "kainatq/quantum-keek-7b-Q4_K_M-GGUF"
MODEL_FILE = "quantum-keek-7b-q4_k_m.gguf"
MODEL_PATH = f"./{MODEL_FILE}"

# Initialize the model
def load_model():
    try:
        # Download the model if it doesn't exist yet
        if not os.path.exists(MODEL_PATH):
            print("Downloading model... This may take a while.")
            from huggingface_hub import hf_hub_download
            hf_hub_download(
                repo_id=MODEL_REPO,
                filename=MODEL_FILE,
                local_dir=".",
                local_dir_use_symlinks=False
            )

        # Initialize Llama with CPU optimization
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=4096,       # Context window
            n_threads=2,      # Use both vCPUs
            n_batch=512,
            use_mlock=False,  # Don't lock memory (limited RAM)
            use_mmap=True,    # Use memory mapping
            verbose=False
        )
        print("Model loaded successfully!")
        return llm
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

# Load the model once at startup
llm = load_model()

def chat_with_ai(message, history, system_prompt, temperature, max_tokens):
    """
    Handle one chat turn with the model.

    Returns the updated history (a list of (user, assistant) pairs) so it
    can be written straight back to the Chatbot component.
    """
    if llm is None:
        return history + [(message, "Error: Model not loaded. Please check the console for details.")]

    # Prepare conversation history
    conversation = []

    # Add system prompt
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})

    # Add prior turns
    for human, assistant in history:
        conversation.extend([
            {"role": "user", "content": human},
            {"role": "assistant", "content": assistant}
        ])

    # Add current message
    conversation.append({"role": "user", "content": message})

    try:
        # Flatten the conversation into a plain-text prompt
        prompt = ""
        for turn in conversation:
            if turn["role"] == "system":
                prompt += f"System: {turn['content']}\n\n"
            elif turn["role"] == "user":
                prompt += f"User: {turn['content']}\n\n"
            elif turn["role"] == "assistant":
                prompt += f"Assistant: {turn['content']}\n\n"

        prompt += "Assistant:"

        # Generate response
        start_time = time.time()

        response = llm(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            stop=["User:", "System:"],
            echo=False,
            stream=False
        )

        generation_time = time.time() - start_time
        answer = response['choices'][0]['text'].strip()

        # Append generation stats
        tokens_used = response['usage']['total_tokens']
        answer += f"\n\n---\n*Generated in {generation_time:.2f}s using {tokens_used} tokens*"

        return history + [(message, answer)]

    except Exception as e:
        return history + [(message, f"Error generating response: {e}")]

def clear_chat():
    """Clear the chat history and the input box."""
    return [], ""

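# For reference, the flattened prompt assembled above looks like this
# (illustration only, not executed; angle brackets mark placeholders):
#
#   System: You are Quantum Keek, a helpful AI assistant. ...
#
#   User: <first user message>
#
#   Assistant: <first model reply>
#
#   User: <current message>
#
#   Assistant:
#
# The trailing "Assistant:" leaves the model to complete the next turn, and
# the stop strings ["User:", "System:"] keep it from writing further turns.
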
# Custom CSS for ChatGPT-like styling
custom_css = """
#chatbot {
    min-height: 400px;
    border: 1px solid #e0e0e0;
    border-radius: 10px;
    padding: 20px;
    background: #f9f9f9;
}
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
}
.dark #chatbot {
    background: #1e1e1e;
    border-color: #444;
}
"""

# Create the Gradio interface
with gr.Blocks(
    title="🪐 Quantum Keek Chat",
    theme=gr.themes.Soft(),
    css=custom_css
) as demo:

    gr.Markdown(
        """
        # 🪐 Quantum Keek Chat
        *Powered by Quantum Keek 7B GGUF - Running on CPU with llama.cpp*
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Configuration")

            system_prompt = gr.Textbox(
                label="System Prompt",
                value="You are Quantum Keek, a helpful AI assistant. Provide detailed, thoughtful responses to user queries.",
                lines=3,
                placeholder="Enter system instructions..."
            )

            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Higher values = more creative, lower values = more focused"
            )

            max_tokens = gr.Slider(
                minimum=100,
                maximum=2048,
                value=512,
                step=50,
                label="Max Tokens",
                info="Maximum length of the response"
            )

            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

            gr.Markdown(
                """
                ---
                **Model Info:**
                - **Model:** Quantum Keek 7B Q4_K_M
                - **Platform:** CPU (llama.cpp)
                - **Context:** 4096 tokens
                """
            )

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="🪐 Quantum Keek",
                elem_id="chatbot",
                height=500,
                show_copy_button=True
            )

            msg = gr.Textbox(
                label="Your message",
                placeholder="Type your message here...",
                lines=2,
                max_lines=5
            )

            with gr.Row():
                submit_btn = gr.Button("🚀 Send", variant="primary")
                stop_btn = gr.Button("⏹️ Stop", variant="secondary")

    # Event handlers
    submit_event = msg.submit(
        fn=chat_with_ai,
        inputs=[msg, chatbot, system_prompt, temperature, max_tokens],
        outputs=[chatbot]
    ).then(
        lambda: "",  # Clear the input box
        outputs=[msg]
    )

    click_event = submit_btn.click(
        fn=chat_with_ai,
        inputs=[msg, chatbot, system_prompt, temperature, max_tokens],
        outputs=[chatbot]
    ).then(
        lambda: "",  # Clear the input box
        outputs=[msg]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, msg]
    )

    # Stop button: cancel pending generation events. Note that a blocking
    # llama.cpp call that has already started may still run to completion;
    # true mid-generation interruption would require streaming output.
    stop_btn.click(
        fn=None,
        cancels=[submit_event, click_event]
    )

    gr.Markdown(
        """
        ---
        **Note:** This runs on the Hugging Face Spaces free tier (2 vCPU, 16 GB RAM),
        so responses may take a few seconds to generate.
        """
    )

if __name__ == "__main__":
    # Set a Hugging Face token if needed (for gated models)
    # os.environ["HUGGINGFACE_HUB_TOKEN"] = "your_token_here"

    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
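
For context on the two fields app.py reads out of the completion: llama-cpp-python's Llama.__call__ returns a dict in the OpenAI-style completion shape. A minimal standalone sketch, assuming the GGUF file above has already been downloaded to the working directory (the prompt and sampling values are just for illustration):

from llama_cpp import Llama

# Small context is enough for a smoke test
llm = Llama(model_path="./quantum-keek-7b-q4_k_m.gguf", n_ctx=512, verbose=False)

response = llm(
    "User: Say hello in five words.\n\nAssistant:",
    max_tokens=32,
    stop=["User:"],
)

print(response["choices"][0]["text"].strip())  # the generated continuation
print(response["usage"]["total_tokens"])       # prompt + completion tokens

These are the same two fields the app surfaces in its "Generated in Xs using N tokens" footer.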