drdudddd committed on
Commit
d8f87ee
·
verified ·
1 Parent(s): 247f4fa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
+ from llama_cpp import Llama
4
+ import os
5
+
6
# Fetch the quantized GGUF weights from the Hugging Face Hub
# (the GGUF format is a good fit for CPU inference).
print("Lade DeepSeek-R1 (GGUF) herunter...")
model_path = hf_hub_download(
    filename="DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf",
    repo_id="unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
)
12
+
13
# Load the model once at import time: 2048-token context window and
# four worker threads (chosen by the author for the HF free CPU tier).
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4)
19
+
20
def _message_text(content):
    """Return the plain text carried by one history entry, or "" if none."""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # Structured content: keep only parts that expose a string "text".
        return "".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def _history_turns(history):
    """Flatten Gradio chat history into ``(speaker, text)`` pairs.

    Accepts both history layouts: a list of ``[user, assistant]`` pairs and
    a list of ``{"role": ..., "content": ...}`` dicts. Entries without
    usable text are skipped.
    """
    turns = []
    for entry in history or []:
        if isinstance(entry, dict):
            speaker = "User" if entry.get("role") == "user" else "Assistant"
            exchanges = [(speaker, entry.get("content"))]
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            exchanges = [("User", entry[0]), ("Assistant", entry[1])]
        else:
            continue
        for speaker, content in exchanges:
            text = _message_text(content)
            if speaker == "Assistant":
                # Earlier replies were streamed with their reasoning included;
                # replay only what follows the closing think tag.
                text = text.rpartition("</think>")[2]
            text = text.strip()
            if text:
                turns.append((speaker, text))
    return turns


def respond(message, history):
    """Stream the model's reply to ``message`` for ``gr.ChatInterface``.

    Prior turns from ``history`` are replayed in the same
    ``User:`` / ``Assistant:`` layout as the new message so the model sees
    the conversation so far. When the transcript would not leave room for
    generation inside the context window, the oldest turns are dropped
    first. With an empty history the prompt is exactly
    ``"User: {message}\\nAssistant: <think>\\n"``.

    Args:
        message: The latest user message.
        history: Previous turns as supplied by Gradio (pairs or
            role/content dicts); may be empty or ``None``.

    Yields:
        The reply accumulated so far, growing by one streamed chunk per
        iteration.
    """
    max_new_tokens = 1024
    turns = _history_turns(history)

    def build_prompt(past):
        transcript = "".join(f"{speaker}: {text}\n" for speaker, text in past)
        # The trailing "<think>" opens the DeepSeek-R1 reasoning phase.
        return f"{transcript}User: {message}\nAssistant: <think>\n"

    prompt = build_prompt(turns)
    if turns:
        # Keep room for the reply: trim the oldest turns until the prompt
        # fits in the context window minus the generation budget.
        prompt_budget = llm.n_ctx() - max_new_tokens
        while turns and len(llm.tokenize(prompt.encode("utf-8"))) > prompt_budget:
            turns = turns[1:]
            prompt = build_prompt(turns)

    stream = llm(
        prompt,
        max_tokens=max_new_tokens,
        stop=["User:", "<|endoftext|>"],
        stream=True,
        temperature=0.7,
    )

    response = ""
    for chunk in stream:
        response += chunk["choices"][0]["text"]
        # Gradio replaces the displayed message with each yielded value,
        # so always yield the full text so far.
        yield response
37
+
38
# Gradio chat UI, served on port 7860 (noted by the author as the
# Hugging Face default).
demo = gr.ChatInterface(title="DeepSeek-R1 CPU Docker", fn=respond)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the container.
    demo.launch(server_port=7860, server_name="0.0.0.0")