MuangMuangE committed on
Commit
e2f81da
·
verified ·
1 Parent(s): aed3858

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+
4
# Edit the two settings below so they point at your own model.
# Fine-tuned model: "YOUR_USERNAME/qwen3-4b-ft-gguf"
# Public model example: "unsloth/Qwen3-0.6B-GGUF" (for testing)
REPO_ID = "unsloth/Qwen3-4B-GGUF"  # HF Hub repository ID
# REPO_ID = "MuangMuangE/Qwen3-4B-GGUF"  # HF Hub repository ID
FILENAME = "Qwen3-4B-Q4_K_M.gguf"  # file name (the .gguf extension is required)

# System prompt: defines the chatbot's role and tone.
# The literal is Korean for "You are a kind Korean-language AI assistant."
SYSTEM_PROMPT = "당신은 친절한 한국어 AI 어시스턴트입니다."

# Warning (from the original author): loading the model at app start-up can
# hit the HF Spaces health-check timeout (30 minutes).
# Fix: defer model loading until the first user message (lazy loading).
llm = None
17
+
18
def get_model():
    """Return the shared Llama model, creating it on the first call only.

    The first call builds the model with ``Llama.from_pretrained`` using
    ``REPO_ID`` and ``FILENAME`` and stores it in the module-level ``llm``.
    Every later call returns that stored instance without loading again.

    Per the original author's notes, ``from_pretrained`` handles the Hub
    download and the ``Llama`` initialisation in a single call.
    """
    global llm

    # Fast path: the model has already been created by an earlier call.
    if llm is not None:
        return llm

    load_options = {
        "repo_id": REPO_ID,
        "filename": FILENAME,
        # Context length; kept small to save memory (the original note
        # gives the model maximum as 40960).
        "n_ctx": 2048,
        # Matches the 2 vCPUs of the "CPU Basic" tier.
        "n_threads": 2,
        # Hide the detailed loading logs.
        "verbose": False,
    }
    llm = Llama.from_pretrained(**load_options)
    return llm
36
+
37
def _history_to_messages(history):
    """Convert Gradio chat history into plain ``role``/``content`` dicts.

    Two history shapes are accepted:
    - message dicts (``{"role": ..., "content": ...}``); only those two keys
      are forwarded so UI-only fields are not sent to the model;
    - legacy ``[user_text, assistant_text]`` pairs; a ``None`` side is skipped.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            if role:
                messages.append({"role": role, "content": entry.get("content")})
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            user_text, assistant_text = entry
            if user_text is not None:
                messages.append({"role": "user", "content": user_text})
            if assistant_text is not None:
                messages.append({"role": "assistant", "content": assistant_text})
    return messages


def respond(message, history):
    """Generate a streamed reply to the user's message.

    Args:
        message: The current user input.
        history: Previous turns of the conversation, as supplied by the
            Gradio ChatInterface (message dicts or legacy pairs).

    Yields:
        The reply accumulated so far, once per streamed chunk, so the UI
        can update in real time.
    """
    # The first call loads the model (the original note says this may take
    # 1-2 minutes); later calls reuse it.
    model = get_model()

    # Conversation = system prompt + earlier turns + current input.
    # NOTE(review): history is not truncated here, so a long conversation may
    # exceed the context length configured in get_model() - confirm handling.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    stream = model.create_chat_completion(
        messages=messages,
        temperature=0.7,  # creativity (0.0 = deterministic, 1.0 = creative)
        max_tokens=512,  # maximum reply length
        stream=True,  # enable streaming
    )

    response = ""
    for chunk in stream:
        # A delta may lack "content" or carry None (e.g. a role-only or
        # final chunk); treat both as an empty piece of text.
        delta = chunk["choices"][0]["delta"].get("content") or ""
        response += delta
        yield response  # hand the partial reply to Gradio
63
+
64
# Gradio ChatInterface: builds the chatbot UI automatically.
# The UI strings are Korean: title "Qwen3 GGUF chatbot"; description "The
# model is loaded on the first response (takes 1-2 minutes)"; examples
# "Hello!" and "What is Python?".
demo = gr.ChatInterface(
    fn=respond,
    title="Qwen3 GGUF 챗봇",  # title
    description="첫 응답 시 모델을 로딩합니다 (1~2분 소요)",  # description
    examples=["안녕하세요!", "파이썬이란 무엇인가요?"],  # example questions
)

# server_name="0.0.0.0": allow external connections (required under Docker)
# server_port=7860: default HF Spaces port
demo.launch(server_name="0.0.0.0", server_port=7860)