hsuwill000 commited on
Commit
1af149a
·
verified ·
1 Parent(s): 1d32e1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -81
app.py CHANGED
@@ -1,81 +1,71 @@
1
- import socket
2
- import subprocess
3
- import gradio as gr
4
- from openai import OpenAI
5
-
6
- def get_local_ip():
7
- # 建立一個 UDP socket,連到外部伺服器(不會真的發送資料)
8
- s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
9
- try:
10
- # 這裡用 Google 的公共 DNS IP 來確保路徑有效
11
- s.connect(("8.8.8.8", 80))
12
- ip = s.getsockname()[0]
13
- except Exception:
14
- ip = "127.0.0.1"
15
- finally:
16
- s.close()
17
- return ip
18
-
19
- print("本機 IP:", get_local_ip())
20
-
21
- # ✅ 設定 base URL 連接本地 llama.cpp API
22
- client = OpenAI(
23
- base_url="http://0.0.0.0:8000/v1",
24
- api_key="sk-local", # llama.cpp 不檢查內容,只要有就行
25
- timeout=600
26
- )
27
-
28
- # 回應函式(改成 stream 模式)
29
- def respond(
30
- message,
31
- history: list[tuple[str, str]],
32
- system_message,
33
- max_tokens,
34
- temperature,
35
- top_p,
36
- ):
37
- messages = [{"role": "system", "content": system_message}]
38
-
39
- for user, assistant in history:
40
- if user:
41
- messages.append({"role": "user", "content": user})
42
- if assistant:
43
- messages.append({"role": "assistant", "content": assistant})
44
-
45
- messages.append({"role": "user", "content": message})
46
-
47
- try:
48
- # 🔹 修改 1: 開啟 stream 模式
49
- stream = client.chat.completions.create(
50
- model="qwen3", # ⚠️ 替換成你 llama.cpp 載入的模型名稱
51
- messages=messages,
52
- max_tokens=max_tokens,
53
- temperature=temperature,
54
- top_p=top_p,
55
- stream=True,
56
- )
57
-
58
- output = ""
59
- # 🔹 修改 2: 逐步處理流式回應
60
- for chunk in stream:
61
- delta = chunk.choices[0].delta.content or ""
62
- output += delta
63
- yield output # 即時回傳給 Gradio
64
-
65
- except Exception as e:
66
- print(f"[Error] {e}")
67
- yield "⚠️ Llama.cpp server 沒有回應,請稍後再試。"
68
-
69
- # ✅ Gradio 介面(修改 3: 啟用 generator)
70
- demo = gr.ChatInterface(
71
- respond,
72
- additional_inputs=[
73
- gr.Textbox(value="You are a friendly assistant.", label="System message"),
74
- gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens"),
75
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
76
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
77
- ],
78
- )
79
-
80
- if __name__ == "__main__":
81
- demo.launch()
 
1
+ ### app.py
2
+ import socket
3
+ import gradio as gr
4
+ from openai import OpenAI
5
+
6
def get_local_ip():
    """Best-effort discovery of this machine's outbound IPv4 address.

    Opens a UDP socket "connected" to a public address (8.8.8.8:80) — no
    packet is actually sent for UDP — so the OS picks the local interface
    that would route there, then reads that interface's address.

    Returns:
        str: the LAN IPv4 address, or "127.0.0.1" if the probe fails
        (e.g. no network route available).
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        probe.connect(("8.8.8.8", 80))
        return probe.getsockname()[0]
    except Exception:
        return "127.0.0.1"
    finally:
        probe.close()
16
+
17
+ print("本機 IP:", get_local_ip())
18
+
19
# Point the OpenAI client at the local llama.cpp server's OpenAI-compatible API.
client = OpenAI(
    # Fix: 0.0.0.0 is a valid *bind* address but not a portable *destination*
    # (connecting to it only works on some platforms, e.g. Linux) — use
    # loopback explicitly to reach the locally running server.
    base_url="http://127.0.0.1:8000/v1",
    api_key="sk-local",  # llama.cpp does not validate the key; any non-empty value works
    timeout=600,  # generous timeout: local generation can be slow
)
25
+
26
# Streaming chat handler used by gr.ChatInterface (generator → live updates).
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion from the local llama.cpp server.

    Args:
        message: Latest user utterance.
        history: Prior turns in Gradio "messages" format — a list of
            ``{"role": ..., "content": ...}`` dicts.
        system_message: System prompt prepended to the conversation.
        max_tokens: Generation cap forwarded to the server.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        dict: ``{"role": "assistant", "content": <text so far>}`` with the
        accumulated reply, so Gradio renders the stream incrementally.
        On any error, a single user-facing warning message is yielded.
    """
    messages = [{"role": "system", "content": system_message}]
    # Re-emit only role/content: Gradio message dicts can carry extra keys
    # (e.g. "metadata") that an OpenAI-compatible endpoint may reject.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": message})

    try:
        stream = client.chat.completions.create(
            model="qwen3",  # ⚠️ must match the model name (general.name) loaded by llama.cpp
            messages=messages,
            max_tokens=int(max_tokens),  # Gradio sliders can deliver floats
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )

        output = ""
        for chunk in stream:
            # Keep-alive / final chunks may arrive with an empty choices list.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if delta and delta.content:
                output += delta.content
                # Yield only when new text arrived — avoids redundant
                # identical updates to the UI on content-less chunks.
                yield {"role": "assistant", "content": output}

    except Exception as e:
        # Surface a friendly message instead of crashing the UI.
        print(f"[Error] {e}")
        yield {"role": "assistant", "content": "⚠️ Llama.cpp server 沒有回應,請稍後再試。"}
57
+
58
# Gradio chat UI. Newer Gradio versions require type="messages"
# for the OpenAI-style message-dict history format that respond() expects.
demo = gr.ChatInterface(
    respond,
    type="messages",  # history passed as [{"role": ..., "content": ...}] dicts
    additional_inputs=[
        gr.Textbox(value="You are a friendly assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
69
+
70
# Start the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()