AnatoliiG committed on
Commit
54f6dae
·
1 Parent(s): 9834c86

refactor code

Browse files
Files changed (5) hide show
  1. app.py +12 -135
  2. config.py +9 -0
  3. model.py +44 -0
  4. ui.py +165 -0
  5. utils.py +8 -0
app.py CHANGED
@@ -1,37 +1,17 @@
 
1
  import json
2
- import traceback
3
 
4
- import gradio as gr
5
  import uvicorn
6
  from fastapi import FastAPI, Request
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.responses import JSONResponse, StreamingResponse
9
  from gradio import mount_gradio_app
10
- from huggingface_hub import hf_hub_download
11
- from llama_cpp import Llama
12
 
13
- # --- КОНФИГУРАЦИЯ ---
14
- REPO_ID = "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF"
15
- FILENAME = "qwen2.5-coder-7b-instruct-q5_k_m.gguf"
16
- CONTEXT_SIZE = 8192
17
- DEFAULT_MAX_TOKENS = 4096
18
 
19
- print(f"Loading model {REPO_ID}...")
20
- try:
21
- model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
22
- llm = Llama(
23
- model_path=model_path,
24
- n_ctx=CONTEXT_SIZE,
25
- n_threads=4, # Оптимизация для CPU Spaces
26
- n_gpu_layers=0, # Явно указываем 0 для CPU
27
- n_batch=512,
28
- verbose=True,
29
- )
30
- except Exception as e:
31
- print(f"Critical Error: {e}")
32
- llm = None
33
-
34
- # --- API (FastAPI) ---
35
  app = FastAPI()
36
  app.add_middleware(
37
  CORSMiddleware,
@@ -42,19 +22,20 @@ app.add_middleware(
42
  )
43
 
44
 
 
45
  @app.post("/v1/chat/completions")
46
  async def chat_completions(request: Request):
47
- if not llm:
48
  return JSONResponse(content={"error": "Model not loaded"}, status_code=500)
49
 
50
  try:
51
  data = await request.json()
52
  messages = data.get("messages", [])
53
  stream = data.get("stream", False)
54
- temperature = data.get("temperature", 0.4)
55
- max_tokens = data.get("max_tokens", DEFAULT_MAX_TOKENS)
56
 
57
- output = llm.create_chat_completion(
58
  messages=messages,
59
  max_tokens=max_tokens,
60
  temperature=temperature,
@@ -75,112 +56,8 @@ async def chat_completions(request: Request):
75
  return JSONResponse(content={"error": str(e)}, status_code=500)
76
 
77
 
78
- # --- ЛОГИКА ГЕНЕРАЦИИ ДЛЯ GRADIO ---
79
-
80
-
81
- def user_input(user_message, history):
82
- # Если история пуста, инициализируем список
83
- if history is None:
84
- history = []
85
- # Возвращаем список словарей (формат Gradio 5)
86
- return "", history + [{"role": "user", "content": user_message}]
87
-
88
-
89
- def bot_response(history, system_prompt, temperature, max_tokens):
90
- if not llm:
91
- history.append({"role": "assistant", "content": "Error: Model failed to load."})
92
- yield history
93
- return
94
-
95
- # Формируем сообщения для Llama
96
- messages = [{"role": "system", "content": system_prompt}]
97
-
98
- # Берем последние 10 сообщений для контекста
99
- relevant_history = history[-10:] if len(history) > 10 else history
100
-
101
- for msg in relevant_history:
102
- content = msg["content"]
103
-
104
- if isinstance(content, list):
105
- content = "\n".join(str(item) for item in content)
106
-
107
- messages.append({"role": msg["role"], "content": str(content)})
108
-
109
- history.append({"role": "assistant", "content": ""})
110
-
111
- partial_text = ""
112
- try:
113
- stream = llm.create_chat_completion(
114
- messages=messages,
115
- max_tokens=int(max_tokens),
116
- temperature=float(temperature),
117
- stream=True,
118
- )
119
-
120
- for chunk in stream:
121
- delta = chunk["choices"][0]["delta"]
122
- if "content" in delta:
123
- partial_text += delta["content"]
124
- # Обновляем последнее сообщение
125
- history[-1]["content"] = partial_text
126
- yield history
127
-
128
- except Exception as e:
129
- traceback.print_exc()
130
- history[-1]["content"] = partial_text + f"\n\n❌ **Error:** {str(e)}"
131
- yield history
132
-
133
-
134
- # --- ИНТЕРФЕЙС (Gradio Blocks) ---
135
-
136
- custom_css = """
137
- #chatbot {
138
- height: 70vh !important;
139
- overflow: auto;
140
- }
141
- """
142
-
143
- theme = gr.themes.Soft(primary_hue="blue", text_size="lg")
144
-
145
- with gr.Blocks(theme=theme, css=custom_css, title="Qwen Coder Pro") as demo:
146
- gr.Markdown("# 💻 Qwen 2.5 Coder Assistant")
147
-
148
- with gr.Row():
149
- # Настройки
150
- with gr.Column(scale=1, min_width=250):
151
- gr.Markdown("### ⚙️ Settings")
152
- system_prompt = gr.Textbox(
153
- label="System Prompt",
154
- value="Ты экспертный агент-кодер. Напиши чистый код.",
155
- lines=3,
156
- )
157
- temperature = gr.Slider(0.0, 1.0, value=0.4, label="Temperature")
158
- max_tokens = gr.Slider(512, 8192, value=4096, label="Max Tokens")
159
- clear_btn = gr.Button("🗑️ Clear Chat")
160
-
161
- # Чат
162
- with gr.Column(scale=4):
163
- chatbot = gr.Chatbot(
164
- label="Conversation",
165
- elem_id="chatbot",
166
- avatar_images=(None, "https://api.iconify.design/noto:robot.svg"),
167
- )
168
-
169
- msg = gr.Textbox(
170
- show_label=False, placeholder="Type your code question here...", lines=2
171
- )
172
- submit_btn = gr.Button("Run ➤", variant="primary")
173
-
174
- # Связка событий
175
- msg.submit(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
176
- bot_response, [chatbot, system_prompt, temperature, max_tokens], chatbot
177
- )
178
- submit_btn.click(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
179
- bot_response, [chatbot, system_prompt, temperature, max_tokens], chatbot
180
- )
181
- # Очистка возвращает пустой список
182
- clear_btn.click(lambda: [], None, chatbot, queue=False)
183
-
184
  app = mount_gradio_app(app, demo, path="/")
185
 
186
  if __name__ == "__main__":
 
1
+ # app.py
2
  import json
 
3
 
 
4
  import uvicorn
5
  from fastapi import FastAPI, Request
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from fastapi.responses import JSONResponse, StreamingResponse
8
  from gradio import mount_gradio_app
 
 
9
 
10
+ import config
11
+ from model import engine
12
+ from ui import create_ui
 
 
13
 
14
+ # --- FastAPI Setup ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  app = FastAPI()
16
  app.add_middleware(
17
  CORSMiddleware,
 
22
  )
23
 
24
 
25
+ # --- API Endpoints ---
26
  @app.post("/v1/chat/completions")
27
  async def chat_completions(request: Request):
28
+ if not engine.llm:
29
  return JSONResponse(content={"error": "Model not loaded"}, status_code=500)
30
 
31
  try:
32
  data = await request.json()
33
  messages = data.get("messages", [])
34
  stream = data.get("stream", False)
35
+ temperature = data.get("temperature", config.DEFAULT_TEMP)
36
+ max_tokens = data.get("max_tokens", config.DEFAULT_MAX_TOKENS)
37
 
38
+ output = engine.generate(
39
  messages=messages,
40
  max_tokens=max_tokens,
41
  temperature=temperature,
 
56
  return JSONResponse(content={"error": str(e)}, status_code=500)
57
 
58
 
59
+ # --- Mount Gradio ---
60
+ demo = create_ui()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  app = mount_gradio_app(app, demo, path="/")
62
 
63
  if __name__ == "__main__":
config.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Hugging Face repository and GGUF file name fetched by the model loader
REPO_ID = "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF"
FILENAME = "qwen2.5-coder-7b-instruct-q5_k_m.gguf"

# Model parameters
CONTEXT_SIZE = 8192  # passed to Llama as n_ctx
DEFAULT_MAX_TOKENS = 4096  # default max_tokens for API requests and the UI slider
DEFAULT_TEMP = 0.4  # default temperature for API requests and the UI slider
N_THREADS = 4  # passed to Llama as n_threads
N_GPU_LAYERS = 0  # 0 for CPU, -1 for GPU
model.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ from huggingface_hub import hf_hub_download
4
+ from llama_cpp import Llama
5
+
6
+ from config import CONTEXT_SIZE, FILENAME, N_GPU_LAYERS, N_THREADS, REPO_ID
7
+
8
+
9
class ModelEngine:
    """Own the llama.cpp model and expose chat-completion generation.

    The model is loaded during construction. When loading fails, ``llm``
    is left as ``None`` so callers can detect that no model is available.
    """

    def __init__(self):
        self.llm = None
        self._load_model()

    def _load_model(self):
        """Download the GGUF file and build the ``Llama`` instance.

        Any exception raised while downloading or constructing the model is
        printed and swallowed; ``self.llm`` is then set to ``None``.
        """
        print(f"Loading model {REPO_ID}...")
        try:
            gguf_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
            llama_options = {
                "model_path": gguf_path,
                "n_ctx": CONTEXT_SIZE,
                "n_threads": N_THREADS,
                "n_gpu_layers": N_GPU_LAYERS,
                "n_batch": 512,
                "verbose": True,
            }
            self.llm = Llama(**llama_options)
            print("Model loaded successfully.")
        except Exception as load_error:
            print(f"CRITICAL ERROR: Failed to load model. {load_error}")
            self.llm = None

    def generate(self, messages, max_tokens, temperature, stream=True):
        """Run a chat completion on the loaded model.

        ``max_tokens`` is coerced with ``int`` and ``temperature`` with
        ``float`` before being forwarded. Returns whatever the model's
        ``create_chat_completion`` returns.

        Raises:
            RuntimeError: if no model is loaded.
        """
        if not self.llm:
            raise RuntimeError("Model is not loaded.")

        complete = self.llm.create_chat_completion
        return complete(
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            stream=stream,
        )
41
+
42
+
43
# Create the global shared instance (singleton); constructing it loads the model
engine = ModelEngine()
ui.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ui.py
2
+ import traceback
3
+
4
+ import gradio as gr
5
+
6
+ import config
7
+ from model import engine
8
+ from utils import sanitize_content
9
+
10
# --- CSS styles ---
# Pins the page to the viewport height and lets the chat panel scroll on its
# own. The property name must be `max-height`: `max_height` is not a CSS
# property and browsers silently drop the declaration.
CUSTOM_CSS = """
body, .gradio-container {
    overflow: hidden !important;
    height: 100vh !important;
    max-height: 100vh !important;
}
#chatbot {
    height: 100% !important;
    flex-grow: 1;
    overflow: auto;
    font-family: 'Consolas', 'Monaco', monospace;
}
"""
24
+
25
# --- Event logic ---


def user_input(user_message, history):
    """Append the submitted message to the chat history.

    Returns a ``(textbox_value, history)`` pair. A falsy message leaves the
    history untouched and returns ``None`` for the textbox. Otherwise the
    textbox value is ``""`` and a new history list is returned with the
    user's message appended.
    """
    if not user_message:
        return None, history

    previous_turns = [] if history is None else history

    # Rebuild earlier turns so every stored content passes through
    # sanitize_content before the new message is added.
    updated_history = [
        {"role": turn["role"], "content": sanitize_content(turn.get("content", ""))}
        for turn in previous_turns
    ]
    updated_history.append({"role": "user", "content": str(user_message)})
    return "", updated_history
44
+
45
+
46
def bot_response(history, system_prompt, temperature, max_tokens):
    """Stream the assistant's reply into the chat history.

    Generator event handler: appends an assistant message to ``history``
    and yields the history after every streamed chunk that carries content.

    Args:
        history: List of ``{"role", "content"}`` message dicts; mutated in place.
        system_prompt: Text sent as the leading system message.
        temperature: Sampling temperature forwarded to ``engine.generate``.
        max_tokens: Token limit forwarded to ``engine.generate``.

    Yields:
        The same ``history`` list after each update. If generation fails,
        the error text is appended to whatever was produced so far.
    """
    if not engine.llm:
        history.append({"role": "assistant", "content": "Error: Model failed to load."})
        yield history
        return

    # Prepare the prompt: system message plus at most the last 15 turns.
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(
        {"role": msg["role"], "content": sanitize_content(msg.get("content", ""))}
        for msg in history[-15:]
    )

    history.append({"role": "assistant", "content": ""})

    # Bound before the try block so the error handler can always read it,
    # even when engine.generate() raises before any chunk has arrived.
    partial_text = ""
    try:
        stream = engine.generate(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
        )

        for chunk in stream:
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                partial_text += delta["content"]
                history[-1]["content"] = partial_text
                yield history

    except Exception as e:
        traceback.print_exc()
        history[-1]["content"] = partial_text + f"\n\n❌ **Error:** {str(e)}"
        yield history
83
+
84
+
85
def set_interactive(is_interactive):
    """Build the pair of Gradio updates that lock or unlock the inputs.

    Returns a 2-tuple of ``gr.update`` results. The first toggles
    ``interactive`` and swaps the placeholder text; the second toggles
    ``interactive`` only.
    """
    if is_interactive:
        hint = "Type code question..."
    else:
        hint = "Wait for response..."

    first_update = gr.update(interactive=is_interactive, placeholder=hint)
    second_update = gr.update(interactive=is_interactive)
    return first_update, second_update
95
+
96
+
97
# --- Interface construction ---


def create_ui():
    """Build and return the Gradio ``Blocks`` app for the chat assistant.

    Layout: a sidebar with generation settings and a main column holding the
    chatbot plus the input row. Submitting the textbox and clicking the Run
    button start the same four-step chain.
    """
    # NOTE(review): nesting below was reconstructed from a flattened diff
    # (Row assumed to sit inside the Column) - confirm against the real file.
    soft_theme = gr.themes.Soft(primary_hue="blue", text_size="lg")

    with gr.Blocks(
        theme=soft_theme, css=CUSTOM_CSS, title="Qwen Coder Pro", fill_height=True
    ) as demo:
        with gr.Sidebar():
            gr.Markdown("### ⚙️ Settings")
            system_prompt = gr.Textbox(
                label="System Prompt",
                value="You are an expert coding assistant. Write clean, efficient code.",
                lines=5,
            )
            temperature = gr.Slider(
                0.0, 1.0, value=config.DEFAULT_TEMP, label="Temperature"
            )
            max_tokens = gr.Slider(
                512, 8192, value=config.DEFAULT_MAX_TOKENS, label="Max Tokens"
            )
            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

        with gr.Column(fill_height=True):
            chatbot = gr.Chatbot(
                label="Code Assistant",
                elem_id="chatbot",
                avatar_images=(None, "https://api.iconify.design/noto:robot.svg"),
                show_copy_button=True,
                scale=1,
                bubble_full_width=False,
            )

            with gr.Row(variant="compact"):
                msg = gr.Textbox(
                    show_label=False,
                    placeholder="Type your code question here...",
                    lines=1,
                    scale=8,
                    autofocus=True,
                    max_lines=3,
                )
                submit_btn = gr.Button(
                    "Run ➤", variant="primary", scale=1, min_width=100
                )

        # Chains: record the user turn, lock the inputs, stream the reply,
        # then unlock the inputs. Registered for the textbox submit first and
        # the button click second.
        for trigger in (msg.submit, submit_btn.click):
            recorded = trigger(
                user_input, [msg, chatbot], [msg, chatbot], queue=False
            )
            locked = recorded.then(
                lambda: set_interactive(False), None, [msg, submit_btn], queue=False
            )
            answered = locked.then(
                bot_response, [chatbot, system_prompt, temperature, max_tokens], chatbot
            )
            answered.then(
                lambda: set_interactive(True), None, [msg, submit_btn], queue=False
            )

        # Clearing replaces the chat history with an empty list.
        clear_btn.click(lambda: [], None, chatbot, queue=False)

    return demo
utils.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def sanitize_content(content):
    """Guarantee that ``content`` is returned as a string.

    Works around a Gradio quirk where message text arrives as a list: list
    items are stringified and joined with newlines. ``None`` becomes an
    empty string; any other value is passed through ``str``.
    """
    if content is None:
        return ""
    if not isinstance(content, list):
        return str(content)
    return "\n".join(map(str, content))