Spaces:

Vishinka
/

Code_LLM

Sleeping

App Files Community

AnatoliiG committed on Jan 18

Commit

c581315

1 Parent(s): 11d7c2d

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -22

app.py CHANGED Viewed

@@ -22,7 +22,8 @@ try:
     llm = Llama(
         model_path=model_path,
         n_ctx=CONTEXT_SIZE,
-        n_threads=2,
         n_batch=512,
         verbose=True,
     )
@@ -78,30 +79,25 @@ async def chat_completions(request: Request):
 def user_input(user_message, history):
-    return "", history + [[user_message, None]]
 def bot_response(history, system_prompt, temperature, max_tokens):
     if not llm:
-        history[-1][1] = "Error: Model failed to load. Check logs."
         yield history
         return
-    # Конвертируем историю Gradio (списки) в формат Llama (словари)
     messages = [{"role": "system", "content": system_prompt}]
-    # Берем последние 10 диалогов для контекста
-    relevant_history = history[-11:-1] if len(history) > 1 else []
-    for user_msg, assistant_msg in relevant_history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    # Добавляем последнее сообщение пользователя
-    last_user_msg = history[-1][0]
-    messages.append({"role": "user", "content": last_user_msg})
     partial_text = ""
     try:
@@ -116,13 +112,12 @@ def bot_response(history, system_prompt, temperature, max_tokens):
             delta = chunk["choices"][0]["delta"]
             if "content" in delta:
                 partial_text += delta["content"]
-                # Обновляем последнее сообщение ассистента в истории (классический формат)
-                history[-1][1] = partial_text
                 yield history
     except Exception as e:
         traceback.print_exc()
-        history[-1][1] = partial_text + f"\n\n❌ **Error:** {str(e)}"
         yield history
@@ -155,13 +150,10 @@ with gr.Blocks(theme=theme, css=custom_css, title="Qwen Coder Pro") as demo:
         # Чат
         with gr.Column(scale=4):
-            # ВАЖНО: Убрали type="messages", используем стандартный формат
             chatbot = gr.Chatbot(
                 label="Conversation",
                 elem_id="chatbot",
-                # show_copy_button=True,
                 avatar_images=(None, "https://api.iconify.design/noto:robot.svg"),
-                # type="tuples",
             )
             msg = gr.Textbox(
@@ -176,7 +168,7 @@ with gr.Blocks(theme=theme, css=custom_css, title="Qwen Coder Pro") as demo:
     submit_btn.click(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot_response, [chatbot, system_prompt, temperature, max_tokens], chatbot
     )
-    clear_btn.click(lambda: None, None, chatbot, queue=False)
 app = mount_gradio_app(app, demo, path="/")

     llm = Llama(
         model_path=model_path,
         n_ctx=CONTEXT_SIZE,
+        n_threads=4,  # Оптимизация для CPU Spaces
+        n_gpu_layers=0,  # Явно указываем 0 для CPU
         n_batch=512,
         verbose=True,
     )
 def user_input(user_message, history):
+    # Gradio 5: История - это список словарей. Добавляем сообщение пользователя.
+    if history is None:
+        history = []
+    return "", history + [{"role": "user", "content": user_message}]
 def bot_response(history, system_prompt, temperature, max_tokens):
     if not llm:
+        history.append({"role": "assistant", "content": "Error: Model failed to load."})
         yield history
         return
     messages = [{"role": "system", "content": system_prompt}]
+    relevant_history = history[-10:] if len(history) > 10 else history
+    for msg in relevant_history:
+        messages.append({"role": msg["role"], "content": msg["content"]})
+    history.append({"role": "assistant", "content": ""})
     partial_text = ""
     try:
             delta = chunk["choices"][0]["delta"]
             if "content" in delta:
                 partial_text += delta["content"]
+                history[-1]["content"] = partial_text
                 yield history
     except Exception as e:
         traceback.print_exc()
+        history[-1]["content"] = partial_text + f"\n\n❌ **Error:** {str(e)}"
         yield history
         # Чат
         with gr.Column(scale=4):
             chatbot = gr.Chatbot(
                 label="Conversation",
                 elem_id="chatbot",
                 avatar_images=(None, "https://api.iconify.design/noto:robot.svg"),
             )
             msg = gr.Textbox(
     submit_btn.click(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot_response, [chatbot, system_prompt, temperature, max_tokens], chatbot
     )
+    clear_btn.click(lambda: [], None, chatbot, queue=False)
 app = mount_gradio_app(app, demo, path="/")