refactor: Replace ChatInterface with pure Blocks to fix HTTP/2 errors
BREAKING CHANGE: Complete rewrite of chat interface
- Remove ChatInterface completely (causes forced SSE usage)
- Implement manual chat UI using gr.Blocks
- Use tuple format for chat history [[user, bot], ...]
- No queue system, no SSE, no HTTP/2 protocol errors
This is the definitive fix for ERR_HTTP2_PROTOCOL_ERROR and
500 Internal Server Error issues on Hugging Face Spaces.
ChatInterface internally enforces queue/SSE which is incompatible
with HF Spaces infrastructure. Pure Blocks gives full control.
Changes:
- chat_response() now returns updated history list
- Manual gr.Chatbot + gr.Textbox + gr.Button layout
- Direct .click() and .submit() event handlers
- No .queue() call anywhere (see the sketch below)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
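
For reference, the pattern this commit describes fits in a screenful. A minimal, self-contained sketch — the echo responder and component names are illustrative, not the app's code, and it assumes a Gradio 4.x Blocks app:

```python
import gradio as gr

# Stub responder -- the real app calls chat_response(), which loads a model.
def respond(message, history):
    # history uses the tuple format: [[user_msg, bot_msg], ...]
    if not message or not message.strip():
        return history, ""
    return history + [[message, f"Echo: {message}"]], ""  # updated history, cleared input

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=500)
    with gr.Row():
        box = gr.Textbox(placeholder="Type a message...", show_label=False, scale=9)
        send = gr.Button("Send", variant="primary", scale=1)

    # Direct event wiring: no ChatInterface and no .queue() call anywhere,
    # per the commit's rationale for avoiding forced SSE.
    send.click(respond, inputs=[box, chatbot], outputs=[chatbot, box])
    box.submit(respond, inputs=[box, chatbot], outputs=[chatbot, box])

demo.launch()
```

The real app substitutes chat_response() for the echo stub, as the diff below shows.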
@@ -119,32 +119,35 @@ def load_model(model_name):
 
 def chat_response(message, history, model_name):
     """
-    Generate chatbot response
+    Generate chatbot response - Returns updated history (for Blocks)
 
     Args:
         message: User input
-        history: Chat history
+        history: Chat history as list of [user_msg, bot_msg] pairs
         model_name: Selected model
 
     Returns:
-
+        Updated history list
     """
+    if not message or not message.strip():
+        return history
+
     try:
         # Load model and tokenizer
         model, tokenizer = load_model(model_name)
 
         if model is None or tokenizer is None:
-            return f"❌ Could not load model '{model_name}'."
+            return history + [[message, f"❌ Could not load model '{model_name}'."]]
 
         model_config = MODELS[model_name]
 
-        # Build conversation context
+        # Build conversation context from history
         conversation = ""
-        for
-        if
-            conversation += f"{
-
-            conversation += f"{
+        for user_msg, bot_msg in history:
+            if user_msg:
+                conversation += f"{user_msg}\n"
+            if bot_msg:
+                conversation += f"{bot_msg}\n"
 
         # Add current message
         conversation += f"{message}\n"
@@ -173,7 +176,7 @@ def chat_response(message, history, model_name):
         if not response:
             response = "I understand. Could you tell me more?"
 
-        return response
+        return history + [[message, response]]
 
     except Exception as e:
         import traceback
@@ -187,11 +190,11 @@ def chat_response(message, history, model_name):
         print("=" * 50)
 
         if "out of memory" in error_msg.lower() or "oom" in error_msg.lower():
-            return "❌ Out of memory. Select a smaller model or restart the app."
+            return history + [[message, "❌ Out of memory. Select a smaller model or restart the app."]]
         elif "cuda" in error_msg.lower() and device == "cpu":
-            return "⚠️ Running on CPU without a GPU. Responses may be slow."
+            return history + [[message, "⚠️ Running on CPU without a GPU. Responses may be slow."]]
         else:
-            return f"❌ Error: {error_type}\n{error_msg[:200]}"
+            return history + [[message, f"❌ Error: {error_type}\n{error_msg[:200]}"]]
 
 
 # Global state
@@ -257,25 +260,49 @@ with gr.Blocks(
     # Warning message for model requirements
     model_warning = gr.Markdown("", visible=False)
 
-    # Chat interface
-        textbox=gr.Textbox(
+    # Chat interface using pure Blocks (NO ChatInterface to avoid SSE issues)
+    chatbot_display = gr.Chatbot(
+        height=500,
+        label="💬 Conversation",
+        show_label=False,
+        type="tuples",  # Use tuple format [[user_msg, bot_msg], ...]
+    )
+
+    with gr.Row():
+        msg_input = gr.Textbox(
             placeholder="💬 Enter a message (English recommended)...",
-            scale=
+            show_label=False,
+            scale=9,
             autofocus=True,
             elem_classes="chatbot-input",
-        )
+        )
+        submit_btn = gr.Button("Send", scale=1, variant="primary")
+
+    clear_btn = gr.Button("🗑️ Clear conversation", size="sm")
+
+    # Message submission handler
+    def submit_message(message, history, model):
+        updated_history = chat_response(message, history, model)
+        return updated_history, ""  # Return updated history and clear input
+
+    # Button click event (NO queue)
+    submit_btn.click(
+        fn=submit_message,
+        inputs=[msg_input, chatbot_display, model_dropdown],
+        outputs=[chatbot_display, msg_input],
+    )
+
+    # Enter key event
+    msg_input.submit(
+        fn=submit_message,
+        inputs=[msg_input, chatbot_display, model_dropdown],
+        outputs=[chatbot_display, msg_input],
+    )
+
+    # Clear button
+    clear_btn.click(
+        fn=lambda: [],
+        outputs=chatbot_display,
     )
 
     # Examples section with model switching
@@ -305,10 +332,10 @@ with gr.Blocks(
     def set_example_4():
         return "kyujinpy/KoT-Llama2-7B-Chat", "Please explain artificial intelligence briefly."
 
-    example_btn_1.click(set_example_1, outputs=[model_dropdown,
-    example_btn_2.click(set_example_2, outputs=[model_dropdown,
-    example_btn_3.click(set_example_3, outputs=[model_dropdown,
-    example_btn_4.click(set_example_4, outputs=[model_dropdown,
+    example_btn_1.click(set_example_1, outputs=[model_dropdown, msg_input])
+    example_btn_2.click(set_example_2, outputs=[model_dropdown, msg_input])
+    example_btn_3.click(set_example_3, outputs=[model_dropdown, msg_input])
+    example_btn_4.click(set_example_4, outputs=[model_dropdown, msg_input])
 
     # Show warning and clear chat when model changes
     def on_model_change(new_model):
@@ -331,7 +358,7 @@ with gr.Blocks(
     model_dropdown.change(
         fn=on_model_change,
         inputs=[model_dropdown],
-        outputs=[
+        outputs=[chatbot_display, model_warning, model_warning],
    )
 
     gr.Markdown(
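For completeness, the new handler contract can be sanity-checked outside the UI. A hypothetical REPL-style session, assuming the Space's module is app.py and chat_response is importable from it:

```python
# Hypothetical check of the tuple-format history contract;
# the module name app and the model choice are assumptions.
from app import chat_response

history = []
history = chat_response("Hello", history, "kyujinpy/KoT-Llama2-7B-Chat")

# Each successful turn appends one [user_msg, bot_msg] pair.
print(len(history))   # 1
print(history[0][0])  # "Hello"
print(history[0][1])  # model reply, or a ❌/⚠️ error string on failure

# Blank input hits the early-return guard and leaves history unchanged.
assert chat_response("   ", history, "kyujinpy/KoT-Llama2-7B-Chat") == history
```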