Update app.py
✅ What’s Improved
- Corrected a missing import
- Robust error handling with feedback in the UI
- Async model usage via asyncio.to_thread (see the sketch below)
- Modularized prompt building and response extraction
- Input validation
- Docstrings for every function
- Type annotations for clarity
- Clear comments and section separation
- Graceful fallback if the model fails to load
- No blocking UI operations
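The non-blocking behavior rests on asyncio.to_thread (Python 3.9+), which runs a blocking function in a worker thread and awaits the result. A minimal standalone sketch of the pattern; slow_generate here is an illustrative stand-in, not part of the app:

    import asyncio
    import time

    def slow_generate(prompt: str) -> str:
        # Stand-in for a blocking transformers pipeline call
        time.sleep(2)
        return f"response to {prompt!r}"

    async def handler(prompt: str) -> str:
        # The event loop stays free to serve other requests while
        # slow_generate runs on a worker thread
        return await asyncio.to_thread(slow_generate, prompt)

    print(asyncio.run(handler("hello")))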
app.py CHANGED

Removed: the previous app.py was a chat-assistant answer pasted in verbatim. It opened with bare setup prose before any code:

    Install the necessary packages:

    pip install gradio transformers

    📱 Gradio Chatbot App Code

The code that followed (the imports, the premium_models list, pipeline_cache, default_system_prompt, load_pipeline, chatbot, and the gr.Blocks UI through demo.launch()) was wrapped in a ''' ... ''' string, and the file closed with a second prose block:

    ✅ Features:

    Model selection from dropdown
    Maintains chat history
    Respects a system prompt
    Uses text-generation pipeline

    🧠 Optional Upgrades:

    Replace text-generation with chat-completion if models support it (like OpenChat, Mistral-instruct, etc.)
    Add streaming or token-by-token response if supported
    Save/load chat history
    Add support for vision models (Qwen2.5-VL-7B-Instruct) using a different UI tab

The unquoted prose is invalid Python, which is why the Space failed to build. This commit replaces the whole file with the runnable version below.

Added: the new app.py in full (lines hidden between diff hunks are marked with ellipsis comments):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Path: chatbot_app.py
Description: Gradio-based chatbot with selectable Hugging Face LLMs, using transformers pipelines.
"""

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, Pipeline
import torch
import asyncio
from typing import List, Tuple, Dict
# ✅ List of available premium models
PREMIUM_MODELS = [
    "HuggingFaceH4/zephyr-7b-beta",
    "K00B404/BagOClownCoders-slerp-7B",
    "Qwen/Qwen2.5-Omni-7B",
    # … three entries hidden in the diff view …
    "Alibaba-NLP/gte-Qwen2-7B-instruct",
]
# NOTE: not every entry is a plain causal-LM chat model (gte-Qwen2 is an
# embedding model), so load_pipeline may fail or behave poorly for some of them.

# ✅ Cache for loaded pipelines, keyed by model name
pipeline_cache: Dict[str, Pipeline] = {}

# ✅ Initial system prompt
DEFAULT_SYSTEM_PROMPT = "You are a ChatBuddy and chat with the user in a Human way."
def load_pipeline(model_name: str) -> Pipeline:
    """
    Load and cache the text generation pipeline for the given model.
    """
    if model_name in pipeline_cache:
        return pipeline_cache[model_name]

    try:
        print(f"Loading model: {model_name}")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        )
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device=0 if torch.cuda.is_available() else -1,
        )
        pipeline_cache[model_name] = pipe
        return pipe

    except Exception as e:
        raise RuntimeError(f"Failed to load model '{model_name}': {str(e)}")
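Because load_pipeline checks pipeline_cache first, only the first request for each model pays the load cost. A quick illustration (running this for real downloads several GB of weights; the point is only the identity check):

    pipe_a = load_pipeline("HuggingFaceH4/zephyr-7b-beta")  # slow: loads tokenizer and weights
    pipe_b = load_pipeline("HuggingFaceH4/zephyr-7b-beta")  # fast: served from pipeline_cache
    assert pipe_a is pipe_b  # same cached object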
def build_prompt(user_input: str, history: List[Tuple[str, str]]) -> str:
    """
    Construct the prompt string with system prompt, history, and current user input.
    """
    messages = [{"role": "system", "content": DEFAULT_SYSTEM_PROMPT}]
    for pair in history:
        messages.append({"role": "user", "content": pair[0]})
        messages.append({"role": "assistant", "content": pair[1]})
    messages.append({"role": "user", "content": user_input})

    # Flatten the messages into a single prompt string with Zephyr-style role tags
    prompt = ""
    for msg in messages:
        role_tag = f"<|{msg['role']}|>"
        prompt += f"{role_tag} {msg['content']}\n"

    return prompt
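For a one-turn history, build_prompt flattens the messages like this:

    build_prompt("How are you?", [("Hi", "Hello! How can I help?")])
    # "<|system|> You are a ChatBuddy and chat with the user in a Human way.\n"
    # "<|user|> Hi\n"
    # "<|assistant|> Hello! How can I help?\n"
    # "<|user|> How are you?\n"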
def extract_response(generated_text: str) -> str:
    """
    Extract the last assistant response from generated text.
    """
    if "<|assistant|>" in generated_text:
        split_res = generated_text.split("<|assistant|>")
        return split_res[-1].strip()
    return generated_text.strip()
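This is needed because a text-generation pipeline returns the prompt plus the continuation by default, so splitting on the last <|assistant|> tag isolates the newest reply:

    extract_response("<|user|> Hi\n<|assistant|> Hello there!")  # -> 'Hello there!'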
async def chatbot(
    user_input: str, history: List[Tuple[str, str]], model_choice: str
) -> Tuple[str, List[Tuple[str, str]]]:
    """
    Main chatbot logic to generate model response asynchronously.
    """
    if not user_input.strip():
        return "", history  # Ignore empty inputs

    try:
        pipe = await asyncio.to_thread(load_pipeline, model_choice)
        prompt = build_prompt(user_input, history)

        response = await asyncio.to_thread(
            pipe,
            prompt,
            max_new_tokens=200,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
        )

        generated_text = response[0]["generated_text"]
        final_response = extract_response(generated_text)

    except RuntimeError as load_err:
        final_response = str(load_err)
    except Exception as e:
        final_response = f"⚠️ Error during generation: {str(e)}"

    history.append((user_input, final_response))
    return "", history
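Gradio awaits async event handlers natively, so chatbot can hand both the model load and the pipeline call to worker threads via asyncio.to_thread while the event loop keeps serving other sessions. Note that history is mutated in place and also returned, so the same list object backs the session state across turns.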
# ✅ Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 ChatBuddy - Advanced Chatbot with Selectable LLMs")

    with gr.Row():
        model_choice = gr.Dropdown(
            label="Select Model", choices=PREMIUM_MODELS, value=PREMIUM_MODELS[0]
        )

    chatbot_ui = gr.Chatbot()
    user_input = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
    clear_btn = gr.Button("Clear")

    # … new-file lines 133-136 are hidden in the diff view; they presumably
    # define `state` and wire user_input.submit to the chatbot handler …
    clear_btn.click(lambda: ([], ""), None, [chatbot_ui, state])

demo.launch()
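The hidden lines 133-136 are not recoverable from this diff. Purely as an illustration of what such wiring typically looks like in Gradio (every line below is conjecture, not the committed code):

    state = gr.State([])  # hypothetical: holds the (user, assistant) history tuples
    user_input.submit(
        chatbot,
        inputs=[user_input, state, model_choice],
        outputs=[user_input, chatbot_ui],
    )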