AI_Chatbot / appold.py
LejobuildYT's picture
Rename app.py to appold.py
18c1caa verified
#!/usr/bin/env python3
"""
Hugging Face Spaces backend - Zephyr-7B-Beta inference server.
Includes a plugin system for Python.
"""
import os
import json
import logging
import gradio as gr
from pathlib import Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import time
# Logging: INFO-level root configuration plus a module-scoped logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration and default generation settings.
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
MAX_TOKENS = 512
TEMPERATURE = 0.7
TOP_P = 0.9

# Plugin system: directory that is scanned and the registry it fills
# (plugin name -> plugin file path).
PLUGIN_DIR = Path("plugins")
loaded_plugins = dict()
def load_plugins():
    """Register the plugin files found in ``PLUGIN_DIR``.

    Every regular ``*.py`` file directly inside ``PLUGIN_DIR`` whose name does
    not start with a double underscore is recorded in the module-level
    ``loaded_plugins`` dict, keyed by file stem and mapped to its path.
    The files are only registered here; they are not imported or executed.

    Files are visited in sorted order so that the registration order (and
    therefore the iteration order of ``loaded_plugins``) is the same on every
    run. When ``PLUGIN_DIR`` does not exist a warning is logged and nothing
    is registered.
    """
    if not PLUGIN_DIR.exists():
        logger.warning("Plugin directory %s does not exist", PLUGIN_DIR)
        return

    # Path.glob yields entries in arbitrary order; sort for determinism.
    for plugin_file in sorted(PLUGIN_DIR.glob("*.py")):
        # Skip dunder files such as __init__.py and anything that merely
        # looks like a module (e.g. a directory named "foo.py").
        if plugin_file.name.startswith("__") or not plugin_file.is_file():
            continue
        try:
            plugin_name = plugin_file.stem
            logger.info("Loading plugin: %s", plugin_name)
            # NOTE(review): the original comment says plugins register hooks
            # via register_hook(name, callback); no such function is visible
            # in this file, so only the path is recorded for now.
            loaded_plugins[plugin_name] = plugin_file
            logger.info("✓ Plugin %s loaded", plugin_name)
        except Exception as e:
            # Best-effort: one bad plugin must not stop the others.
            logger.error("✗ Failed to load plugin %s: %s", plugin_file, e)
def call_plugin_hook(hook_name, *args, **kwargs):
    """Placeholder dispatcher for plugin hooks.

    Walks over every registered plugin, but the per-plugin body is currently
    a no-op: no hook is invoked, ``hook_name`` and the extra arguments are
    unused, and the function returns ``None``.
    """
    for plugin_name in loaded_plugins:
        try:
            # Optional plugin hook system: nothing is dispatched yet.
            continue
        except Exception as e:
            logger.error(f"Error calling hook in {plugin_name}: {e}")
# Initialize the model, tokenizer and text-generation pipeline at import time.
# Any failure is logged with its traceback and re-raised so the app does not
# start without a model.
logger.info(f"Loading model {MODEL_NAME} on {DEVICE}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # load_in_8bit=False was dropped: it is the default and the keyword is
    # deprecated in favour of quantization_config.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto" if DEVICE == "cuda" else None,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )
    # On CUDA the model is already placed by device_map="auto"; passing an
    # explicit `device` to pipeline() as well is rejected (or warned about)
    # by transformers, so only pin the device on the CPU path.
    _pipeline_kwargs = {} if DEVICE == "cuda" else {"device": -1}
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **_pipeline_kwargs,
    )
    logger.info("✓ Model loaded successfully")
except Exception as e:
    logger.exception(f"✗ Failed to load model: {e}")
    raise
def generate_response(
    prompt: str,
    system_prompt: str = None,
    *,
    temperature: float = None,
    top_p: float = None,
) -> dict:
    """
    Generate a response using the Zephyr model.

    Args:
        prompt: User input.
        system_prompt: Optional system prompt; left out of the chat when
            empty or None.
        temperature: Optional sampling temperature. When None the
            module-level TEMPERATURE is used. Values <= 0 switch to greedy
            decoding instead of sampling.
        top_p: Optional nucleus-sampling threshold. When None the
            module-level TOP_P is used.

    Returns:
        dict with the keys "response", "tokens", "time_seconds" and "model".
        If generation fails, the exception is logged and the dict instead
        carries the error text in "response", zero for "tokens" and
        "time_seconds", and an extra "error": True entry.
    """
    try:
        start_time = time.time()

        # Build the prompt. Prefer the tokenizer's own chat template so the
        # turn terminators match what the model was trained on; fall back to
        # the hand-written Zephyr-style format when no template is available.
        chat = []
        if system_prompt:
            chat.append({"role": "system", "content": system_prompt})
        chat.append({"role": "user", "content": prompt})
        if getattr(tokenizer, "chat_template", None):
            messages = tokenizer.apply_chat_template(
                chat,
                tokenize=False,
                add_generation_prompt=True,
            )
        elif system_prompt:
            messages = f"<|system|>\n{system_prompt}\n<|user|>\n{prompt}\n<|assistant|>\n"
        else:
            messages = f"<|user|>\n{prompt}\n<|assistant|>\n"

        # Resolve sampling settings at call time (the module-level defaults
        # may be overridden by the caller) and coerce to float, because UI
        # sliders can deliver ints such as 1 which the sampler rejects.
        effective_temperature = float(TEMPERATURE if temperature is None else temperature)
        effective_top_p = float(TOP_P if top_p is None else top_p)
        if effective_temperature > 0.0:
            sampling = {
                "do_sample": True,
                "temperature": effective_temperature,
                "top_p": effective_top_p,
            }
        else:
            # Sampling requires a strictly positive temperature; treat 0 as
            # a request for deterministic (greedy) decoding.
            sampling = {"do_sample": False}

        # Generate
        outputs = pipe(
            messages,
            max_new_tokens=MAX_TOKENS,
            return_full_text=False,
            **sampling,
        )
        response_text = outputs[0]["generated_text"].strip()
        elapsed = time.time() - start_time

        result = {
            "response": response_text,
            # Count only the generated text, not added special tokens.
            "tokens": len(tokenizer.encode(response_text, add_special_tokens=False)),
            "time_seconds": round(elapsed, 2),
            "model": MODEL_NAME,
        }
        logger.info(f"Generated {result['tokens']} tokens in {result['time_seconds']}s")
        return result
    except Exception as e:
        # Top-level boundary for the UI: report the failure instead of raising.
        logger.exception(f"Error generating response: {e}")
        return {
            "response": f"Error: {str(e)}",
            "tokens": 0,
            "time_seconds": 0,
            # Keep the same keys as the success path so callers can index
            # "model" unconditionally.
            "model": MODEL_NAME,
            "error": True,
        }
# Load plugins
load_plugins()

# Gradio Interface
# NOTE(review): the container nesting below (which widgets sit inside the
# Row/Columns) is reconstructed; confirm it matches the intended layout.
with gr.Blocks(title="Zephyr-7B AI Chatbot") as demo:
    gr.Markdown("# 🤖 Zephyr-7B Inference Server")
    gr.Markdown("Powered by Hugging Face & Gradio")

    with gr.Row():
        with gr.Column(scale=2):
            system_prompt = gr.Textbox(
                label="System Prompt",
                placeholder="Optional: Definiere die Rolle des Assistenten...",
                lines=3,
            )
        with gr.Column(scale=1):
            temperature = gr.Slider(
                label="Temperature",
                minimum=0.0,
                maximum=2.0,
                value=TEMPERATURE,
                step=0.1,
            )
            top_p = gr.Slider(
                label="Top P",
                minimum=0.0,
                maximum=1.0,
                value=TOP_P,
                step=0.05,
            )

    user_input = gr.Textbox(
        label="Your Message",
        placeholder="Type your message...",
        lines=4,
    )
    submit_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
    output_response = gr.Textbox(
        label="Response",
        interactive=False,
        lines=6,
    )
    # gr.JSON is display-only; it takes no `interactive` keyword in current
    # Gradio releases, so none is passed.
    output_stats = gr.JSON(label="Statistics")

    def process_input(prompt, system, temp, top):
        """Run one generation with the slider values and format the outputs.

        The module-level TEMPERATURE and TOP_P are temporarily replaced by
        ``temp`` and ``top`` while ``generate_response`` runs and are always
        restored afterwards.

        Returns:
            A ``(response_text, stats_dict)`` tuple for the response textbox
            and the statistics JSON view.
        """
        # NOTE(review): swapping module globals is not safe if several
        # requests are processed concurrently - confirm the queue settings.
        global TEMPERATURE, TOP_P
        old_temp, old_top = TEMPERATURE, TOP_P
        TEMPERATURE, TOP_P = temp, top
        try:
            result = generate_response(prompt, system)
        finally:
            # Restore the defaults even if generation raises.
            TEMPERATURE, TOP_P = old_temp, old_top

        stats = {
            "Tokens": result["tokens"],
            "Time (s)": result["time_seconds"],
            # The error result may not carry a "model" entry.
            "Model": result.get("model", MODEL_NAME),
        }
        return result["response"], stats

    submit_btn.click(
        process_input,
        inputs=[user_input, system_prompt, temperature, top_p],
        outputs=[output_response, output_stats],
    )

    # Examples
    gr.Examples(
        examples=[
            ["Was ist Machine Learning?", "Du bist ein hilfsbereiter AI-Assistent.", 0.7, 0.9],
            ["Schreibe einen kurzen Witz", "", 0.9, 0.95],
            ["Erkläre Quantencomputing in einfachen Worten", "Du bist ein Physik-Professor.", 0.7, 0.9],
        ],
        inputs=[user_input, system_prompt, temperature, top_p],
    )
if __name__ == "__main__":
    # Serve on all interfaces at port 7860 with request queuing enabled and
    # no public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    queued_demo = demo.queue()
    queued_demo.launch(**launch_options)