# app.py — Gradio chat demo for the likhonhfai/mysterious-coding-model repo.
# Refactor: lazy imports and fallback to avoid missing dependencies
# (commit 5bad6b6, verified).
import logging

import gradio as gr
# Hugging Face Hub repo id of the chat model loaded once at startup.
MODEL_NAME = "likhonhfai/mysterious-coding-model"
def load_model():
    """
    Attempt to lazily import transformers and torch and load the CodeAI model.

    Imports are deferred inside the function so the app can still start (in
    placeholder/fallback mode) when the heavy dependencies are missing or the
    host cannot fit the model.

    Returns:
        tuple: (model, tokenizer) if loaded successfully, otherwise (None, None).
    """
    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch

        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        return model, tokenizer
    except Exception as exc:  # broad by design: ANY failure means "run in fallback mode"
        # Log the reason instead of silently swallowing it, so deployment
        # logs show *why* the app fell back to canned responses.
        logging.getLogger(__name__).warning(
            "Model load failed, using fallback responses: %s", exc
        )
        return None, None
# Load the model once at startup; (None, None) makes respond() use the
# placeholder fallback answers instead of generation.
model, tokenizer = load_model()
def respond(message, history):
    """
    Generate a chat reply for the Gradio ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior turns as (user_msg, bot_msg) pairs — the classic
            gradio tuple format. NOTE(review): newer gradio versions default
            to the "messages" dict format; confirm the deployed gradio
            version still passes tuples.

    Returns:
        str: The model-generated reply, or a canned placeholder response
        when the model could not be loaded at startup.
    """
    # If the model is available, generate a response using it
    if model is not None and tokenizer is not None:
        import torch  # Safe to import since it was available during model loading

        # Rebuild the whole conversation as a flat "User:/Assistant:" prompt.
        prompt = ""
        for user_msg, bot_msg in history:
            prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
        prompt += f"User: {message}\nAssistant:"
        # Tokenize via __call__ (not .encode) so the attention mask is
        # included; with pad_token_id == eos_token_id, generate() warns and
        # may mis-handle positions when no mask is supplied.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=256,
                # Fix: temperature/top_p are silently ignored under the
                # default greedy decoding — sampling must be enabled.
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )
        output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # The decoded text includes the prompt; return only the text after
        # the final "Assistant:" marker.
        if "Assistant:" in output_text:
            return output_text.split("Assistant:")[-1].strip()
        return output_text.strip()

    # Fallback responses when the model is unavailable: simple keyword
    # routing over the lowercased message.
    lower = message.lower()
    if "hello" in lower:
        return (
            "Hello! I'm a placeholder chatbot while the full CodeAI model loads. Ask me about long-context processing, "
            "multimodal understanding, or code generation."
        )
    if "code" in lower:
        return (
            "Our model excels at code generation, completion, bug fixing, refactoring and documentation. "
            "Try asking: 'write a python function to add two numbers'."
        )
    if "image" in lower:
        return "The CodeAI model supports image understanding tasks like visual question answering and image captioning."
    if "audio" in lower or "speech" in lower:
        return "Our model can process audio for speech recognition and audio understanding."
    if "thanks" in lower or "thank you" in lower:
        return "You're welcome! Let me know if you have more questions."
    # Default catch-all placeholder.
    return (
        "This is a demo placeholder response. The CodeAI model uses safetensors storage, supports 8-bit and mxfp4 "
        "mixed-precision variants, is compatible with the vLLM engine, and is trained using Hugging Face AutoTrain. "
        "It handles long contexts (up to 200,000 tokens) and performs text, image, audio, and multimodal reasoning tasks."
    )
# Gradio chat UI wired to respond(); ChatInterface calls fn(message, history).
demo = gr.ChatInterface(
    fn=respond,
    title="Mysterious Coding Chatbot",
    description=(
        "Chat with our CodeAI model about coding, AI and more. The model supports long-context understanding, "
        "text, image and audio processing, and multimodal reasoning. If the full model can't load due to resource limits, "
        "the chatbot will provide informative placeholder responses."
    ),
)
# Launch the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()