import logging

import gradio as gr

logger = logging.getLogger(__name__)

# Hugging Face model repository to load at startup.
MODEL_NAME = "likhonhfai/mysterious-coding-model"


def load_model():
    """
    Attempt to lazily import transformers and torch and load the CodeAI model.

    Returns:
        tuple: (model, tokenizer) if loaded successfully, otherwise (None, None).
    """
    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch

        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        return model, tokenizer
    except Exception:
        # Deliberate best-effort fallback: the app still runs with canned
        # responses, but log the failure so it is diagnosable instead of
        # silently swallowing it.
        logger.exception("Failed to load model %s; using placeholder responses", MODEL_NAME)
        return None, None


# Load the model once at startup.
model, tokenizer = load_model()


def _build_prompt(message, history):
    """
    Flatten the chat history plus the new message into a single prompt string.

    Handles both Gradio history formats: legacy (user, assistant) tuple pairs
    and the newer openai-style {"role": ..., "content": ...} message dicts.
    """
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            # Messages format (gr.ChatInterface with type="messages").
            role = "User" if turn.get("role") == "user" else "Assistant"
            parts.append(f"{role}: {turn.get('content', '')}\n")
        else:
            # Legacy tuple format: (user_msg, bot_msg).
            user_msg, bot_msg = turn
            parts.append(f"User: {user_msg}\nAssistant: {bot_msg}\n")
    parts.append(f"User: {message}\nAssistant:")
    return "".join(parts)


def _generate_reply(message, history):
    """
    Run the loaded model on the conversation and return the assistant's reply.

    Assumes ``model`` and ``tokenizer`` are non-None (checked by the caller).
    """
    import torch  # Safe to import since it was available during model loading.

    prompt = _build_prompt(message, history)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            inputs,
            max_new_tokens=256,
            # do_sample=True is required for temperature/top_p to take effect;
            # without it, generate() defaults to greedy decoding and ignores them.
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
        )
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Keep only the text after the final "Assistant:" marker, if present.
    if "Assistant:" in output_text:
        return output_text.split("Assistant:")[-1].strip()
    return output_text.strip()


def _fallback_response(message):
    """Return a canned, keyword-matched reply used when the model is unavailable."""
    lower = message.lower()
    if "hello" in lower:
        return (
            "Hello! I'm a placeholder chatbot while the full CodeAI model loads. Ask me about long-context processing, "
            "multimodal understanding, or code generation."
        )
    if "code" in lower:
        return (
            "Our model excels at code generation, completion, bug fixing, refactoring and documentation. "
            "Try asking: 'write a python function to add two numbers'."
        )
    if "image" in lower:
        return "The CodeAI model supports image understanding tasks like visual question answering and image captioning."
    if "audio" in lower or "speech" in lower:
        return "Our model can process audio for speech recognition and audio understanding."
    if "thanks" in lower or "thank you" in lower:
        return "You're welcome! Let me know if you have more questions."
    return (
        "This is a demo placeholder response. The CodeAI model uses safetensors storage, supports 8-bit and mxfp4 "
        "mixed-precision variants, is compatible with the vLLM engine, and is trained using Hugging Face AutoTrain. "
        "It handles long contexts (up to 200,000 tokens) and performs text, image, audio, and multimodal reasoning tasks."
    )


def respond(message, history):
    """
    Generate a response using the loaded model, or a placeholder when it is unavailable.

    Args:
        message: The user's latest message.
        history: Prior conversation turns (tuple pairs or message dicts).

    Returns:
        str: The assistant's reply.
    """
    if model is not None and tokenizer is not None:
        return _generate_reply(message, history)
    return _fallback_response(message)


demo = gr.ChatInterface(
    fn=respond,
    title="Mysterious Coding Chatbot",
    description=(
        "Chat with our CodeAI model about coding, AI and more. The model supports long-context understanding, "
        "text, image and audio processing, and multimodal reasoning. If the full model can't load due to resource limits, "
        "the chatbot will provide informative placeholder responses."
    ),
)

if __name__ == "__main__":
    demo.launch()