import logging

import gradio as gr
| |
|
| | MODEL_NAME = "likhonhfai/mysterious-coding-model" |
| |
|
| | def load_model(): |
| | """ |
| | Attempt to lazily import transformers and torch and load the CodeAI model. |
| | Returns (model, tokenizer) if loaded successfully, otherwise (None, None). |
| | """ |
| | try: |
| | from transformers import AutoModelForCausalLM, AutoTokenizer |
| | import torch |
| | model = AutoModelForCausalLM.from_pretrained( |
| | MODEL_NAME, |
| | torch_dtype=torch.float16, |
| | device_map="auto", |
| | trust_remote_code=True, |
| | ) |
| | tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) |
| | return model, tokenizer |
| | except Exception: |
| | return None, None |
| |
|
| |
|
| | |
# Load once at import time so every chat request reuses the same instances.
# Both are None when loading failed (see load_model()); respond() checks this.
model, tokenizer = load_model()
| |
|
| |
|
def respond(message, history):
    """
    Produce a chat reply for *message* given the conversation *history*.

    Uses the loaded model when available; otherwise falls back to canned
    keyword-matched placeholder responses.

    Args:
        message: Latest user message (str).
        history: Prior turns as (user_msg, bot_msg) pairs.
            NOTE(review): assumes gr.ChatInterface's legacy tuple history
            format — confirm the installed Gradio version does not pass
            type="messages" dicts instead.

    Returns:
        str: The assistant reply.
    """
    if model is not None and tokenizer is not None:
        return _model_reply(message, history)
    return _placeholder_reply(message)


def _model_reply(message, history):
    """Generate a reply with the loaded causal LM."""
    import torch

    # Rebuild the transcript as a plain-text prompt ending at the point
    # where the assistant should continue.
    turns = [f"User: {user_msg}\nAssistant: {bot_msg}\n" for user_msg, bot_msg in history]
    turns.append(f"User: {message}\nAssistant:")
    prompt = "".join(turns)

    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=256,
            do_sample=True,  # fix: temperature/top_p are ignored under the default greedy decoding
            temperature=0.7,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens; generate() echoes the prompt,
    # and slicing it off is more robust than splitting the text on "Assistant:"
    # (which breaks if the model emits that marker inside its answer).
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


def _placeholder_reply(message):
    """Keyword-matched canned responses used when the model is unavailable."""
    lower = message.lower()
    if "hello" in lower:
        return (
            "Hello! I'm a placeholder chatbot while the full CodeAI model loads. Ask me about long-context processing, "
            "multimodal understanding, or code generation."
        )
    if "code" in lower:
        return (
            "Our model excels at code generation, completion, bug fixing, refactoring and documentation. "
            "Try asking: 'write a python function to add two numbers'."
        )
    if "image" in lower:
        return "The CodeAI model supports image understanding tasks like visual question answering and image captioning."
    if "audio" in lower or "speech" in lower:
        return "Our model can process audio for speech recognition and audio understanding."
    if "thanks" in lower or "thank you" in lower:
        return "You're welcome! Let me know if you have more questions."
    return (
        "This is a demo placeholder response. The CodeAI model uses safetensors storage, supports 8-bit and mxfp4 "
        "mixed-precision variants, is compatible with the vLLM engine, and is trained using Hugging Face AutoTrain. "
        "It handles long contexts (up to 200,000 tokens) and performs text, image, audio, and multimodal reasoning tasks."
    )
| |
|
| |
|
# Gradio chat UI; respond() is invoked once per incoming user message.
demo = gr.ChatInterface(
    fn=respond,
    title="Mysterious Coding Chatbot",
    description=(
        "Chat with our CodeAI model about coding, AI and more. The model supports long-context understanding, "
        "text, image and audio processing, and multimodal reasoning. If the full model can't load due to resource limits, "
        "the chatbot will provide informative placeholder responses."
    ),
)
| |
|
| |
|
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()
| |
|