# app.py for Hugging Face Spaces
"""Gradio demo Space that queries a hosted Hugging Face model over HTTP.

Reads ``HF_TOKEN`` and ``MODEL_NAME`` from the environment (Space
secrets / variables), shows API health when the page loads, and sends
text-generation requests to the Inference API.
"""

import os
import time
from datetime import datetime

import gradio as gr
import requests

# Configuration abdelac/Mistral_Test
HF_TOKEN = os.getenv("HF_TOKEN", "")
MODEL_NAME = os.getenv("MODEL_NAME", "abdelac/Mistral_Test")

# Use different API URL format for Spaces.
# NOTE(review): router endpoints usually include a provider segment
# (e.g. .../hf-inference/models/<name>) — confirm this URL resolves.
API_URL = f"https://router.huggingface.co/models/{MODEL_NAME}"
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# Cache for API health (declared for reuse; not currently refreshed on a timer).
api_status_cache = {"last_check": 0, "status": None}


def check_api_status():
    """Probe the inference endpoint with a cheap HEAD request.

    Returns:
        dict with keys:
            available   -- True when the endpoint answered 200 (ready)
                           or 503 (model still loading).
            status_code -- HTTP status, or None when the request failed.
            message     -- human-readable summary for the UI banner.
    """
    try:
        response = requests.head(API_URL, headers=HEADERS, timeout=5)
        return {
            "available": response.status_code in (200, 503),
            "status_code": response.status_code,
            "message": "API is accessible" if response.status_code == 200 else "Model is loading",
        }
    # Narrowed from a bare `except:` — only network/HTTP failures should
    # count as "cannot connect" (a bare except would also swallow
    # KeyboardInterrupt/SystemExit).
    except requests.exceptions.RequestException:
        return {"available": False, "status_code": None, "message": "Cannot connect to API"}


def query_model(prompt, max_tokens=256, temperature=0.7):
    """Send *prompt* to the model and return the generated text.

    Every failure mode is reported as a user-facing string rather than
    raised, so the Gradio output box always shows something helpful.

    Args:
        prompt:      user text to complete; blank input short-circuits.
        max_tokens:  forwarded as ``max_new_tokens``.
        temperature: sampling temperature.
    """
    if not prompt.strip():
        return "āš ļø Please enter a prompt"

    if not HF_TOKEN:
        return "šŸ” Please add your HF_TOKEN in Space Settings → Repository secrets"

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "return_full_text": False,
        },
        # Block server-side until the model is warm instead of getting a 503.
        "options": {"wait_for_model": True},
    }

    try:
        start = time.time()
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=30)

        if response.status_code == 200:
            result = response.json()
            elapsed = time.time() - start
            # Text-generation responses arrive as [{"generated_text": ...}].
            if isinstance(result, list) and len(result) > 0:
                text = result[0].get("generated_text", str(result))
                return f"{text}\n\nā±ļø Generated in {elapsed:.2f}s"
            return f"Response format unexpected: {result}"
        elif response.status_code == 503:
            return "šŸ”„ Model is loading. Please wait 30 seconds and try again."
        elif response.status_code == 401:
            return "šŸ” Invalid token. Please check your HF_TOKEN."
        else:
            # May raise if the error body is not JSON; the generic
            # handler below converts that into a user-facing message too.
            error = response.json().get("error", response.text[:200])
            return f"āŒ Error {response.status_code}: {error}"
    except requests.exceptions.Timeout:
        return "ā±ļø Request timeout. Try reducing max tokens."
    except Exception as e:
        # Last-resort guard so the UI never shows a raw traceback.
        return f"āš ļø Error: {str(e)}"


# Create Gradio Interface
with gr.Blocks(
    title="Mistral Test Model",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {max-width: 800px; margin: auto;}
    .status {padding: 10px; border-radius: 5px; margin: 10px 0;}
    .ok {background: #d4edda; color: #155724;}
    .warn {background: #fff3cd; color: #856404;}
    .error {background: #f8d7da; color: #721c24;}
    """,
) as demo:
    gr.Markdown("""
    # šŸ¤– Mistral Test Model
    ### Testing Hugging Face Model Deployment

    This Space demonstrates deployment of the `abdelac/Mistral_Test` model.
    """)

    # Status display — filled in by update_status() on page load.
    status_display = gr.Markdown("")

    # Main interface
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Your Prompt",
                placeholder="Type your message here...",
                lines=5,
            )

            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=32,
                    maximum=512,
                    value=256,
                    step=32,
                    label="Max Tokens",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                )

            generate_btn = gr.Button("Generate", variant="primary")
            clear_btn = gr.Button("Clear")

        with gr.Column():
            output = gr.Textbox(
                label="Model Response",
                lines=8,
                interactive=False,
            )

    # Examples
    gr.Examples(
        examples=[
            ["Explain quantum computing in simple terms:"],
            ["Write a short poem about AI:"],
            ["What is the capital of France?"],
            ["How to make a cup of coffee:"],
        ],
        inputs=prompt,
        label="Try these examples",
    )

    # Instructions
    with gr.Accordion("šŸ“– Setup Instructions", open=False):
        gr.Markdown(f"""
        ## How to Set Up This Space:

        1. **Click "Duplicate this Space"** (top right) to create your own copy
        2. **Add your HF_TOKEN** in Settings → Repository secrets:
           - Go to [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
           - Create a new token with "read" access
           - Add it as `HF_TOKEN` in your Space settings
        3. **Optional**: Change model in Settings → Variables:
           - Add variable: `MODEL_NAME` = `abdelac/Mistral_Test`
           - Or use any other model name
        4. **The Space will automatically deploy** with your configuration

        **Current Model**: `{MODEL_NAME}`
        """)

    # Functions
    def update_status():
        """Build the status banner text from a live health check."""
        status = check_api_status()
        if status["available"]:
            if status["status_code"] == 200:
                # No interpolation needed here — plain string, not an f-string.
                return """
āœ… API Status: Ready
Model can be queried successfully
"""
            return f"""
āš ļø API Status: Loading
Model is starting up (Code: {status['status_code']})
"""
        return f"""
āŒ API Status: Unavailable
{status['message']}
"""

    def clear():
        """Reset prompt, both sliders (to their defaults), and the output box."""
        return ["", 256, 0.7, ""]

    # Event handlers
    generate_btn.click(
        fn=query_model,
        inputs=[prompt, max_tokens, temperature],
        outputs=output,
    )

    clear_btn.click(
        fn=clear,
        outputs=[prompt, max_tokens, temperature, output],
    )

    # Auto-check API health when the page loads.
    demo.load(
        fn=update_status,
        outputs=status_display,
    )

# Launch
if __name__ == "__main__":
    # Print debug info so Space logs show the effective configuration.
    print("=" * 50)
    print(f"Model: {MODEL_NAME}")
    print(f"Token present: {'Yes' if HF_TOKEN else 'No'}")
    print(f"API URL: {API_URL}")
    print("=" * 50)

    demo.launch(share=False)