Spaces:

yukee1992
/

Ai_chat

Paused

App Files Community

yukee1992 committed on Feb 7

Commit

a434ebb

verified ·

1 Parent(s): 66dcfeb

Update app.py

Browse files

Files changed (1) hide show

app.py +348 -412

app.py CHANGED Viewed

@@ -1,461 +1,397 @@
-# app.py - UPDATED WITH NEW HUGGINGFACE API
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from typing import Optional, List, Dict, Any
 import os
-import json
-import requests
 import logging
-from datetime import datetime
-import time
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-app = FastAPI(
-    title="AI Summarization API",
-    description="Free AI for Summarization and Viral Stories",
-    version="2.0.0"
 )
-# Get HuggingFace token from environment
-HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN", "")
-# NEW HUGGINGFACE API ENDPOINT
-HF_API_URL = "https://router.huggingface.co/huggingface"
-# WORKING MODELS with NEW API format
-MODELS = {
-    "qwen": "Qwen/Qwen2.5-7B-Instruct",  # Best for Chinese-English
-    "mistral": "mistralai/Mistral-7B-Instruct-v0.3",
-    "llama": "meta-llama/Llama-3.2-3B-Instruct",
-    "phi": "microsoft/phi-2",  # Lightweight
-    "gemma": "google/gemma-2-9b-it",  # Usually available
-    "zephyr": "HuggingFaceH4/zephyr-7b-beta"
-}
-class SummarizeRequest(BaseModel):
-    content: str
-    language: Optional[str] = "chinese"
-    max_length: Optional[int] = 150
-    min_length: Optional[int] = 50
-    model: Optional[str] = "qwen"
-class StoryRequest(BaseModel):
-    topic: str
-    platform: Optional[str] = "wechat"
-    language: Optional[str] = "chinese"
-    model: Optional[str] = "qwen"
-class ChatRequest(BaseModel):
-    prompt: str
-    model: Optional[str] = "qwen"
-    max_tokens: Optional[int] = 500
-def call_huggingface_api(model: str, prompt: str, max_tokens: int = 500) -> str:
-    """Call HuggingFace NEW Router API"""
-    if not HF_TOKEN:
-        raise Exception("HUGGINGFACE_TOKEN not configured. Please set it in environment variables.")
-    model_name = MODELS.get(model, MODELS["qwen"])
-    headers = {
-        "Authorization": f"Bearer {HF_TOKEN}",
-        "Content-Type": "application/json"
-    }
-    # Format prompt based on model
-    if "qwen" in model_name.lower():
-        formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
-    elif "mistral" in model_name.lower():
-        formatted_prompt = f"<s>[INST] {prompt} [/INST]"
-    elif "llama" in model_name.lower():
-        formatted_prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
-    else:
-        formatted_prompt = prompt
-    payload = {
-        "model": model_name,
-        "inputs": formatted_prompt,
-        "parameters": {
-            "max_new_tokens": max_tokens,
-            "temperature": 0.7,
-            "do_sample": True,
-            "return_full_text": False
-        }
-    }
-    try:
-        logger.info(f"📤 Calling HuggingFace Router API: {model_name}")
-        response = requests.post(
-            f"{HF_API_URL}/models/v1/{model_name}",
-            headers=headers,
-            json=payload,
-            timeout=60  # Increased timeout
-        )
-        if response.status_code == 200:
-            result = response.json()
-            # Parse response based on format
-            if isinstance(result, list):
-                if len(result) > 0:
-                    if isinstance(result[0], dict):
-                        if "generated_text" in result[0]:
-                            text = result[0]["generated_text"]
-                        else:
-                            text = str(result[0])
-                    else:
-                        text = str(result[0])
-                else:
-                    text = "No response generated"
-            elif isinstance(result, dict):
-                if "generated_text" in result:
-                    text = result["generated_text"]
-                elif "choices" in result:  # Chat format
-                    if len(result["choices"]) > 0:
-                        text = result["choices"][0].get("message", {}).get("content", "")
-                    else:
-                        text = "No choices available"
-                else:
-                    text = str(result)
-            else:
-                text = str(result)
-            # Clean up the response
-            if formatted_prompt in text:
-                text = text.replace(formatted_prompt, "").strip()
-            return text
-        elif response.status_code == 503:
-            # Model is loading
-            error_data = response.json()
-            error_msg = error_data.get("error", "Model is loading")
-            # Check if it's loading or unavailable
-            if "loading" in error_msg.lower():
-                logger.info("⏳ Model is loading, waiting 45 seconds...")
-                time.sleep(45)
-                # Try one more time with longer timeout
-                response = requests.post(
-                    f"{HF_API_URL}/models/v1/{model_name}",
-                    headers=headers,
-                    json=payload,
-                    timeout=90
-                )
-                if response.status_code == 200:
-                    return call_huggingface_api(model, prompt, max_tokens)
-                else:
-                    raise Exception(f"Model still loading after wait: {error_msg}")
-            else:
-                raise Exception(f"Model unavailable: {error_msg}")
-        elif response.status_code == 429:
-            # Rate limit
-            raise Exception("Rate limit exceeded. Please wait a moment and try again.")
-        else:
-            error_msg = response.text[:500]
-            logger.error(f"API Error {response.status_code}: {error_msg}")
-            # Try with a simpler model
-            if model != "phi":
-                logger.info(f"🔄 Trying with phi model instead...")
-                return call_huggingface_api("phi", prompt, max_tokens)
-            else:
-                raise Exception(f"API Error {response.status_code}: {error_msg}")
-    except requests.exceptions.Timeout:
-        # Try with a smaller model
-        if model != "phi":
-            logger.warning("⏰ Timeout, trying phi model...")
-            return call_huggingface_api("phi", prompt, max_tokens)
-        else:
-            raise Exception("Request timeout. Please try again with shorter text.")
-    except Exception as e:
-        raise Exception(f"API call failed: {str(e)}")
-@app.get("/")
-async def root():
-    return {
-        "status": "online",
-        "service": "AI Summarization API (v2.0)",
-        "models": list(MODELS.keys()),
-        "recommended_model": "qwen (for Chinese)",
-        "endpoints": {
-            "/health": "GET - Health check",
-            "/test": "GET - Test API",
-            "/test/{model}": "GET - Test specific model",
-            "/summarize": "POST - Summarize text",
-            "/create_story": "POST - Create viral story",
-            "/chat": "POST - General chat"
-        },
-        "note": "Using HuggingFace Router API"
-    }
-@app.get("/health")
-async def health():
-    hf_configured = bool(HF_TOKEN)
-    return {
-        "status": "healthy",
-        "huggingface_configured": hf_configured,
-        "available_models": len(MODELS),
-        "timestamp": datetime.now().isoformat(),
-        "api_version": "router.huggingface.co"
-    }
-@app.get("/test")
-async def test():
-    """Test with default model (qwen)"""
-    return await test_model("qwen")
-@app.get("/test/{model_name}")
-async def test_model(model_name: str):
-    """Test specific model"""
-    if not HF_TOKEN:
-        return {
-            "success": False,
-            "error": "HUGGINGFACE_TOKEN not configured",
-            "help": "Get free token from https://huggingface.co/settings/tokens"
-        }
-    if model_name not in MODELS:
-        return {
-            "success": False,
-            "error": f"Model '{model_name}' not available",
-            "available_models": list(MODELS.keys())
-        }
-    # Simple test prompt
-    test_prompt = "请用中文简单介绍人工智能，不超过100字。"
     try:
-        start_time = time.time()
-        response = call_huggingface_api(model_name, test_prompt, 100)
-        processing_time = time.time() - start_time
-        return {
-            "success": True,
-            "model": model_name,
-            "model_full_name": MODELS[model_name],
-            "response": response,
-            "response_preview": response[:200] + "..." if len(response) > 200 else response,
-            "length": len(response),
-            "processing_time_seconds": round(processing_time, 2)
         }
     except Exception as e:
-        return {
-            "success": False,
-            "model": model_name,
-            "error": str(e),
-            "help": "Try a different model or check token permissions"
         }
-@app.post("/summarize")
-async def summarize(request: SummarizeRequest):
-    """Summarize text with AI"""
-    start_time = time.time()
-    if not request.content or len(request.content.strip()) < 10:
-        raise HTTPException(status_code=400, detail="Content is too short (min 10 characters)")
-    # Create prompt based on language
-    if request.language.lower() in ["chinese", "zh", "cn"]:
-        prompt = f"""请用中文总结以下内容，提取3-5个关键要点，保持简洁：
-{request.content[:2000]}  # Limit content length
-总结："""
-    else:
-        prompt = f"""Please summarize the following content in English, extract 3-5 key points:
-{request.content[:2000]}
-Summary:"""
     try:
-        # Limit content length to avoid timeout
-        content = request.content[:2000] if len(request.content) > 2000 else request.content
-        # Call the AI
-        summary = call_huggingface_api(
-            model=request.model,
-            prompt=prompt,
-            max_tokens=min(request.max_length, 300)  # Limit tokens
-        )
-        processing_time = time.time() - start_time
-        return {
-            "success": True,
-            "summary": summary.strip(),
-            "model": request.model,
-            "model_full_name": MODELS.get(request.model, MODELS["qwen"]),
-            "original_length": len(request.content),
-            "summary_length": len(summary),
-            "processing_time_seconds": round(processing_time, 2),
-            "compression_ratio": f"{len(summary)/max(len(content), 1)*100:.1f}%"
         }
-    except Exception as e:
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": f"Summarization failed: {str(e)}",
-                "suggestion": "Try with model='phi' for faster response"
-            }
         )
-@app.post("/create_story")
-async def create_story(request: StoryRequest):
-    """Create viral story"""
-    start_time = time.time()
-    # Create prompt
-    if request.language.lower() in ["chinese", "zh", "cn"]:
-        prompt = f"""创作一个关于"{request.topic}"的病毒式故事：
-要求：
-1. 提供3个吸引人的标题
-2. 故事简短有力（300字内）
-3. 适合{request.platform}平台
-4. 包含传播分析
-5. 添加相关标签
-请开始："""
-    else:
-        prompt = f"""Create a viral story about "{request.topic}" for {request.platform}:
-Requirements:
-1. Provide 3 catchy titles
-2. Keep story short (under 300 words)
-3. Include virality analysis
-4. Add relevant hashtags
-Start:"""
     try:
-        story = call_huggingface_api(
-            model=request.model,
-            prompt=prompt,
-            max_tokens=600
-        )
-        processing_time = time.time() - start_time
-        return {
-            "success": True,
-            "story": story.strip(),
-            "model": request.model,
-            "model_full_name": MODELS.get(request.model, MODELS["qwen"]),
-            "topic": request.topic,
-            "platform": request.platform,
-            "processing_time_seconds": round(processing_time, 2)
-        }
     except Exception as e:
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": f"Story creation failed: {str(e)}",
-                "suggestion": "Try with model='phi' or shorten your topic"
-            }
-        )
-@app.post("/chat")
-async def chat(request: ChatRequest):
-    """General chat endpoint"""
-    start_time = time.time()
     try:
-        response = call_huggingface_api(
-            model=request.model,
-            prompt=request.prompt,
-            max_tokens=min(request.max_tokens, 1000)
-        )
-        processing_time = time.time() - start_time
-        return {
-            "success": True,
-            "response": response.strip(),
-            "model": request.model,
-            "model_full_name": MODELS.get(request.model, MODELS["qwen"]),
-            "processing_time_seconds": round(processing_time, 2)
-        }
     except Exception as e:
-        raise HTTPException(
-            status_code=500,
-            detail=f"Chat failed: {str(e)}"
-        )
-# Simple fallback for testing
-@app.post("/summarize_simple")
-async def summarize_simple(request: Dict[str, Any]):
-    """Simple summarization without complex AI"""
-    content = request.get("content", "")
-    if not content or len(content) < 20:
-        return {
-            "success": True,
-            "summary": "内容太短，无法总结。",
-            "model": "simple",
-            "processing_time_seconds": 0.01
-        }
-    # Simple rule-based summarization for Chinese
-    if any(char in content for char in ["。", "！", "？"]):
-        sentences = []
-        for char in ["。", "！", "？"]:
-            if char in content:
-                parts = content.split(char)
-                sentences.extend([p + char for p in parts[:-1]])
-        if sentences:
-            summary = "。".join(sentences[:3]) + "。"
-        else:
-            summary = content[:100] + "..."
-    else:
-        summary = content[:100] + "..."
-    return {
-        "success": True,
-        "summary": summary,
-        "model": "simple_fallback",
-        "processing_time_seconds": 0.01
-    }
 if __name__ == "__main__":
-    import uvicorn
-    port = int(os.getenv("PORT", 7860))
-    logger.info("=" * 60)
-    logger.info("🚀 AI Summarization API (v2.0)")
-    logger.info(f"🔑 HuggingFace Token: {'✅ Configured' if HF_TOKEN else '❌ NOT CONFIGURED'}")
-    logger.info(f"🌐 Using API: router.huggingface.co")
-    logger.info(f"🤖 Available models: {list(MODELS.keys())}")
-    logger.info(f"⭐ Recommended for Chinese: qwen")
-    logger.info(f"⚡ Lightweight backup: phi")
-    logger.info("=" * 60)
-    if not HF_TOKEN:
-        logger.error("❌ ERROR: HUGGINGFACE_TOKEN is required!")
-        logger.info("ℹ️  Steps to fix:")
-        logger.info("1. Go to: https://huggingface.co/settings/tokens")
-        logger.info("2. Create new token with 'read' access")
-        logger.info("3. Add to Space: Settings → Repository secrets")
-        logger.info("4. Restart the Space")
-    uvicorn.run(app, host="0.0.0.0", port=port)

 import os
+import uuid
+import httpx
+import torch
 import logging
+import json
+import asyncio
+from typing import Dict, Optional, List, Union
+from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, Depends
+from fastapi.responses import JSONResponse
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import uvicorn
+from contextlib import asynccontextmanager
+# Configuration
+MODEL_ID = "google/gemma-1.1-2b-it"
+HF_TOKEN = os.getenv("HF_TOKEN", "")
+API_KEY = os.getenv("API_KEY", "default-key-123")
+MAX_TOKENS = int(os.getenv("MAX_TOKENS", "450"))
+DEVICE = os.getenv("DEVICE", "cpu")
+PORT = int(os.getenv("PORT", "7860"))
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
+logger = logging.getLogger(__name__)
+# Security
+security = HTTPBearer()
+# Job storage
+jobs: Dict[str, dict] = {}
+class AIGenerator:
+    def __init__(self):
+        self.tokenizer = None
+        self.model = None
+        self.loaded = False
+        self.load_error = None
+    def load_model(self):
+        """Load the AI model"""
+        if self.loaded:
+            return True
+        logger.info("Loading model...")
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
+            logger.info("✅ Tokenizer loaded")
+            self.model = AutoModelForCausalLM.from_pretrained(
+                MODEL_ID,
+                torch_dtype=torch.float32,
+                token=HF_TOKEN,
+                device_map=None
+            )
+            self.model = self.model.to(DEVICE)
+            self.model.eval()
+            self.loaded = True
+            logger.info("✅ Model loaded successfully")
+            return True
+        except Exception as e:
+            self.load_error = str(e)
+            logger.error(f"❌ Model loading failed: {str(e)}")
+            return False
+# Global generator instance
+generator = AIGenerator()
+async def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
+    """Verify API key"""
+    if credentials.credentials != API_KEY:
+        raise HTTPException(status_code=401, detail="Invalid API key")
+    return True
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Lifespan manager for FastAPI"""
+    logger.info("🚀 API Server starting up...")
+    yield
+app = FastAPI(lifespan=lifespan)
+def generate_text(prompt: str, max_tokens: int = None) -> str:
+    """
+    General function to generate text based on prompt
+    Args:
+        prompt: The prompt/instructions to send to the AI model
+        max_tokens: Maximum tokens to generate (defaults to MAX_TOKENS env var)
+    Returns:
+        Generated text from the AI model
+    """
+    try:
+        if not generator.loaded:
+            if not generator.load_model():
+                raise Exception(f"Model failed to load: {generator.load_error}")
+        logger.info(f"📝 Generating text with prompt (first 200 chars): {prompt[:200]}...")
+        # Tokenize the prompt
+        inputs = generator.tokenizer(
+            prompt,
+            return_tensors="pt",
+            truncation=True,
+            max_length=512
+        )
+        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
+        # Generate text
+        with torch.no_grad():
+            outputs = generator.model.generate(
+                **inputs,
+                max_new_tokens=max_tokens or MAX_TOKENS,
+                do_sample=True,
+                top_p=0.9,
+                temperature=0.8,
+                pad_token_id=generator.tokenizer.eos_token_id,
+                repetition_penalty=1.1
+            )
+        # Decode the generated text
+        full_output = generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Remove the prompt from the output if it's included
+        generated_text = full_output
+        if prompt in generated_text:
+            generated_text = generated_text.replace(prompt, "").strip()
+        logger.info(f"✅ Generated {len(generated_text)} characters")
+        return generated_text
+    except Exception as e:
+        logger.error(f"❌ Text generation failed: {str(e)}")
+        raise
+async def process_job(job_id: str, prompt: str, callback_url: str = None):
+    """Background task to process job with custom prompt"""
     try:
+        logger.info(f"🎯 Processing job {job_id}")
+        # Generate text based on the provided prompt
+        generated_text = generate_text(prompt)
+        # Store job results
+        jobs[job_id] = {
+            "status": "complete",
+            "result": generated_text,
+            "prompt": prompt,
+            "text_length": len(generated_text),
+            "model": MODEL_ID
         }
+        logger.info(f"✅ Completed job {job_id}")
+        # Send webhook callback if URL provided
+        if callback_url:
+            try:
+                webhook_data = {
+                    "job_id": job_id,
+                    "status": "complete",
+                    "result": generated_text,
+                    "prompt": prompt,
+                    "text_length": len(generated_text),
+                    "model": MODEL_ID
+                }
+                logger.info(f"📨 Sending webhook to: {callback_url}")
+                async with httpx.AsyncClient(timeout=30.0) as client:
+                    response = await client.post(
+                        callback_url,
+                        json=webhook_data,
+                        headers={"Content-Type": "application/json"}
+                    )
+                    if response.status_code >= 200 and response.status_code < 300:
+                        logger.info(f"✅ Webhook delivered successfully: {response.status_code}")
+                    else:
+                        logger.warning(f"⚠️ Webhook returned non-2xx status: {response.status_code}")
+            except Exception as e:
+                logger.error(f"❌ Webhook failed: {str(e)}")
     except Exception as e:
+        error_msg = f"Job failed: {str(e)}"
+        logger.error(f"❌ Job {job_id} failed: {error_msg}")
+        # Store failure information
+        jobs[job_id] = {
+            "status": "failed",
+            "error": error_msg,
+            "prompt": prompt
         }
+        # Send failure webhook if callback URL exists
+        if callback_url:
+            try:
+                async with httpx.AsyncClient(timeout=10.0) as client:
+                    await client.post(
+                        callback_url,
+                        json={
+                            "job_id": job_id,
+                            "status": "failed",
+                            "error": error_msg,
+                            "prompt": prompt
+                        },
+                        headers={"Content-Type": "application/json"}
+                    )
+            except Exception as e:
+                logger.error(f"Failed to send error webhook: {e}")
+@app.post("/api/generate")
+async def generate(
+    request: Request,
+    background_tasks: BackgroundTasks,
+    auth: bool = Depends(verify_api_key)
+):
+    """
+    Endpoint to generate text with custom prompt instructions
+    Expected JSON payload:
+    {
+        "prompt": "Your instructions here",
+        "max_tokens": 450,  # optional
+        "callback_url": "https://your-webhook.url"  # optional
+    }
+    """
     try:
+        data = await request.json()
+        job_id = str(uuid.uuid4())
+        # Validate input
+        if not data.get("prompt"):
+            raise HTTPException(status_code=400, detail="Prompt is required")
+        prompt = data["prompt"]
+        max_tokens = data.get("max_tokens")
+        callback_url = data.get("callback_url")
+        logger.info(f"📥 Received job {job_id} with prompt length: {len(prompt)}")
+        # Store initial job data
+        jobs[job_id] = {
+            "status": "processing",
+            "callback_url": callback_url,
+            "prompt": prompt,
+            "max_tokens": max_tokens
         }
+        # Process job in background
+        background_tasks.add_task(
+            process_job,
+            job_id,
+            prompt,
+            callback_url
         )
+        return JSONResponse({
+            "job_id": job_id,
+            "status": "queued",
+            "message": "Text generation started",
+            "model": MODEL_ID,
+            "estimated_time": "30-60 seconds"
+        })
+    except Exception as e:
+        logger.error(f"❌ Generation request error: {str(e)}")
+        raise HTTPException(status_code=400, detail=str(e))
+@app.post("/api/generate-sync")
+async def generate_sync(
+    request: Request,
+    auth: bool = Depends(verify_api_key)
+):
+    """
+    Synchronous endpoint for immediate generation (for smaller requests)
+    Expected JSON payload:
+    {
+        "prompt": "Your instructions here",
+        "max_tokens": 450  # optional
+    }
+    """
     try:
+        data = await request.json()
+        if not data.get("prompt"):
+            raise HTTPException(status_code=400, detail="Prompt is required")
+        prompt = data["prompt"]
+        max_tokens = data.get("max_tokens")
+        logger.info(f"📝 Synchronous generation request with prompt length: {len(prompt)}")
+        # Generate text synchronously
+        generated_text = generate_text(prompt, max_tokens)
+        return JSONResponse({
+            "status": "success",
+            "result": generated_text,
+            "text_length": len(generated_text),
+            "model": MODEL_ID
+        })
     except Exception as e:
+        logger.error(f"❌ Synchronous generation failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/status/{job_id}")
+async def get_status(job_id: str, auth: bool = Depends(verify_api_key)):
+    """Check job status"""
+    if job_id not in jobs:
+        raise HTTPException(status_code=404, detail="Job not found")
+    return JSONResponse(jobs[job_id])
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    completed_jobs = [job for job in jobs.values() if job.get("status") == "complete"]
+    avg_length = sum(job.get("text_length", 0) for job in completed_jobs) / max(1, len(completed_jobs))
+    return JSONResponse({
+        "status": "healthy",
+        "model_loaded": generator.loaded,
+        "model_id": MODEL_ID,
+        "total_jobs": len(jobs),
+        "completed_jobs": len(completed_jobs),
+        "failed_jobs": sum(1 for job in jobs.values() if job.get("status") == "failed"),
+        "average_text_length": round(avg_length, 2)
+    })
+@app.post("/api/test")
+async def test_generation(
+    request: Request,
+    auth: bool = Depends(verify_api_key)
+):
+    """Test endpoint with custom prompt"""
     try:
+        if not generator.loaded:
+            if not generator.load_model():
+                return JSONResponse({"status": "error", "error": "Model failed to load"})
+        data = await request.json()
+        test_prompt = data.get("prompt", "Write a short story about AI in 100 words.")
+        logger.info(f"🧪 Testing generation with prompt: {test_prompt[:100]}...")
+        generated_text = generate_text(test_prompt, max_tokens=200)
+        return JSONResponse({
+            "status": "success",
+            "prompt": test_prompt,
+            "result": generated_text,
+            "text_length": len(generated_text),
+            "model": MODEL_ID
+        })
     except Exception as e:
+        logger.error(f"❌ Test generation failed: {str(e)}")
+        return JSONResponse({"status": "error", "error": str(e)}, status_code=500)
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return JSONResponse({
+        "message": "AI Text Generation API",
+        "version": "3.0",
+        "model": MODEL_ID,
+        "features": "General purpose text generation with custom prompts",
+        "endpoints": {
+            "generate_async": "POST /api/generate (with 'prompt' field)",
+            "generate_sync": "POST /api/generate-sync (with 'prompt' field)",
+            "check_status": "GET /api/status/{job_id}",
+            "health": "GET /health",
+            "test": "POST /api/test (with optional 'prompt' field)"
+        },
+        "usage": "Send POST request with {'prompt': 'your instructions'}",
+        "status": "operational"
+    })
 if __name__ == "__main__":
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=PORT,
+        log_level="info"
+    )