File size: 853 Bytes
6efef64
22af552
 
6efef64
 
 
21bfda5
6efef64
 
 
 
 
22af552
6efef64
 
21bfda5
6efef64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from typing import Any

import spaces
import gradio

from service import generate, list_models
from models import gemma4_e2b

# Module-level application object; the endpoint functions below register
# themselves on it through the ``@app.api(...)`` decorator.
app = gradio.Server()


@app.api(name="generate", description="Text generation using a chat template.")
@spaces.GPU(duration=10)
def generate_endpoint(
    messages: list[dict[str, str]],
    model: str = gemma4_e2b.MODEL_ID,
    max_tokens: int = 512,
    temperature: float = 0.7,
    top_p: float = 0.9,
    stop: list[str] | None = None,
) -> dict[str, Any]:
    """Forward a text-generation request to ``service.generate``.

    The function is registered on ``app`` under the API name ``"generate"``
    and is wrapped with ``spaces.GPU(duration=10)``.
    NOTE(review): ``duration`` is presumably the per-call GPU time budget in
    seconds -- confirm against the ``spaces`` documentation.

    Args:
        messages: Conversation to generate from, as a list of string-to-string
            dicts. The exact keys are defined by ``service.generate``
            (presumably role/content pairs -- TODO confirm).
        model: Identifier of the model to use; defaults to
            ``gemma4_e2b.MODEL_ID``.
        max_tokens: Passed through unchanged as ``max_tokens``.
        temperature: Passed through unchanged as ``temperature``.
        top_p: Passed through unchanged as ``top_p``.
        stop: Optional list of stop strings, or ``None``; passed through
            unchanged.

    Returns:
        Whatever ``service.generate`` returns for these arguments.
    """
    # Group the sampling knobs so the delegation below stays on one line;
    # no argument is validated or altered here.
    sampling_options = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "stop": stop,
    }
    return generate(model=model, messages=messages, **sampling_options)


@app.api(name="models", description="List available models and their capabilities.")
def models_endpoint() -> dict[str, list[dict[str, Any]]]:
    """Return the result of ``service.list_models`` unchanged.

    The function is registered on ``app`` under the API name ``"models"``
    and takes no arguments.

    Returns:
        The value produced by ``list_models()``; per the annotation, a dict
        mapping strings to lists of per-model dicts.
    """
    catalogue = list_models()
    return catalogue


# Launch the app. There is no ``if __name__ == "__main__"`` guard, so this
# call also runs whenever the module is imported, not only when it is
# executed as a script.
app.launch()