File size: 621 Bytes
516c24b
b4f7b60
516c24b
 
b4f7b60
 
516c24b
 
 
 
56e9328
b4f7b60
 
516c24b
 
 
 
 
 
 
676d2b9
516c24b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import os

from fastapi import FastAPI, HTTPException
from huggingface_hub import InferenceClient
from pydantic import BaseModel

# FastAPI application instance; the route handlers below register on it.
app = FastAPI()
# Hugging Face Inference API client used by /generate.
# NOTE(review): if HF_TOKEN is unset, os.environ.get returns None and the
# client is created without credentials — confirm unauthenticated calls
# are acceptable, or fail fast at startup instead.
client = InferenceClient(api_key=os.environ.get("HF_TOKEN"))

class Request(BaseModel):
    """Request body schema for POST /generate."""

    # Prompt text forwarded verbatim as a single user message to the model.
    prompt: str

@app.get("/")
def home():
    """Health-check endpoint: confirm the service is up."""
    payload = {"status": "Running"}
    return payload

@app.post("/generate")
def generate_text(request: Request):
    """Generate a chat completion for the submitted prompt.

    The prompt is sent as a single user message to the fixed model
    ``meta-llama/Llama-3.1-8B-Instruct`` (capped at 500 output tokens),
    and the assistant's reply text is returned.

    Raises:
        HTTPException: 502 when the upstream inference call fails, instead
            of leaking an unhandled 500 with a raw traceback to the client.
    """
    messages = [{"role": "user", "content": request.prompt}]
    try:
        response = client.chat_completion(
            messages=messages,
            model="meta-llama/Llama-3.1-8B-Instruct",
            max_tokens=500,
        )
    except Exception as exc:
        # Boundary handler: any network/auth/model error from the HF client
        # becomes an explicit 502 so callers get a diagnosable response.
        raise HTTPException(
            status_code=502,
            detail=f"Inference request failed: {exc}",
        ) from exc
    return {"response": response.choices[0].message.content}