| | from fastapi import FastAPI, HTTPException |
| | from pydantic import BaseModel |
| | from transformers import AutoModelForCausalLM, AutoTokenizer |
| | import torch |
| |
|
| | |
# FastAPI application instance; the route handlers below register on it.
app = FastAPI()

# Load the tokenizer and model ONCE at import time so every request reuses
# the same weights instead of reloading per call.
# - torch_dtype="auto": use the precision stored in the checkpoint.
# - device_map="auto": place weights on GPU when available (NOTE(review):
#   this requires the `accelerate` package at runtime — confirm it is a
#   declared dependency).
model_name = "Qwen/Qwen2.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")
| |
|
| | |
class TextInput(BaseModel):
    """Request body for the POST /generate endpoint."""

    # The text prompt the model should continue.
    prompt: str
    # Token budget passed to model.generate() (defaults to 100). In the
    # handler below it is forwarded as generate()'s length limit.
    max_length: int = 100
| |
|
| | |
@app.post("/generate")
async def generate_text(input: TextInput):
    """Generate a sampled continuation of ``input.prompt``.

    Returns:
        ``{"generated_text": ...}`` — the prompt plus the generated
        continuation, decoded with special tokens stripped.

    Raises:
        HTTPException: 500 with the underlying error message on any
        tokenization/generation failure.
    """
    # NOTE(review): model.generate() is a blocking, CPU/GPU-heavy call inside
    # an async handler, so it stalls the event loop for its duration —
    # consider run_in_executor / a threadpool if concurrency matters.
    try:
        inputs = tokenizer(input.prompt, return_tensors="pt").to(model.device)

        # Generation never needs gradients; inference_mode avoids the
        # autograd bookkeeping and its memory overhead.
        with torch.inference_mode():
            outputs = model.generate(
                # Pass the full tokenizer output (input_ids AND
                # attention_mask). The original passed only input_ids,
                # which drops the mask and triggers an HF warning.
                **inputs,
                # Bug fix: the original used max_length=input.max_length,
                # which counts PROMPT tokens too — a prompt of >= max_length
                # tokens made generate() fail or produce nothing new.
                # max_new_tokens bounds only the generated continuation.
                max_new_tokens=input.max_length,
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                # Qwen checkpoints define no pad_token_id; setting it
                # explicitly silences the per-call warning.
                pad_token_id=tokenizer.eos_token_id,
            )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return {"generated_text": generated_text}
    except Exception as e:
        # Boundary handler: surface any failure as HTTP 500.
        # NOTE(review): str(e) can leak internals to clients — consider a
        # generic detail plus server-side logging.
        raise HTTPException(status_code=500, detail=str(e))
| |
|
| | |
@app.get("/")
async def root():
    """Liveness endpoint: confirms the API process is up and serving."""
    status_message = "Qwen2.5-0.5B API is running!"
    return {"message": status_message}