| import uvicorn |
| from fastapi import FastAPI, HTTPException, Request |
| from pydantic import BaseModel |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| import torch |
|
|
| |
# Application instance served by uvicorn (see the __main__ guard at the bottom).
app = FastAPI()


# Load tokenizer and model ONCE at import time so every request reuses them.
# torch_dtype="auto" keeps the checkpoint's native precision; device_map="auto"
# places weights on an accelerator when one is available.
# A load failure is deliberately fatal: the exception is logged and re-raised
# so the server refuses to start without a model.
model_name = "Qwen/Qwen2.5-0.5B"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
        attn_implementation="eager"  # NOTE(review): eager attention — presumably for compatibility (no flash-attn dependency); confirm
    )
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    raise
|
|
| |
class TextInput(BaseModel):
    """Request body for the POST /generate endpoint."""

    # Text the model continues from.
    prompt: str
    # Total token budget handed to model.generate() as max_length — this
    # INCLUDES the prompt's tokens, not just the newly generated ones.
    max_length: int = 100
|
|
| |
@app.post("/generate")
async def generate_text(input: TextInput):
    """Generate a sampled continuation of ``input.prompt``.

    Returns ``{"generated_text": ...}`` containing the prompt followed by
    sampled tokens, capped at ``input.max_length`` total tokens.
    Raises HTTP 500 with the underlying error message on any failure.
    """
    try:
        # Keep the FULL tokenizer output so the attention mask is forwarded
        # to generate(); passing input_ids alone drops the mask, which
        # triggers a warning and can mis-handle padded inputs.
        encoded = tokenizer(input.prompt, return_tensors="pt").to(model.device)

        # inference_mode: no autograd bookkeeping for a pure inference call.
        with torch.inference_mode():
            outputs = model.generate(
                **encoded,
                max_length=input.max_length,
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                # Qwen checkpoints define no pad token; fall back to EOS so
                # generate() does not have to guess (and warn) on every call.
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
            )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return {"generated_text": generated_text}
    except Exception as e:
        # Surface the failure message to the client instead of a bare 500.
        raise HTTPException(status_code=500, detail=str(e))
|
|
| |
@app.get("/")
async def root():
    """Health/landing endpoint confirming the service is up."""
    status_message = "Qwen2.5-0.5B API is running!"
    return {"message": status_message}
|
|
| |
@app.get("/api_link")
async def get_api_link(request: Request):
    """Return the base URL at which this API is reachable.

    Uses the hostname the client actually reached the server on (taken from
    the request's Host header via ``request.url.hostname``).  The previous
    implementation returned ``request.client.host`` — the *client's* IP
    address — which is not where the API lives.
    """
    # Fall back to the bind address if the request carried no Host header.
    host = request.url.hostname or "0.0.0.0"
    return {"api_url": f"http://{host}:7860"}
|
|
| |
# Start the development server only when executed directly, never on import.
if __name__ == "__main__":
    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)
|
|