import uvicorn
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
|
|
| |
app = FastAPI()


# Load tokenizer and model once at import time so every request reuses them.
model_name = "Qwen/Qwen2.5-0.5B"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",          # let transformers pick dtype from the checkpoint
        device_map="auto",           # place weights on GPU automatically if one exists
        attn_implementation="eager"  # NOTE(review): eager attention — presumably for compatibility; confirm
    )
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    # Fail fast: the server is useless without the model, so re-raise
    # after logging instead of starting in a broken state.
    print(f"Error loading model: {e}")
    raise
|
|
| |
class TextInput(BaseModel):
    """Request body for ``POST /generate``."""

    # The prompt text to continue.
    prompt: str
    # Passed to ``model.generate(max_length=...)`` — total token budget,
    # prompt tokens included.
    max_length: int = 100
|
|
| |
@app.post("/generate")
async def generate_text(input: TextInput):
    """Generate a continuation of ``input.prompt`` with the loaded model.

    Returns ``{"generated_text": ...}``; the decoded text includes the
    prompt itself.  Any failure is surfaced as an HTTP 500 whose detail
    is the error message.

    NOTE(review): ``model.generate`` is a blocking CPU/GPU call inside an
    ``async def`` route, so it stalls the event loop for its duration —
    consider a threadpool (plain ``def`` route) if concurrency matters.
    """
    try:
        # Tokenize on the model's device.
        inputs = tokenizer(input.prompt, return_tensors="pt").to(model.device)

        # inference_mode: no autograd bookkeeping during generation.
        with torch.inference_mode():
            outputs = model.generate(
                inputs["input_ids"],
                # Fix: the attention mask was previously discarded, which
                # transformers warns leads to unreliable results.
                attention_mask=inputs["attention_mask"],
                max_length=input.max_length,
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                # Fix: explicit pad token silences the per-request
                # "Setting pad_token_id to eos_token_id" warning.
                pad_token_id=tokenizer.eos_token_id,
            )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return {"generated_text": generated_text}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
| |
@app.get("/")
async def root():
    """Liveness endpoint: reports that the service is up."""
    status_message = "Qwen2.5-0.5B API is running!"
    return {"message": status_message}
|
|
| |
@app.get("/api_link")
async def get_api_link(request: Request):
    """Return this API's base URL and a map of its endpoints.

    Bug fix: the original used ``request.client.host``, which is the
    *caller's* IP address — the returned links pointed at the client,
    not at this server.  ``request.url.hostname`` is the host the
    request was actually addressed to (from the request URL / Host).
    """
    # Host the client used to reach this server.
    host = request.url.hostname or "localhost"
    # Port from the request URL; fall back to the server's default 7860.
    port = request.url.port if request.url.port else 7860
    base_url = f"http://{host}:{port}"
    return {
        "api_url": base_url,
        "endpoints": {
            "health_check": f"{base_url}/",
            "generate_text": f"{base_url}/generate",
            "api_link": f"{base_url}/api_link"
        }
    }
|
|
| |
if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)