| | from fastapi import FastAPI, HTTPException |
| | from pydantic import BaseModel |
| | from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer |
| |
|
| | |
class ModelInput(BaseModel):
    """Request body schema for the text-generation endpoint."""

    # Instruction text supplied by the caller; required.
    prompt: str

    # Cap on the number of tokens to generate; falls back to 50 when omitted.
    max_new_tokens: int = 50
| |
|
| | |
# Application object on which the route handlers in this module are registered.
app = FastAPI()

# Model identifier handed to both `from_pretrained` calls below.
model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"

# Load the tokenizer and the causal-LM once, at import time, so the request
# handlers share the same objects instead of reloading them per call.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Text-generation pipeline wrapping the objects loaded above.
# NOTE(review): no code visible in this file calls `generator`; confirm
# whether anything still relies on it before removing.
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)
| |
|
| | |
def generate_response(model, tokenizer, instruction, max_new_tokens=128):
    """Generate a response from the model based on an instruction.

    The instruction is wrapped as a single user message, rendered to text with
    the tokenizer's chat template (generation prompt appended), tokenized, and
    passed to ``model.generate`` with sampling enabled.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``apply_chat_template``, ``__call__`` and
            ``decode``.
        instruction: User text placed in the ``content`` field of the message.
        max_new_tokens: Forwarded unchanged to ``model.generate``.

    Returns:
        The first sequence returned by ``model.generate``, decoded with
        special tokens skipped.

    Raises:
        ValueError: If any step fails. The message embeds the original error
            text and the original exception is chained as ``__cause__``.
    """
    try:
        messages = [{"role": "user", "content": instruction}]
        prompt_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Call the tokenizer itself (rather than ``encode``) so the attention
        # mask is produced alongside the ids and can be handed to
        # ``generate`` explicitly instead of being inferred.
        encoded = tokenizer(prompt_text, return_tensors="pt")
        outputs = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_new_tokens=max_new_tokens,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
        )

        # NOTE(review): the whole first sequence is decoded; if ``generate``
        # echoes the prompt tokens (presumably the case for causal LMs --
        # confirm), the returned text also contains the rendered prompt.
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Chain explicitly so the root cause stays attached to the ValueError.
        raise ValueError(f"Error generating response: {e}") from e
| |
|
@app.post("/generate")
def generate_text(input: ModelInput):
    """API endpoint to generate text.

    Passes the request's ``prompt`` and ``max_new_tokens`` to
    ``generate_response`` together with the module-level model and tokenizer.

    Args:
        input: Parsed request body. The name shadows the ``input`` builtin but
            is kept so existing keyword callers continue to work.

    Returns:
        A dict with the single key ``"generated_text"``.

    Raises:
        HTTPException: Status 500 with the error text as ``detail`` when
            generation fails; the original exception is chained as the cause.
    """
    try:
        response = generate_response(
            model=model,
            tokenizer=tokenizer,
            instruction=input.prompt,
            max_new_tokens=input.max_new_tokens,
        )
    except Exception as e:
        # Chain explicitly so the underlying failure stays attached for logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"generated_text": response}
| |
|
@app.get("/")
def root():
    """Return the static welcome payload served at the API root."""
    welcome = "Welcome to the Hugging Face Model API!"
    return {"message": welcome}
| |
|