| from fastapi import FastAPI |
| from pydantic import BaseModel |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| import torch |
| from fastapi.middleware.cors import CORSMiddleware |
|
|
| |
| |
| |
# Hugging Face Hub repository that provides both the tokenizer and the
# causal language model weights.
model_name = "thedeba/deb-8B"

# Inference device; everything below is pinned to it.
device = "cpu"

# Load once at import time so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.to(device)
|
|
| |
| |
| |
# ASGI application instance that the route decorators attach to.
app = FastAPI()

# Let browser front-ends hosted on any origin call this API.
# Credentials stay disabled: the CORS middleware documentation requires
# explicit origins, methods and headers whenever allow_credentials=True,
# and pairing a wildcard origin with credentials would allow any site to
# issue credentialed cross-origin requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
| |
| |
| |
# Request body schema for POST /generate.
class Query(BaseModel):
    # Raw prompt text that is tokenized and passed to the model.
    text: str
|
|
| |
| |
| |
@app.post("/generate")
def generate(query: Query):
    """Generate a completion for the submitted prompt.

    Args:
        query: Request body; its ``text`` field is used as the prompt.

    Returns:
        A dict with a single ``"response"`` key holding the first generated
        sequence decoded to text with special tokens removed.
    """
    # Tokenize the prompt into PyTorch tensors and move them to the
    # device the model lives on.
    inputs = tokenizer(query.text, return_tensors="pt").to(device)

    # ``temperature`` only takes effect when sampling is enabled. Without
    # ``do_sample=True`` decoding is greedy and the temperature is ignored
    # (unless the model's own generation config already enables sampling).
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.8,
    )

    # One prompt in, so the first sequence is the only one.
    # NOTE(review): for causal LMs the returned sequence normally starts
    # with the prompt tokens, so the response echoes the prompt - confirm
    # that clients expect this.
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": response_text}
|
|
| |
| |
| |
# Liveness probe: GET / answers with a fixed status payload.
@app.get("/")
def root():
    status_payload = {"status": "API is running!"}
    return status_payload
|
|
| |
| |
| |
if __name__ == "__main__":
    # Imported lazily so uvicorn is only needed when the module is run as
    # a script rather than served by an external ASGI runner.
    from uvicorn import run as serve

    # Listen on all interfaces, port 7860.
    serve(app, host="0.0.0.0", port=7860)
|
|
|
|
|
|
| """from fastapi import FastAPI |
| from pydantic import BaseModel |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| import torch |
| from fastapi.middleware.cors import CORSMiddleware |
| |
| |
| # ------------------------------- |
| # Load model & tokenizer from HF Hub |
| # ------------------------------- |
| model_name = "thedeba/deb-8B" # HF Hub model path |
| tokenizer = AutoTokenizer.from_pretrained(model_name) |
| model = AutoModelForCausalLM.from_pretrained(model_name) |
| device = "auto" # Spaces free tier uses CPU; you can switch to "cuda" if GPU granted |
| #model.to(device) |
| |
| # ------------------------------- |
| # FastAPI setup |
| # ------------------------------- |
| app = FastAPI() |
| |
| |
| app.add_middleware( |
| CORSMiddleware, |
| allow_origins=["*"], # or ["https://<username>.github.io"] |
| allow_credentials=True, |
| allow_methods=["*"], |
| allow_headers=["*"], |
| ) |
| |
| |
| |
| class Query(BaseModel): |
| text: str |
| |
| @app.post("/generate") |
| def generate(query: Query): |
| messages = [{"role": "user", "content": query.text}] |
| |
| # Convert to model input using chat template |
| inputs = tokenizer.apply_chat_template( |
| messages, |
| tokenize=True, |
| add_generation_prompt=True, |
| return_tensors="pt", |
| ).to(device) |
| |
| # Generate |
| outputs = model.generate( |
| input_ids=inputs, |
| max_new_tokens=200, |
| use_cache=True, |
| temperature=0.8, |
| min_p=0.1, |
| ) |
| |
| # Decode & extract assistant response |
| output_string = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0] |
| response = output_string.split("assistant")[-1].strip() |
| return {"response": response} |
| |
| @app.get("/") |
| def root(): |
| return {"deb": "API is running!"} |
| |
| if __name__ == "__main__": |
| import uvicorn |
| uvicorn.run(app, host="0.0.0.0", port=7860)""" |
|
|