| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from huggingface_hub import InferenceClient | |
app = FastAPI()

# Hugging Face Inference API endpoint. TinyLlama-1.1B-Chat is small enough to
# run on the free tier; to switch models, change the final path segment of the
# URL (e.g. mistralai/Mistral-7B-Instruct-v0.1, microsoft/phi-2,
# HuggingFaceH4/zephyr-3b).
HF_API_URL = "https://api-inference.huggingface.co/models/TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# One shared client for all requests; InferenceClient accepts a full model URL.
client = InferenceClient(HF_API_URL)
# Request body schema for the chat endpoint.
class ChatRequest(BaseModel):
    # The user's prompt, forwarded verbatim to the language model.
    message: str
@app.post("/chat")  # FIX: handler was defined but never registered as a route
async def chat(request: ChatRequest):
    """Generate a chat completion for the incoming message.

    Args:
        request: Parsed JSON body carrying the user's ``message``.

    Returns:
        ``{"response": <generated text>}`` with up to 100 new tokens.
    """
    # NOTE(review): text_generation is a blocking HTTP call, so it holds the
    # event loop while the model responds — acceptable for low traffic only.
    # The model kwarg overrides the URL the client was built with; both point
    # at TinyLlama, so the call targets the intended model either way.
    response = client.text_generation(
        request.message,
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        max_new_tokens=100,
    )
    return {"response": response}