| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from huggingface_hub import InferenceClient | |
app = FastAPI()

# Hugging Face Inference API endpoint. TinyLlama-1.1B-Chat is small enough to
# run on the free tier; to switch models, change the final path segment of the
# URL (e.g. mistralai/Mistral-7B-Instruct-v0.1, microsoft/phi-2,
# HuggingFaceH4/zephyr-3b).
HF_API_URL = "https://api-inference.huggingface.co/models/TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# One shared client for all requests; InferenceClient accepts a full model URL.
client = InferenceClient(HF_API_URL)
# Request body schema for the chat endpoint.
class ChatRequest(BaseModel):
    # The user's prompt, forwarded verbatim to the language model.
    message: str
@app.post("/chat")  # FIX: handler was defined but never registered as a route
async def chat(request: ChatRequest):
    """Generate a chat completion for the incoming message.

    Args:
        request: Parsed JSON body carrying the user's ``message``.

    Returns:
        ``{"response": <generated text>}`` with up to 100 new tokens.
    """
    # NOTE(review): text_generation is a blocking HTTP call, so it holds the
    # event loop while the model responds — acceptable for low traffic only.
    # The model kwarg overrides the URL the client was built with; both point
    # at TinyLlama, so the call targets the intended model either way.
    response = client.text_generation(
        request.message,
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        max_new_tokens=100,
    )
    return {"response": response}