import os
import gradio as gr
from huggingface_hub import InferenceClient
from fastapi import FastAPI, Request
from fastapi.responses import Response, StreamingResponse
import json
from typing import AsyncGenerator, Optional
import asyncio
# Read the token from environment variables (Spaces secrets)
HF_TOKEN = os.environ.get("HF_TOKEN")
# Define the system message
SYSTEM_MESSAGE = """
You are TeachEase, a virtual teacher designed to help students understand their lessons and work through their exercises and homework.
Your role is to explain concepts clearly and pedagogically, provide concrete examples, and ask questions to check understanding.
**Special instructions:**
1. **Greetings:** Reply briefly and warmly.
2. **Mathematical and scientific expressions:** Use LaTeX formatting.
3. **Translation and multilingual answers:** Adapt your reply to the requested language.
"""
# Create the FastAPI application
app = FastAPI(title="TeachEase API")
def detect_language_request(message: str) -> Optional[str]:
    """Detect which target language, if any, the message asks for."""
    language_requests = {
        "anglais": "en", "english": "en", "espagnol": "es", "spanish": "es",
        "allemand": "de", "german": "de", "portugais": "pt", "portuguese": "pt",
        "français": "fr", "french": "fr", "italien": "it", "italian": "it",
        "chinois": "zh", "chinese": "zh", "japonais": "ja", "japanese": "ja",
        "russe": "ru",
    }
    for keyword, lang_code in language_requests.items():
        if keyword in message.lower():
            return lang_code
    return None
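# Illustrative usage (this helper is not called elsewhere in this file):
#   detect_language_request("Peux-tu traduire ce texte en anglais ?")  # -> "en"
#   detect_language_request("Explique les fractions")                  # -> None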
async def generate_stream_response(message: str, history: Optional[list] = None, max_tokens: int = 256, temperature: float = 0.7, top_p: float = 0.95) -> AsyncGenerator[str, None]:
    """Generate a streaming response formatted as server-sent events (SSE)."""
    if not HF_TOKEN:
        # Keep the error in the same SSE framing as the rest of the stream
        yield f"data: {json.dumps({'error': 'HF_TOKEN is not configured'})}\n\n"
        return
    try:
        client = InferenceClient(token=HF_TOKEN, model="openai/gpt-oss-20b")
        messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
        # Add conversation history if available
        if history:
            for exchange in history[-3:]:  # Keep only the last 3 exchanges
                if exchange.get("user"):
                    messages.append({"role": "user", "content": exchange["user"]})
                if exchange.get("assistant"):
                    messages.append({"role": "assistant", "content": exchange["assistant"]})
        messages.append({"role": "user", "content": message})
full_response = ""
for chunk in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
if chunk.choices and chunk.choices[0].delta.content:
token = chunk.choices[0].delta.content
full_response += token
                    # Forward each token as it arrives
                    yield f"data: {json.dumps({'token': token, 'full_response': full_response})}\n\n"
                    await asyncio.sleep(0.01)  # Small pause to pace the stream
        # End-of-stream signal
        yield f"data: {json.dumps({'done': True, 'full_response': full_response})}\n\n"
except Exception as e:
yield f"data: {json.dumps({'error': str(e)})}\n\n"
@app.post("/api/chat/stream")
async def chat_stream(request: Request):
"""Endpoint API pour le streaming"""
try:
data = await request.json()
message = data.get("message", "")
history = data.get("history", [])
max_tokens = data.get("max_tokens", 256)
temperature = data.get("temperature", 0.7)
top_p = data.get("top_p", 0.95)
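        # Expected request body (a sketch, matching the .get() calls above):
        #   {"message": "...", "history": [{"user": "...", "assistant": "..."}],
        #    "max_tokens": 256, "temperature": 0.7, "top_p": 0.95}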
return StreamingResponse(
generate_stream_response(message, history, max_tokens, temperature, top_p),
media_type="text/event-stream",
headers={
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type",
}
)
except Exception as e:
return {"error": str(e)}
@app.options("/api/chat/stream")
async def options_chat_stream():
    """Handle CORS preflight requests. The CORS headers must be set on the
    response itself; returning them as a JSON body has no effect."""
    return Response(
        status_code=204,
        headers={
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "POST, OPTIONS",
            "Access-Control-Allow-Headers": "Content-Type",
        },
    )
# Gradio interface (optional)
def respond(message, history, system_message=SYSTEM_MESSAGE, max_tokens=256, temperature=0.7, top_p=0.95):
    """Streaming handler for the Gradio ChatInterface."""
    if not HF_TOKEN:
        yield "❌ Error: HF_TOKEN is not configured"
        return
    try:
        client = InferenceClient(token=HF_TOKEN, model="openai/gpt-oss-20b")
        messages = [{"role": "system", "content": system_message}]
        # Gradio's tuple-style history: each entry is (user_message, bot_message)
        for val in history[-3:]:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})
        messages.append({"role": "user", "content": message})
        response = ""
        # Use a distinct loop variable so the `message` argument is not shadowed
        for chunk in client.chat_completion(
            messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                token = chunk.choices[0].delta.content
                response += token
                yield response
    except Exception as e:
        yield f"❌ Error: {str(e)}"
# Gradio interface configuration
demo = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value=SYSTEM_MESSAGE, label="System message"),
gr.Slider(minimum=1, maximum=512, value=256, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
],
title="TeachEase - Enseignant Virtuel",
description="Bienvenue sur TeachEase ! API streaming disponible sur /api/chat/stream"
)
# Mount the Gradio UI onto the FastAPI app
app = gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
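# To run locally (a sketch; assumes uvicorn is installed):
#   python app.py
# or, via the uvicorn CLI:
#   uvicorn app:app --host 0.0.0.0 --port 7860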