import os
import json
import asyncio
from typing import AsyncGenerator, Optional

import gradio as gr
from huggingface_hub import InferenceClient
from fastapi import FastAPI, Request, Response
from fastapi.responses import StreamingResponse

# Read the token from the environment variables (Secrets)
HF_TOKEN = os.environ.get("HF_TOKEN")

# System message
SYSTEM_MESSAGE = """
You are TeachEase, a virtual teacher designed to help students understand their lessons and complete their exercises and homework. Your role is to explain concepts in a clear, pedagogical way, to provide concrete examples, and to ask questions to check understanding.

**Special instructions:**
1. **Greetings:** Reply briefly and in a friendly tone.
2. **Mathematical and scientific expressions:** Use LaTeX format.
3. **Translation and multilingual answers:** Adapt your answer to the requested language.
"""

# Create the FastAPI application
app = FastAPI(title="TeachEase API")


def detect_language_request(message: str) -> Optional[str]:
    """Detect the language requested in a message, if any."""
    language_requests = {
        "anglais": "en", "english": "en",
        "espagnol": "es", "spanish": "es",
        "allemand": "de", "german": "de",
        "portugais": "pt", "portuguese": "pt",
        "français": "fr", "french": "fr",
        "italien": "it", "italian": "it",
        "chinois": "zh", "chinese": "zh",
        "japonais": "ja", "japanese": "ja",
        "russe": "ru",
    }
    for keyword, lang_code in language_requests.items():
        if keyword in message.lower():
            return lang_code
    return None


async def generate_stream_response(
    message: str,
    history: Optional[list] = None,
    max_tokens: int = 256,
    temperature: float = 0.7,
    top_p: float = 0.95,
) -> AsyncGenerator[str, None]:
    """Generate a streaming response as Server-Sent Events."""
    if not HF_TOKEN:
        yield f"data: {json.dumps({'error': 'HF_TOKEN not configured'})}\n\n"
        return

    try:
        client = InferenceClient(token=HF_TOKEN, model="openai/gpt-oss-20b")
        messages = [{"role": "system", "content": SYSTEM_MESSAGE}]

        # Add the history if available, limited to the last 3 exchanges
        if history:
            for exchange in history[-3:]:
                if exchange.get("user"):
                    messages.append({"role": "user", "content": exchange["user"]})
                if exchange.get("assistant"):
                    messages.append({"role": "assistant", "content": exchange["assistant"]})

        messages.append({"role": "user", "content": message})

        full_response = ""
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                token = chunk.choices[0].delta.content
                full_response += token
                # Send each token as it arrives
                yield f"data: {json.dumps({'token': token, 'full_response': full_response})}\n\n"
                await asyncio.sleep(0.01)  # Small delay to keep the stream smooth

        # End-of-stream signal
        yield f"data: {json.dumps({'done': True, 'full_response': full_response})}\n\n"

    except Exception as e:
        yield f"data: {json.dumps({'error': str(e)})}\n\n"


@app.post("/api/chat/stream")
async def chat_stream(request: Request):
    """API endpoint for streaming chat responses."""
    try:
        data = await request.json()
        message = data.get("message", "")
        history = data.get("history", [])
        max_tokens = data.get("max_tokens", 256)
        temperature = data.get("temperature", 0.7)
        top_p = data.get("top_p", 0.95)

        return StreamingResponse(
            generate_stream_response(message, history, max_tokens, temperature, top_p),
            media_type="text/event-stream",
            headers={
                "Access-Control-Allow-Origin": "*",
                "Access-Control-Allow-Methods": "POST, OPTIONS",
                "Access-Control-Allow-Headers": "Content-Type",
            },
        )
    except Exception as e:
        return {"error": str(e)}
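# ---------------------------------------------------------------------------
# Example request against the streaming endpoint (a minimal sketch, assuming
# the server runs locally on port 7860, as configured in uvicorn.run below):
#
#   curl -N -X POST http://localhost:7860/api/chat/stream \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Explain the Pythagorean theorem", "history": []}'
#
# The endpoint emits Server-Sent Events. Each token arrives as
#   data: {"token": "...", "full_response": "..."}
# followed by a final
#   data: {"done": true, "full_response": "..."}
# or, on failure,
#   data: {"error": "..."}
# ---------------------------------------------------------------------------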
@app.options("/api/chat/stream")
async def options_chat_stream():
    """Handle OPTIONS preflight requests for CORS."""
    # CORS headers must be set on the response itself, not returned as a JSON body
    return Response(
        status_code=204,
        headers={
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "POST, OPTIONS",
            "Access-Control-Allow-Headers": "Content-Type",
        },
    )


# Gradio interface (optional)
def respond(message, history, system_message=SYSTEM_MESSAGE, max_tokens=256, temperature=0.7, top_p=0.95):
    """Callback for the Gradio chat interface."""
    if not HF_TOKEN:
        yield "❌ Error: HF_TOKEN not configured"
        return

    try:
        client = InferenceClient(token=HF_TOKEN, model="openai/gpt-oss-20b")
        messages = [{"role": "system", "content": system_message}]

        # History arrives as (user, assistant) pairs; keep the last 3 exchanges
        for val in history[-3:]:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})

        messages.append({"role": "user", "content": message})

        response = ""
        # Use a distinct loop variable so it does not shadow the `message` argument
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response

    except Exception as e:
        yield f"❌ Error: {str(e)}"


# Gradio interface configuration
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_MESSAGE, label="System message"),
        gr.Slider(minimum=1, maximum=512, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    title="TeachEase - Virtual Teacher",
    description="Welcome to TeachEase! A streaming API is available at /api/chat/stream",
)

# Mount the Gradio app on the FastAPI app
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
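# ---------------------------------------------------------------------------
# Minimal Python consumer for the streaming endpoint (a sketch, assuming the
# server above is running and the `requests` package is installed; run it
# from a separate process, not from this module):
#
#   import json
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/api/chat/stream",
#       json={"message": "Hello!", "history": []},
#       stream=True,
#   )
#   for line in resp.iter_lines():
#       if not line or not line.startswith(b"data: "):
#           continue
#       event = json.loads(line[len(b"data: "):])
#       if event.get("done") or event.get("error"):
#           break
#       print(event["token"], end="", flush=True)
# ---------------------------------------------------------------------------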