Spaces:
Running
Running
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
| from rag import RAGEngine | |
# ───────── RAG INITIALISATION ─────────
# Shared retrieval engine, created once at import time and used by the
# chatbot and corpus-search callbacks (rag.search) and by controlled_answer.
# NOTE(review): "corpus/" is presumably the directory holding the source
# documents — confirm against rag.RAGEngine.
rag = RAGEngine("corpus/")
# ───────── FLAN-T5 MODEL ─────────
# One checkpoint identifier is used for both the tokenizer and the model so
# the two always come from the same checkpoint.
model_name = "google/flan-t5-base"
# Both objects are loaded once at import time; generate_answer() reads them
# as module-level globals.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
def generate_answer(prompt, temperature=0.0, max_new_tokens=160):
    """Generate a completion for ``prompt`` with the module-level FLAN-T5 model.

    Args:
        prompt: Input text. It is tokenised and truncated to 768 tokens.
        temperature: Sampling temperature. ``0`` (the default) or ``None``
            selects deterministic greedy decoding; any positive value
            enables sampling at that temperature.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=768,
    )

    # ``early_stopping`` is intentionally not passed: it only controls the
    # stopping condition of beam search and has no effect with a single beam.
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": 1.5,
        "no_repeat_ngram_size": 3,
    }

    # Honour the caller's temperature instead of silently ignoring it.
    # Sampling with temperature 0 is undefined, so 0 keeps greedy decoding.
    if temperature and temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = float(temperature)
    else:
        generation_kwargs["do_sample"] = False

    outputs = model.generate(**inputs, **generation_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# ───────── CONTROLLED ANSWER FOR THE RAG CHATBOT ─────────
def controlled_answer(question, sources_found):
    """Build an extractive answer from the best retrieved passage.

    The first element of ``sources_found`` is cleaned with
    ``rag._clean_for_prompt``, split on ``"."`` into sentences, and up to
    four sentences are kept. Sentences containing a keyword associated with
    the question's theme (risks, applications, advantages) are preferred;
    otherwise the first sentences of the passage are used.

    Args:
        question: The user's question; matched case-insensitively against
            the theme trigger words.
        sources_found: Retrieved passages, best first. Each item must expose
            ``text`` and ``filename`` attributes.

    Returns:
        The answer text followed by a "Source utilisée" line naming the
        passage's file, or a fixed "not enough information" message when
        ``sources_found`` is empty.
    """
    # Guard against an empty result list instead of raising IndexError.
    if not sources_found:
        return (
            "Je ne dispose pas d’assez d’informations dans le corpus "
            "pour répondre correctement à cette question."
        )

    best = sources_found[0]
    text = rag._clean_for_prompt(best.text)

    # Very short fragments (headings, abbreviations split on ".") are dropped.
    stripped_parts = (part.strip() for part in text.split("."))
    sentences = [part for part in stripped_parts if len(part) > 40]

    # Keywords are stored lowercase because matching is case-insensitive.
    question_lower = question.lower()
    if "risque" in question_lower:
        keywords = [
            "risque", "biais", "erreur",
            "confidentialité", "sécurité",
            "rgpd", "discrimination",
        ]
    elif "application" in question_lower or "utilisation" in question_lower:
        keywords = [
            "utilisé", "applications",
            "diagnostic", "analyse",
            "détection",
        ]
    elif "avantage" in question_lower or "importance" in question_lower:
        keywords = [
            "améliorer", "optimiser",
            "aider", "utile",
        ]
    else:
        keywords = []

    selected = []
    for sentence in sentences:
        lowered = sentence.lower()  # lower once per sentence, not per keyword
        if any(keyword in lowered for keyword in keywords):
            selected.append(sentence)

    if not selected:
        selected = sentences[:4]

    final_answer = ". ".join(selected[:4]).strip()

    # If no sentence passed the length filter, fall back to the cleaned
    # passage itself so the answer is never reduced to a lone ".".
    if not final_answer:
        final_answer = text.strip()

    if not final_answer.endswith("."):
        final_answer += "."

    return (
        final_answer
        + f"\n\nSource utilisée : {best.filename}"
    )
# ───────── SENTIMENT ─────────
# Module-level sentiment classifier, built once at import time and reused by
# analyse_sentiment().
sentiment = pipeline(
    task="sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)
# ───────── RAG CHATBOT ─────────
def chatbot_response(message, history, temperature, top_k):
    """Handle one chatbot turn and return the updated UI state.

    Args:
        message: The user's question.
        history: Conversation so far as a list of ``{"role", "content"}``
            dicts, or ``None`` for a fresh conversation. It is extended in
            place.
        temperature: Accepted from the UI but not used by this function.
        top_k: Accepted from the UI but not used; retrieval is pinned to a
            single passage.

    Returns:
        A ``(history, "", sources_markdown)`` tuple: the conversation, an
        empty string that clears the question box, and either the formatted
        sources or a status message.
    """
    conversation = [] if history is None else history

    if not (message and message.strip()):
        return conversation, "", "Veuillez poser une question."

    # To keep the demo stable, only the single best passage is retrieved.
    hits = rag.search(message, top_k=1)

    if hits:
        reply = controlled_answer(message, hits)
        details = "".join(
            f"### {hit.filename} — score : {hit.score:.3f}\n"
            f"{hit.text[:400]}...\n\n"
            for hit in hits
        )
    else:
        reply = (
            "Je ne dispose pas d’assez d’informations dans le corpus "
            "pour répondre correctement à cette question."
        )
        details = "Aucune source suffisamment pertinente trouvée."

    turns = (
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply},
    )
    for turn in turns:
        conversation.append(turn)

    return conversation, "", details
# ───────── CORPUS EXPLORATION ─────────
def search_corpus(query, top_k):
    """Search the corpus and format the matching passages as Markdown.

    Args:
        query: Free-text search query.
        top_k: Number of passages requested from ``rag.search``.

    Returns:
        A Markdown string with one section per result (file name, score and
        the first 900 characters of the passage), or a short message when
        the query is blank or nothing is found.
    """
    if not (query and query.strip()):
        return "Entrez une requête de recherche."

    hits = rag.search(query, top_k=top_k)
    if not hits:
        return "Aucun passage pertinent trouvé dans le corpus."

    return "".join(
        f"## {hit.filename} — score : {hit.score:.3f}\n"
        f"{hit.text[:900]}...\n\n"
        for hit in hits
    )
# ───────── SENTIMENT ANALYSIS ─────────
def analyse_sentiment(text):
    """Classify the sentiment of ``text`` and format the result as Markdown.

    Args:
        text: The text to analyse.

    Returns:
        A Markdown snippet with the predicted label and its confidence score
        (two decimals), or a prompt message when ``text`` is blank.
    """
    if not text or not text.strip():
        return "Entrez un texte à analyser."

    # truncation=True is forwarded to the tokenizer so that inputs longer
    # than the model's maximum sequence length are cut instead of making the
    # pipeline fail.
    result = sentiment(text, truncation=True)[0]
    return f"""
Résultat : **{result['label']}**
Score de confiance : **{result['score']:.2f}**
"""
# ───────── GENERATION WITHOUT RAG ─────────
def free_generation(prompt, temperature):
    """Generate a reply to ``prompt`` without any retrieved context.

    Args:
        prompt: Free-form prompt typed by the user.
        temperature: Value forwarded to ``generate_answer``.

    Returns:
        The text produced by ``generate_answer`` (capped at 220 new tokens),
        or a prompt message when ``prompt`` is blank.
    """
    has_prompt = bool(prompt) and bool(prompt.strip())
    if not has_prompt:
        return "Entrez un prompt."

    return generate_answer(prompt, temperature=temperature, max_new_tokens=220)
# ───────── GRADIO INTERFACE ─────────
# Builds the four-tab UI and wires each button to its callback.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened — confirm in particular that only the two
# sliders belong inside the gr.Row() of the chatbot tab.
with gr.Blocks(title="Assistant RAG IA Générative") as app:
    gr.Markdown("# Assistant RAG — IA Générative")
    gr.Markdown(
        """
Cette application répond aux questions sur l’intelligence artificielle générative,
ses modèles, ses usages métier et ses enjeux éthiques à partir d’un corpus documentaire.
"""
    )

    # Tab 1: retrieval-backed chatbot.
    with gr.Tab("Chatbot RAG"):
        chatbot = gr.Chatbot(label="Conversation")
        question = gr.Textbox(
            label="Votre question",
            placeholder="Exemple : Quels sont les risques de l'IA dans la médecine ?",
            lines=2
        )
        with gr.Row():
            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.0,
                step=0.1,
                label="Température"
            )
            top_k = gr.Slider(
                minimum=1,
                maximum=5,
                value=1,
                step=1,
                label="Nombre de passages utilisés"
            )
        send_btn = gr.Button("Envoyer", variant="primary")
        reset_btn = gr.Button("Réinitialiser la conversation")
        sources = gr.Markdown(label="Sources utilisées")
        # The callback returns (history, "", sources): the second output
        # clears the question box after each turn.
        send_btn.click(
            chatbot_response,
            inputs=[question, chatbot, temperature, top_k],
            outputs=[chatbot, question, sources]
        )
        # Reset empties the conversation, the question box and the sources.
        reset_btn.click(
            lambda: ([], "", ""),
            outputs=[chatbot, question, sources]
        )

    # Tab 2: direct search in the indexed corpus.
    with gr.Tab("Explorer le corpus"):
        # These counts are evaluated once, when the interface is built.
        gr.Markdown(
            f"""
**Nombre de fichiers chargés :** {len(set(rag.chunk_sources))}
**Nombre de passages indexés :** {len(rag.chunks)}
"""
        )
        search_query = gr.Textbox(
            label="Recherche dans la base de connaissances",
            placeholder="Exemple : IA et finance"
        )
        search_top_k = gr.Slider(
            minimum=1,
            maximum=5,
            value=3,
            step=1,
            label="Nombre de résultats"
        )
        search_btn = gr.Button("Rechercher")
        search_output = gr.Markdown()
        search_btn.click(
            search_corpus,
            inputs=[search_query, search_top_k],
            outputs=search_output
        )

    # Tab 3: sentiment analysis of free text.
    with gr.Tab("Analyse de sentiment"):
        sent_text = gr.Textbox(
            label="Texte à analyser",
            placeholder="Exemple : Ce projet est très intéressant et utile.",
            lines=4
        )
        sent_btn = gr.Button("Analyser")
        sent_output = gr.Markdown()
        sent_btn.click(
            analyse_sentiment,
            inputs=sent_text,
            outputs=sent_output
        )

    # Tab 4: plain generation without retrieved context, for comparison.
    with gr.Tab("Génération sans RAG"):
        gr.Markdown(
            """
Cet onglet permet de comparer une réponse générée sans contexte documentaire
avec une réponse enrichie par le RAG.
"""
        )
        free_prompt = gr.Textbox(
            label="Prompt libre",
            placeholder="Exemple : Explique les modèles de diffusion.",
            lines=4
        )
        free_temp = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.0,
            step=0.1,
            label="Température"
        )
        free_btn = gr.Button("Générer")
        free_output = gr.Textbox(label="Réponse sans RAG", lines=8)
        free_btn.click(
            free_generation,
            inputs=[free_prompt, free_temp],
            outputs=free_output
        )

# Start the Gradio server once the whole interface has been declared.
app.launch()