Project_Gen_AI / app.py
Darryl237's picture
Initial commit - Mini chatbot RAG
eff6ee6
Raw
History Blame Contribute Delete
8.51 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from rag import RAGEngine
# ───────── RAG INITIALISATION ─────────
# Module-level retrieval engine, constructed once with the "corpus/" path
# (presumably the directory holding the source documents — confirm in rag.py).
# The chatbot and corpus-search handlers in this file query it via rag.search().
rag = RAGEngine("corpus/")
# ───────── FLAN-T5 MODEL ─────────
# Hugging Face checkpoint id shared by the tokenizer and the seq2seq model.
model_name = "google/flan-t5-base"
# Both objects are loaded once when the module is executed and reused by
# generate_answer() for every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
def generate_answer(prompt, temperature=0.0, max_new_tokens=160):
    """Generate a completion for *prompt* with the module-level FLAN-T5 model.

    Args:
        prompt: Input text. It is tokenized and truncated to 768 tokens.
        temperature: Sampling temperature. A value of ``0`` (or ``None``, or
            any non-positive number) keeps deterministic greedy decoding;
            a positive value switches to sampling at that temperature.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=768,
    )

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": 1.5,
        "no_repeat_ngram_size": 3,
    }
    # The temperature argument used to be silently ignored (do_sample was
    # hard-coded to False), so the UI slider had no effect. Sampling is only
    # enabled for a strictly positive temperature; 0 stays greedy because a
    # zero temperature is not a valid sampling setting.
    if temperature and temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = float(temperature)
    else:
        generation_kwargs["do_sample"] = False
    # early_stopping is intentionally not passed: it only applies to beam
    # search, which is not used here (num_beams is left at its default).

    outputs = model.generate(**inputs, **generation_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# ───────── CONTROLLED ANSWER FOR THE RAG CHATBOT ─────────
def controlled_answer(question, sources_found):
    """Build an extractive answer from the best retrieved passage.

    The first element of *sources_found* is cleaned with
    ``rag._clean_for_prompt`` (assumed to return a ``str`` — confirm in
    rag.py), split into sentences on ".", and filtered by a keyword list
    chosen from trigger words found in the question. At most four sentences
    are kept.

    Args:
        question: The user's question; matched case-insensitively against
            the trigger words.
        sources_found: Non-empty sequence of search results exposing
            ``.text`` and ``.filename``. Only the first one is used.

    Returns:
        The answer text ending with a period, followed by a blank line and
        a "Source utilisée : <filename>" line.

    Raises:
        IndexError: If *sources_found* is empty (the caller in this file
            checks for that before calling).
    """
    best = sources_found[0]
    text = rag._clean_for_prompt(best.text)

    # Fragments of 40 characters or fewer are dropped (headings, list
    # markers, abbreviations split on ".").
    sentences = [
        fragment.strip()
        for fragment in text.split(".")
        if len(fragment.strip()) > 40
    ]

    # (trigger words looked for in the question, keywords looked for in the
    # passage). Groups are checked in order and the first match wins, which
    # mirrors an if/elif chain.
    keyword_groups = (
        (
            ("risque",),
            (
                "risque", "biais", "erreur",
                "confidentialité", "sécurité",
                "RGPD", "discrimination",
            ),
        ),
        (
            ("application", "utilisation"),
            (
                "utilisé", "applications",
                "diagnostic", "analyse",
                "détection",
            ),
        ),
        (
            ("avantage", "importance"),
            (
                "améliorer", "optimiser",
                "aider", "utile",
            ),
        ),
    )

    question_lower = question.lower()
    keywords = next(
        (
            words
            for triggers, words in keyword_groups
            if any(trigger in question_lower for trigger in triggers)
        ),
        (),
    )

    # Lower-case the keywords once instead of once per sentence.
    keywords_lower = [keyword.lower() for keyword in keywords]
    selected = [
        sentence
        for sentence in sentences
        if any(keyword in sentence.lower() for keyword in keywords_lower)
    ]

    # No keyword hit (or no keyword group at all): use the leading sentences.
    if not selected:
        selected = sentences[:4]

    final_answer = ". ".join(selected[:4]).strip()

    if not final_answer:
        # Every fragment was filtered out by the length threshold (short
        # passage). Fall back to the cleaned passage itself rather than
        # answering with a lone ".".
        final_answer = text.strip()

    if not final_answer.endswith("."):
        final_answer += "."

    return (
        final_answer
        + f"\n\nSource utilisée : {best.filename}"
    )
# ───────── SENTIMENT ─────────
# Module-level "sentiment-analysis" pipeline, created once when the module is
# executed and reused by analyse_sentiment() for every request.
sentiment = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment"
)
# ───────── RAG CHATBOT ─────────
def chatbot_response(message, history, temperature, top_k):
    """Answer *message* from the corpus and extend the conversation.

    Args:
        message: The user's question.
        history: List of ``{"role", "content"}`` dicts, or ``None`` for a
            new conversation. It is extended in place.
        temperature: Accepted for the UI wiring; not used here.
        top_k: Accepted for the UI wiring; retrieval is fixed to one passage.

    Returns:
        A ``(history, "", sources_markdown)`` tuple: the updated
        conversation, an empty string to clear the input box, and either a
        status message or a Markdown summary of the passage used.
    """
    conversation = [] if history is None else history

    if not (message and message.strip()):
        return conversation, "", "Veuillez poser une question."

    # To keep the demo stable, only the single best passage is retrieved.
    passages = rag.search(message, top_k=1)

    if passages:
        reply = controlled_answer(message, passages)
        details = "".join(
            f"### {passage.filename} — score : {passage.score:.3f}\n"
            f"{passage.text[:400]}...\n\n"
            for passage in passages
        )
    else:
        reply = (
            "Je ne dispose pas d’assez d’informations dans le corpus "
            "pour répondre correctement à cette question."
        )
        details = "Aucune source suffisamment pertinente trouvée."

    # Both outcomes record the exchange the same way.
    conversation.append({"role": "user", "content": message})
    conversation.append({"role": "assistant", "content": reply})
    return conversation, "", details
# ───────── CORPUS EXPLORATION ─────────
def search_corpus(query, top_k):
    """Search the corpus and format the hits as Markdown.

    Args:
        query: Free-text search query.
        top_k: Number of results requested from ``rag.search``.

    Returns:
        A prompt to enter a query when *query* is empty or blank, a
        "nothing found" message when the search returns no result, otherwise
        one Markdown section per hit (filename, score to three decimals, and
        the first 900 characters of the passage followed by "...").
    """
    if not (query and query.strip()):
        return "Entrez une requête de recherche."

    hits = rag.search(query, top_k=top_k)
    if not hits:
        return "Aucun passage pertinent trouvé dans le corpus."

    return "".join(
        f"## {hit.filename} — score : {hit.score:.3f}\n"
        f"{hit.text[:900]}...\n\n"
        for hit in hits
    )
# ───────── SENTIMENT ANALYSIS ─────────
def analyse_sentiment(text):
    """Classify the sentiment of *text* with the module-level pipeline.

    Args:
        text: Text to analyse.

    Returns:
        A prompt to enter text when *text* is empty or blank; otherwise a
        Markdown snippet with the predicted label and its confidence score
        formatted to two decimals.
    """
    if not text or not text.strip():
        return "Entrez un texte à analyser."

    # The text box accepts arbitrarily long input. Without truncation the
    # tokenizer can produce more tokens than the BERT model accepts and
    # inference raises; truncation=True caps the input at the model limit.
    result = sentiment(text, truncation=True)[0]
    return f"""
Résultat : **{result['label']}**
Score de confiance : **{result['score']:.2f}**
"""
# ───────── GENERATION WITHOUT RAG ─────────
def free_generation(prompt, temperature):
    """Generate text for a free-form prompt, without any retrieved context.

    Args:
        prompt: The user's prompt.
        temperature: Forwarded to ``generate_answer``.

    Returns:
        A prompt to enter text when *prompt* is empty or blank; otherwise the
        output of ``generate_answer`` limited to 220 new tokens.
    """
    has_content = bool(prompt and prompt.strip())
    if not has_content:
        return "Entrez un prompt."

    return generate_answer(prompt, temperature=temperature, max_new_tokens=220)
# ───────── GRADIO INTERFACE ─────────
# Four-tab UI: RAG chatbot, corpus search, sentiment analysis, and plain
# generation without retrieval.
# NOTE(review): the indentation of this section was reconstructed; confirm
# the nesting (in particular that the buttons sit outside the Row and that
# every .click() wiring belongs to its Tab). Multi-line string literals are
# kept flush-left so their runtime content is unchanged.
with gr.Blocks(title="Assistant RAG IA Générative") as app:
    gr.Markdown("# Assistant RAG — IA Générative")
    gr.Markdown(
        """
Cette application répond aux questions sur l’intelligence artificielle générative,
ses modèles, ses usages métier et ses enjeux éthiques à partir d’un corpus documentaire.
"""
    )
    # --- Tab 1: question answering backed by the corpus ---
    with gr.Tab("Chatbot RAG"):
        # NOTE(review): chatbot_response appends {"role", "content"} dicts to
        # the history; confirm the installed Gradio version accepts that
        # message format for a Chatbot created with default arguments.
        chatbot = gr.Chatbot(label="Conversation")
        question = gr.Textbox(
            label="Votre question",
            placeholder="Exemple : Quels sont les risques de l'IA dans la médecine ?",
            lines=2
        )
        with gr.Row():
            # Both sliders are passed to chatbot_response, which currently
            # does not use temperature and always retrieves a single passage.
            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.0,
                step=0.1,
                label="Température"
            )
            top_k = gr.Slider(
                minimum=1,
                maximum=5,
                value=1,
                step=1,
                label="Nombre de passages utilisés"
            )
        send_btn = gr.Button("Envoyer", variant="primary")
        reset_btn = gr.Button("Réinitialiser la conversation")
        sources = gr.Markdown(label="Sources utilisées")
        # The handler returns (history, "", sources_markdown): the second
        # value clears the question box.
        send_btn.click(
            chatbot_response,
            inputs=[question, chatbot, temperature, top_k],
            outputs=[chatbot, question, sources]
        )
        # Reset returns empty values for the conversation, the question box
        # and the sources panel.
        reset_btn.click(
            lambda: ([], "", ""),
            outputs=[chatbot, question, sources]
        )
    # --- Tab 2: direct search in the indexed corpus ---
    with gr.Tab("Explorer le corpus"):
        # The f-string is evaluated once, when the UI is built, so these
        # counts reflect the state of `rag` at startup.
        gr.Markdown(
            f"""
**Nombre de fichiers chargés :** {len(set(rag.chunk_sources))}
**Nombre de passages indexés :** {len(rag.chunks)}
"""
        )
        search_query = gr.Textbox(
            label="Recherche dans la base de connaissances",
            placeholder="Exemple : IA et finance"
        )
        search_top_k = gr.Slider(
            minimum=1,
            maximum=5,
            value=3,
            step=1,
            label="Nombre de résultats"
        )
        search_btn = gr.Button("Rechercher")
        search_output = gr.Markdown()
        search_btn.click(
            search_corpus,
            inputs=[search_query, search_top_k],
            outputs=search_output
        )
    # --- Tab 3: sentiment classification of free text ---
    with gr.Tab("Analyse de sentiment"):
        sent_text = gr.Textbox(
            label="Texte à analyser",
            placeholder="Exemple : Ce projet est très intéressant et utile.",
            lines=4
        )
        sent_btn = gr.Button("Analyser")
        sent_output = gr.Markdown()
        sent_btn.click(
            analyse_sentiment,
            inputs=sent_text,
            outputs=sent_output
        )
    # --- Tab 4: generation without retrieved context, for comparison ---
    with gr.Tab("Génération sans RAG"):
        gr.Markdown(
            """
Cet onglet permet de comparer une réponse générée sans contexte documentaire
avec une réponse enrichie par le RAG.
"""
        )
        free_prompt = gr.Textbox(
            label="Prompt libre",
            placeholder="Exemple : Explique les modèles de diffusion.",
            lines=4
        )
        free_temp = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.0,
            step=0.1,
            label="Température"
        )
        free_btn = gr.Button("Générer")
        free_output = gr.Textbox(label="Réponse sans RAG", lines=8)
        free_btn.click(
            free_generation,
            inputs=[free_prompt, free_temp],
            outputs=free_output
        )
# Starts the Gradio server as soon as the module is executed; there is no
# __main__ guard, so importing this module also launches the app.
app.launch()