FIX bilingual interface
app.py
CHANGED
@@ -78,33 +78,23 @@ def format_chunks(results):
 def create_interface():
     with gr.Blocks(title="RAG Pedagogical Demo", theme=gr.themes.Soft()) as demo:
 
-        #
-
-
-        # Header with language selector
-        with gr.Row():
-            gr.Markdown("# 🎓 RAG Pedagogical Demo / Démo Pédagogique RAG")
-            with gr.Column(scale=1):
-                lang_dropdown = gr.Dropdown(
-                    choices=[("English", "en"), ("Français", "fr")],
-                    value="en",
-                    label="Language / Langue",
-                    interactive=True
-                )
+        # Header - Bilingual
+        gr.Markdown("# 🎓 RAG Pedagogical Demo / Démo Pédagogique RAG")
+        gr.Markdown("*A pedagogical tool to understand Retrieval Augmented Generation / Un outil pédagogique pour comprendre la génération augmentée par récupération*")
 
         with gr.Tabs() as tabs:
 
             # Tab 1: Corpus Management
             with gr.Tab(label="📚 Corpus"):
-                gr.Markdown("## Corpus Management")
+                gr.Markdown("## Corpus Management / Gestion du Corpus")
                 gr.Markdown("""
-                **Default corpus:** Multiple PDF documents from the `documents/` folder.
+                **EN - Default corpus:** Multiple PDF documents from the `documents/` folder. Or upload your own PDF.
 
-                **
+                **FR - Corpus par défaut :** Plusieurs documents PDF du dossier `documents/`. Ou téléchargez votre propre PDF.
 
-                1. Select your embedding model
-                2. Adjust chunking parameters if needed
-                3. Click "Process Corpus"
+                1. Select your embedding model / Sélectionnez votre modèle d'embedding
+                2. Adjust chunking parameters if needed / Ajustez les paramètres de découpage si nécessaire
+                3. Click "Process Corpus" / Cliquez sur "Process Corpus"
                 """)
 
                 # Embedding model selection FIRST
@@ -115,11 +105,11 @@ def create_interface():
                         "nomic-ai/nomic-embed-text-v2-moe",
                     ],
                     value="sentence-transformers/all-MiniLM-L6-v2",
-                    label="🔤 Embedding Model (select before processing)"
+                    label="🔤 Embedding Model / Modèle d'Embedding (select before processing / sélectionnez avant traitement)"
                 )
 
                 pdf_upload = gr.File(
-                    label="📄 Upload PDF
+                    label="📄 Upload PDF / Télécharger PDF (optional / optionnel)",
                     file_types=[".pdf"]
                 )
 
@@ -129,25 +119,25 @@ def create_interface():
                     maximum=1000,
                     value=500,
                     step=50,
-                    label="Chunk Size (characters)"
+                    label="Chunk Size / Taille des Chunks (characters / caractères)"
                 )
                 chunk_overlap = gr.Slider(
                     minimum=0,
                     maximum=200,
                     value=50,
                     step=10,
-                    label="Chunk Overlap (characters)"
+                    label="Chunk Overlap / Chevauchement (characters / caractères)"
                 )
 
-                process_btn = gr.Button("🚀 Process Corpus", variant="primary", size="lg")
-                corpus_status = gr.Textbox(label="Status", interactive=False)
+                process_btn = gr.Button("🚀 Process Corpus / Traiter le Corpus", variant="primary", size="lg")
+                corpus_status = gr.Textbox(label="Status / Statut", interactive=False)
 
                 # Display default corpus info
-                with gr.Accordion("📖 Corpus Information", open=False):
+                with gr.Accordion("📖 Corpus Information / Informations sur le Corpus", open=False):
                     default_corpus_display = gr.Markdown()
 
                 # Display processed chunks
-                with gr.Accordion("📑 Processed Chunks", open=False):
+                with gr.Accordion("📑 Processed Chunks / Chunks Traités", open=False):
                     processed_chunks_display = gr.Markdown()
 
                 # State to hold example questions
@@ -160,11 +150,15 @@ def create_interface():
                 )
 
             # Tab 2: Retrieval Configuration
-            with gr.Tab(label="🔍 Retrieval"):
-                gr.Markdown("## Retrieval Configuration")
-                gr.Markdown("
+            with gr.Tab(label="🔍 Retrieval / Récupération"):
+                gr.Markdown("## Retrieval Configuration / Configuration de la Récupération")
+                gr.Markdown("""
+                **EN:** Configure how relevant chunks are retrieved from the corpus.
+
+                **FR:** Configurez comment les chunks pertinents sont récupérés du corpus.
+                """)
 
-                gr.Markdown(
+                gr.Markdown("**Current Embedding Model / Modèle d'Embedding Actuel:** The model selected in the Corpus tab / Le modèle sélectionné dans l'onglet Corpus")
 
                 with gr.Row():
                     top_k = gr.Slider(
@@ -172,20 +166,24 @@ def create_interface():
                         maximum=10,
                         value=3,
                         step=1,
-                        label="Top K (number of chunks
+                        label="Top K (number of chunks / nombre de chunks à récupérer)"
                     )
                     similarity_threshold = gr.Slider(
                         minimum=0.0,
                         maximum=1.0,
                         value=0.5,
                         step=0.05,
-                        label="Similarity Threshold (minimum score
+                        label="Similarity Threshold / Seuil de Similarité (minimum score / score minimum)"
                     )
 
             # Tab 3: Generation Configuration
-            with gr.Tab(label="🤖 Generation"):
-                gr.Markdown("## Generation Configuration")
-                gr.Markdown("
+            with gr.Tab(label="🤖 Generation / Génération"):
+                gr.Markdown("## Generation Configuration / Configuration de la Génération")
+                gr.Markdown("""
+                **EN:** Select the language model and configure generation parameters.
+
+                **FR:** Sélectionnez le modèle de langage et configurez les paramètres de génération.
+                """)
 
                 llm_model = gr.Dropdown(
                     choices=[
@@ -194,7 +192,7 @@ def create_interface():
                        "google/gemma-2-2b-it",
                     ],
                     value="meta-llama/Llama-3.2-1B-Instruct",
-                    label="Language Model"
+                    label="Language Model / Modèle de Langage"
                 )
 
                 with gr.Row():
@@ -203,28 +201,28 @@ def create_interface():
                         maximum=2.0,
                         value=0.7,
                         step=0.1,
-                        label="Temperature (creativity)"
+                        label="Temperature / Température (creativity / créativité)"
                     )
                     max_tokens = gr.Slider(
                         minimum=100,
                         maximum=2048,
                         value=800,
                         step=50,
-                        label="Max Tokens (response length - higher for reasoning
+                        label="Max Tokens (response length / longueur réponse - higher for reasoning / plus pour raisonnement)"
                     )
 
             # Tab 4: Query & Results
-            with gr.Tab(label="💬 Query"):
-                gr.Markdown("## Ask a Question")
+            with gr.Tab(label="💬 Query / Requête"):
+                gr.Markdown("## Ask a Question / Posez une Question")
 
                 query_input = gr.Textbox(
-                    label="Your Question",
-                    placeholder="Enter your question here...",
+                    label="Your Question / Votre Question",
+                    placeholder="Enter your question here / Entrez votre question ici...",
                     lines=3
                 )
 
-                with gr.Accordion("💡 Example Questions (click to expand)", open=True):
-                    gr.Markdown("*Questions generated based on your corpus content*")
+                with gr.Accordion("💡 Example Questions / Questions d'Exemple (click to expand / cliquez pour développer)", open=True):
+                    gr.Markdown("*Questions generated based on your corpus content / Questions générées à partir de votre corpus*")
                     examples_markdown = gr.Markdown(visible=False)
 
                 # Connect processing to update examples
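In short, the commit removes the old header's lang_dropdown language selector and instead bakes both languages into every visible string using an "English / Français" pattern. Below is a minimal standalone sketch of that labeling pattern; the bi() helper and the toy components are illustrative assumptions, not code from app.py.

import gradio as gr

def bi(en: str, fr: str) -> str:
    # Hypothetical helper: join English and French text into one bilingual label,
    # mirroring the "EN / FR" style used throughout the updated interface.
    return f"{en} / {fr}"

with gr.Blocks(title="Bilingual label sketch") as demo:
    # Header shown in both languages at once, so no language dropdown is needed.
    gr.Markdown("# " + bi("RAG Pedagogical Demo", "Démo Pédagogique RAG"))
    # Any component label can be built the same way.
    chunk_size = gr.Slider(
        minimum=100,
        maximum=1000,
        value=500,
        step=50,
        label=bi("Chunk Size (characters)", "Taille des Chunks (caractères)"),
    )

if __name__ == "__main__":
    demo.launch()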