joelg committed on
Commit
d6afc31
·
1 Parent(s): 5ddef4d

FIX bilingual interface

Browse files
Files changed (1) hide show
  1. app.py +43 -45
app.py CHANGED
@@ -78,33 +78,23 @@ def format_chunks(results):
78
  def create_interface():
79
  with gr.Blocks(title="RAG Pedagogical Demo", theme=gr.themes.Soft()) as demo:
80
 
81
- # State for language
82
- lang_state = gr.State("en")
83
-
84
- # Header with language selector
85
- with gr.Row():
86
- gr.Markdown("# 🎓 RAG Pedagogical Demo / Démo Pédagogique RAG")
87
- with gr.Column(scale=1):
88
- lang_dropdown = gr.Dropdown(
89
- choices=[("English", "en"), ("Français", "fr")],
90
- value="en",
91
- label="Language / Langue",
92
- interactive=True
93
- )
94
 
95
  with gr.Tabs() as tabs:
96
 
97
  # Tab 1: Corpus Management
98
  with gr.Tab(label="📚 Corpus"):
99
- gr.Markdown("## Corpus Management")
100
  gr.Markdown("""
101
- **Default corpus:** Multiple PDF documents from the `documents/` folder.
102
 
103
- **Or:** Upload your own PDF document to use instead.
104
 
105
- 1. Select your embedding model
106
- 2. Adjust chunking parameters if needed
107
- 3. Click "Process Corpus"
108
  """)
109
 
110
  # Embedding model selection FIRST
@@ -115,11 +105,11 @@ def create_interface():
115
  "nomic-ai/nomic-embed-text-v2-moe",
116
  ],
117
  value="sentence-transformers/all-MiniLM-L6-v2",
118
- label="🔤 Embedding Model (select before processing)"
119
  )
120
 
121
  pdf_upload = gr.File(
122
- label="📄 Upload PDF (optional - leave empty to use default corpus from documents/ folder)",
123
  file_types=[".pdf"]
124
  )
125
 
@@ -129,25 +119,25 @@ def create_interface():
129
  maximum=1000,
130
  value=500,
131
  step=50,
132
- label="Chunk Size (characters)"
133
  )
134
  chunk_overlap = gr.Slider(
135
  minimum=0,
136
  maximum=200,
137
  value=50,
138
  step=10,
139
- label="Chunk Overlap (characters)"
140
  )
141
 
142
- process_btn = gr.Button("🚀 Process Corpus", variant="primary", size="lg")
143
- corpus_status = gr.Textbox(label="Status", interactive=False)
144
 
145
  # Display default corpus info
146
- with gr.Accordion("📖 Corpus Information", open=False):
147
  default_corpus_display = gr.Markdown()
148
 
149
  # Display processed chunks
150
- with gr.Accordion("📑 Processed Chunks", open=False):
151
  processed_chunks_display = gr.Markdown()
152
 
153
  # State to hold example questions
@@ -160,11 +150,15 @@ def create_interface():
160
  )
161
 
162
  # Tab 2: Retrieval Configuration
163
- with gr.Tab(label="🔍 Retrieval"):
164
- gr.Markdown("## Retrieval Configuration")
165
- gr.Markdown("Configure how relevant chunks are retrieved from the corpus.")
 
 
 
 
166
 
167
- gr.Markdown(f"**Current Embedding Model:** The model selected in the Corpus tab is used.")
168
 
169
  with gr.Row():
170
  top_k = gr.Slider(
@@ -172,20 +166,24 @@ def create_interface():
172
  maximum=10,
173
  value=3,
174
  step=1,
175
- label="Top K (number of chunks to retrieve)"
176
  )
177
  similarity_threshold = gr.Slider(
178
  minimum=0.0,
179
  maximum=1.0,
180
  value=0.5,
181
  step=0.05,
182
- label="Similarity Threshold (minimum score - filters low-quality matches)"
183
  )
184
 
185
  # Tab 3: Generation Configuration
186
- with gr.Tab(label="🤖 Generation"):
187
- gr.Markdown("## Generation Configuration")
188
- gr.Markdown("Select the language model and configure generation parameters.")
 
 
 
 
189
 
190
  llm_model = gr.Dropdown(
191
  choices=[
@@ -194,7 +192,7 @@ def create_interface():
194
  "google/gemma-2-2b-it",
195
  ],
196
  value="meta-llama/Llama-3.2-1B-Instruct",
197
- label="Language Model"
198
  )
199
 
200
  with gr.Row():
@@ -203,28 +201,28 @@ def create_interface():
203
  maximum=2.0,
204
  value=0.7,
205
  step=0.1,
206
- label="Temperature (creativity)"
207
  )
208
  max_tokens = gr.Slider(
209
  minimum=100,
210
  maximum=2048,
211
  value=800,
212
  step=50,
213
- label="Max Tokens (response length - higher for reasoning models)"
214
  )
215
 
216
  # Tab 4: Query & Results
217
- with gr.Tab(label="💬 Query"):
218
- gr.Markdown("## Ask a Question")
219
 
220
  query_input = gr.Textbox(
221
- label="Your Question",
222
- placeholder="Enter your question here...",
223
  lines=3
224
  )
225
 
226
- with gr.Accordion("💡 Example Questions (click to expand)", open=True):
227
- gr.Markdown("*Questions generated based on your corpus content*")
228
  examples_markdown = gr.Markdown(visible=False)
229
 
230
  # Connect processing to update examples
 
78
  def create_interface():
79
  with gr.Blocks(title="RAG Pedagogical Demo", theme=gr.themes.Soft()) as demo:
80
 
81
+ # Header - Bilingual
82
+ gr.Markdown("# 🎓 RAG Pedagogical Demo / Démo Pédagogique RAG")
83
+ gr.Markdown("*A pedagogical tool to understand Retrieval Augmented Generation / Un outil pédagogique pour comprendre la génération augmentée par récupération*")
 
 
 
 
 
 
 
 
 
 
84
 
85
  with gr.Tabs() as tabs:
86
 
87
  # Tab 1: Corpus Management
88
  with gr.Tab(label="📚 Corpus"):
89
+ gr.Markdown("## Corpus Management / Gestion du Corpus")
90
  gr.Markdown("""
91
+ **EN - Default corpus:** Multiple PDF documents from the `documents/` folder. Or upload your own PDF.
92
 
93
+ **FR - Corpus par défaut :** Plusieurs documents PDF du dossier `documents/`. Ou téléchargez votre propre PDF.
94
 
95
+ 1. Select your embedding model / Sélectionnez votre modèle d'embedding
96
+ 2. Adjust chunking parameters if needed / Ajustez les paramètres de découpage si nécessaire
97
+ 3. Click "Process Corpus" / Cliquez sur "Process Corpus"
98
  """)
99
 
100
  # Embedding model selection FIRST
 
105
  "nomic-ai/nomic-embed-text-v2-moe",
106
  ],
107
  value="sentence-transformers/all-MiniLM-L6-v2",
108
+ label="🔤 Embedding Model / Modèle d'Embedding (select before processing / sélectionnez avant traitement)"
109
  )
110
 
111
  pdf_upload = gr.File(
112
+ label="📄 Upload PDF / Télécharger PDF (optional / optionnel)",
113
  file_types=[".pdf"]
114
  )
115
 
 
119
  maximum=1000,
120
  value=500,
121
  step=50,
122
+ label="Chunk Size / Taille des Chunks (characters / caractères)"
123
  )
124
  chunk_overlap = gr.Slider(
125
  minimum=0,
126
  maximum=200,
127
  value=50,
128
  step=10,
129
+ label="Chunk Overlap / Chevauchement (characters / caractères)"
130
  )
131
 
132
+ process_btn = gr.Button("🚀 Process Corpus / Traiter le Corpus", variant="primary", size="lg")
133
+ corpus_status = gr.Textbox(label="Status / Statut", interactive=False)
134
 
135
  # Display default corpus info
136
+ with gr.Accordion("📖 Corpus Information / Informations sur le Corpus", open=False):
137
  default_corpus_display = gr.Markdown()
138
 
139
  # Display processed chunks
140
+ with gr.Accordion("📑 Processed Chunks / Chunks Traités", open=False):
141
  processed_chunks_display = gr.Markdown()
142
 
143
  # State to hold example questions
 
150
  )
151
 
152
  # Tab 2: Retrieval Configuration
153
+ with gr.Tab(label="🔍 Retrieval / Récupération"):
154
+ gr.Markdown("## Retrieval Configuration / Configuration de la Récupération")
155
+ gr.Markdown("""
156
+ **EN:** Configure how relevant chunks are retrieved from the corpus.
157
+
158
+ **FR:** Configurez comment les chunks pertinents sont récupérés du corpus.
159
+ """)
160
 
161
+ gr.Markdown("**Current Embedding Model / Modèle d'Embedding Actuel:** The model selected in the Corpus tab / Le modèle sélectionné dans l'onglet Corpus")
162
 
163
  with gr.Row():
164
  top_k = gr.Slider(
 
166
  maximum=10,
167
  value=3,
168
  step=1,
169
+ label="Top K (number of chunks / nombre de chunks à récupérer)"
170
  )
171
  similarity_threshold = gr.Slider(
172
  minimum=0.0,
173
  maximum=1.0,
174
  value=0.5,
175
  step=0.05,
176
+ label="Similarity Threshold / Seuil de Similarité (minimum score / score minimum)"
177
  )
178
 
179
  # Tab 3: Generation Configuration
180
+ with gr.Tab(label="🤖 Generation / Génération"):
181
+ gr.Markdown("## Generation Configuration / Configuration de la Génération")
182
+ gr.Markdown("""
183
+ **EN:** Select the language model and configure generation parameters.
184
+
185
+ **FR:** Sélectionnez le modèle de langage et configurez les paramètres de génération.
186
+ """)
187
 
188
  llm_model = gr.Dropdown(
189
  choices=[
 
192
  "google/gemma-2-2b-it",
193
  ],
194
  value="meta-llama/Llama-3.2-1B-Instruct",
195
+ label="Language Model / Modèle de Langage"
196
  )
197
 
198
  with gr.Row():
 
201
  maximum=2.0,
202
  value=0.7,
203
  step=0.1,
204
+ label="Temperature / Température (creativity / créativité)"
205
  )
206
  max_tokens = gr.Slider(
207
  minimum=100,
208
  maximum=2048,
209
  value=800,
210
  step=50,
211
+ label="Max Tokens (response length / longueur réponse - higher for reasoning / plus pour raisonnement)"
212
  )
213
 
214
  # Tab 4: Query & Results
215
+ with gr.Tab(label="💬 Query / Requête"):
216
+ gr.Markdown("## Ask a Question / Posez une Question")
217
 
218
  query_input = gr.Textbox(
219
+ label="Your Question / Votre Question",
220
+ placeholder="Enter your question here / Entrez votre question ici...",
221
  lines=3
222
  )
223
 
224
+ with gr.Accordion("💡 Example Questions / Questions d'Exemple (click to expand / cliquez pour développer)", open=True):
225
+ gr.Markdown("*Questions generated based on your corpus content / Questions générées à partir de votre corpus*")
226
  examples_markdown = gr.Markdown(visible=False)
227
 
228
  # Connect processing to update examples