Zeggai Abdellah committed on
Commit
8517947
·
1 Parent(s): 944616a

add status

Browse files
Files changed (2) hide show
  1. app.py +113 -38
  2. index.html +157 -17
app.py CHANGED
@@ -4,10 +4,11 @@ import json
4
  from dotenv import load_dotenv
5
  import time
6
  import uuid
7
- from typing import List, Dict
8
  from datetime import datetime
9
  from huggingface_hub import HfApi # For file persistence in Spaces
10
  import os
 
11
 
12
  # Load environment variables from .env file
13
  load_dotenv()
@@ -16,6 +17,20 @@ from langchain_google_genai import GoogleGenerativeAI
16
 
17
  app = FastAPI()
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def estimate_difficulty(question: str, q_type: str) -> str:
20
  """
21
  Estimate question difficulty based on type and content.
@@ -99,6 +114,10 @@ def generate_questions_for_chunk(chunk: str, chunk_id: int, model="gemini-2.0-fl
99
  "validated": False
100
  })
101
 
 
 
 
 
102
  return formatted_questions
103
 
104
  except Exception as e:
@@ -108,33 +127,60 @@ def generate_questions_for_chunk(chunk: str, chunk_id: int, model="gemini-2.0-fl
108
  print(f"Erreur de parsing de la réponse API pour le chunk {chunk_id}: {e}")
109
  return []
110
 
111
- def generate_questions_for_document(chunks: List[str]) -> Dict:
112
  """
113
- Generate questions for all document chunks and structure as a scientific dataset.
114
  """
115
- all_questions = []
116
 
117
- for i, chunk in enumerate(chunks):
118
- print(f"Processing chunk {i+1}/{len(chunks)}...")
119
- questions = generate_questions_for_chunk(chunk, i)
120
- all_questions.extend(questions)
121
- time.sleep(9) # Rate limiting
122
-
123
- dataset = {
124
- "dataset_info": {
125
- "title": "Vaccine Guide Question-Answer Dataset",
126
- "description": "A dataset of question-answer pairs generated from a vaccine guide for AI language model training.",
127
- "version": "1.1.0",
128
- "created_date": datetime.utcnow().isoformat(),
129
- "source": "Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.pdf",
130
- "generated_by": "Gemini API",
131
- "total_questions": len(all_questions),
132
- "intended_use": "Fine-tuning medical language models for knowledge recall and reasoning"
133
- },
134
- "questions": all_questions
135
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- return dataset
 
 
 
 
138
 
139
  def save_dataset_to_space(dataset: Dict, filename: str):
140
  """
@@ -151,34 +197,63 @@ async def generate_questions():
151
  """
152
  Endpoint to generate questions from the vaccine guide chunks
153
  """
 
 
 
 
 
 
 
 
 
 
 
154
  try:
 
 
 
 
 
 
 
 
 
155
 
156
- chunks_data = None
157
-
158
-
159
  with open("./chunks.json", "r", encoding="utf-8") as f:
160
- chunks_data = json.load(f)
161
-
162
 
163
  if chunks_data is None:
164
  raise HTTPException(status_code=404, detail="Chunks file not found in any known location")
165
 
166
- VACCINE_CHUNKS = [chunks_data[0]["text"]]
167
- dataset = generate_questions_for_document(VACCINE_CHUNKS)
168
 
169
- # Save to persistent storage
170
- filename = "vaccine_questions.json"
171
- save_dataset_to_space(dataset, filename)
 
172
 
173
  return {
174
- "status": "success",
175
- "message": "Questions generated successfully",
176
- "dataset_info": dataset["dataset_info"],
177
- "download_url": f"/download/{filename}"
178
  }
179
  except Exception as e:
 
 
 
180
  raise HTTPException(status_code=500, detail=str(e))
181
 
 
 
 
 
 
 
 
 
 
 
182
  @app.get("/download/{filename}")
183
  async def download_file(filename: str):
184
  """
 
4
  from dotenv import load_dotenv
5
  import time
6
  import uuid
7
+ from typing import List, Dict, Optional
8
  from datetime import datetime
9
  from huggingface_hub import HfApi # For file persistence in Spaces
10
  import os
11
+ import threading
12
 
13
  # Load environment variables from .env file
14
  load_dotenv()
 
17
 
18
  app = FastAPI()
19
 
20
# Global variables to track generation status.
# This dict is shared between the request handlers and the background
# generation thread; every read and write goes through generation_lock.
generation_status = {
    "is_running": False,        # True while a generation run is in progress
    "start_time": None,         # ISO-8601 UTC timestamp string of the current run's start
    "processed_chunks": 0,      # number of chunks handled so far in the current run
    "total_chunks": 0,          # number of chunks the current run has to handle
    "questions_generated": 0,   # running total of questions produced in the current run
    "completed": False,         # True once a run finished and its dataset file was written
    "result_file": None,        # filename of the dataset written by the last successful run
    "error": None               # str(exception) of the last failed run, else None
}

# Guards every access to generation_status.
generation_lock = threading.Lock()
33
+
34
  def estimate_difficulty(question: str, q_type: str) -> str:
35
  """
36
  Estimate question difficulty based on type and content.
 
114
  "validated": False
115
  })
116
 
117
+ # Update the global status
118
+ with generation_lock:
119
+ generation_status["questions_generated"] += len(formatted_questions)
120
+
121
  return formatted_questions
122
 
123
  except Exception as e:
 
127
  print(f"Erreur de parsing de la réponse API pour le chunk {chunk_id}: {e}")
128
  return []
129
 
130
def generate_questions_in_background(chunks: List[str]):
    """
    Generate questions for every chunk and publish progress in ``generation_status``.

    Meant to be used as the target of a worker thread. Progress counters are
    updated under ``generation_lock`` after each chunk. On success the dataset
    is written to ``./vaccine_questions_<unix timestamp>.json`` and the status
    is marked completed with that filename. On any exception the error text is
    stored in the status and the run is marked as no longer running.

    Args:
        chunks: Text chunks to generate questions from, processed in order.

    Returns:
        None. Results are reported through ``generation_status`` and the
        dataset file.
    """
    try:
        total_chunks = len(chunks)
        all_questions = []

        with generation_lock:
            generation_status["total_chunks"] = total_chunks
            generation_status["processed_chunks"] = 0
            generation_status["questions_generated"] = 0

        for i, chunk in enumerate(chunks):
            print(f"Processing chunk {i+1}/{total_chunks}...")
            questions = generate_questions_for_chunk(chunk, i)
            all_questions.extend(questions)

            with generation_lock:
                generation_status["processed_chunks"] = i + 1

            # Rate limiting between API calls. No call follows the last
            # chunk, so sleeping there would only delay completion.
            if i + 1 < total_chunks:
                time.sleep(9)

        dataset = {
            "dataset_info": {
                "title": "Vaccine Guide Question-Answer Dataset",
                "description": "A dataset of question-answer pairs generated from a vaccine guide for AI language model training.",
                "version": "1.1.0",
                "created_date": datetime.utcnow().isoformat(),
                "source": "Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.pdf",
                "generated_by": "Gemini API",
                "total_questions": len(all_questions),
                "intended_use": "Fine-tuning medical language models for knowledge recall and reasoning"
            },
            "questions": all_questions
        }

        # Save the dataset; the timestamp keeps successive runs from
        # overwriting each other's output.
        filename = f"vaccine_questions_{int(time.time())}.json"
        with open(f"./{filename}", 'w', encoding='utf-8') as f:
            json.dump(dataset, f, indent=4, ensure_ascii=False)

        # Update status to completed
        with generation_lock:
            generation_status["completed"] = True
            generation_status["is_running"] = False
            generation_status["result_file"] = filename

    except Exception as e:
        # Top-level boundary of the worker thread: record the failure so the
        # status endpoint can report it instead of the thread dying silently.
        print(f"Error in background generation: {e}")
        with generation_lock:
            generation_status["error"] = str(e)
            generation_status["is_running"] = False
184
 
185
  def save_dataset_to_space(dataset: Dict, filename: str):
186
  """
 
197
  """
198
  Endpoint to generate questions from the vaccine guide chunks
199
  """
200
+ global generation_status
201
+
202
+ # Check if generation is already running
203
+ with generation_lock:
204
+ if generation_status["is_running"]:
205
+ return {
206
+ "status": "running",
207
+ "message": "Generation already in progress",
208
+ "current_status": generation_status
209
+ }
210
+
211
  try:
212
+ # Reset status
213
+ with generation_lock:
214
+ generation_status["is_running"] = True
215
+ generation_status["start_time"] = datetime.utcnow().isoformat()
216
+ generation_status["processed_chunks"] = 0
217
+ generation_status["questions_generated"] = 0
218
+ generation_status["completed"] = False
219
+ generation_status["result_file"] = None
220
+ generation_status["error"] = None
221
 
222
+ # Load chunks
 
 
223
  with open("./chunks.json", "r", encoding="utf-8") as f:
224
+ chunks_data = json.load(f)
 
225
 
226
  if chunks_data is None:
227
  raise HTTPException(status_code=404, detail="Chunks file not found in any known location")
228
 
229
+ VACCINE_CHUNKS = [chunk["text"] for chunk in chunks_data]
 
230
 
231
+ # Start generation in background thread
232
+ thread = threading.Thread(target=generate_questions_in_background, args=(VACCINE_CHUNKS,))
233
+ thread.daemon = True
234
+ thread.start()
235
 
236
  return {
237
+ "status": "started",
238
+ "message": "Question generation started in background",
239
+ "current_status": generation_status
 
240
  }
241
  except Exception as e:
242
+ with generation_lock:
243
+ generation_status["is_running"] = False
244
+ generation_status["error"] = str(e)
245
  raise HTTPException(status_code=500, detail=str(e))
246
 
247
@app.get("/generation-status")
async def get_generation_status():
    """
    Report the current state of question generation.

    Returns a shallow copy of the shared status dict, taken while holding
    the lock, so the response is a consistent snapshot that later updates
    from the worker thread cannot change.
    """
    with generation_lock:
        return dict(generation_status)
256
+
257
  @app.get("/download/{filename}")
258
  async def download_file(filename: str):
259
  """
index.html CHANGED
@@ -132,6 +132,31 @@
132
  .badge-factual { background-color: #3498db; }
133
  .badge-conceptual { background-color: #9b59b6; }
134
  .badge-applied { background-color: #e67e22; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  </style>
136
  </head>
137
  <body>
@@ -143,8 +168,19 @@
143
  <button id="generateBtn">Générer des Questions</button>
144
 
145
  <div id="statusContainer" style="display: none;">
146
- <div class="loader"></div>
147
- <span id="statusText">Initialisation de la génération...</span>
 
 
 
 
 
 
 
 
 
 
 
148
  </div>
149
  </div>
150
 
@@ -181,23 +217,44 @@
181
  const apiBaseUrl = window.location.origin; // Use the same origin for API calls
182
  let generatedDataset = null;
183
  let downloadUrl = '';
 
184
 
185
  document.getElementById('generateBtn').addEventListener('click', startGeneration);
186
  document.getElementById('downloadBtn').addEventListener('click', downloadDataset);
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  function startGeneration() {
189
  const generateBtn = document.getElementById('generateBtn');
190
  const statusContainer = document.getElementById('statusContainer');
191
- const statusText = document.getElementById('statusText');
192
  const resultsContainer = document.getElementById('resultsContainer');
193
 
194
  // Disable button and show status
195
  generateBtn.disabled = true;
196
  statusContainer.style.display = 'block';
197
- statusText.textContent = 'Génération des questions en cours... Cela peut prendre quelques minutes.';
198
  resultsContainer.style.display = 'none';
199
 
200
- // Call the API endpoint
201
  fetch(`${apiBaseUrl}/generate-questions`)
202
  .then(response => {
203
  if (!response.ok) {
@@ -206,28 +263,111 @@
206
  return response.json();
207
  })
208
  .then(data => {
209
- console.log('Generation successful:', data);
210
- downloadUrl = data.download_url;
211
 
212
- // Fetch the generated dataset
213
- return fetch(`${apiBaseUrl}${downloadUrl}`);
214
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  .then(response => response.json())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  .then(dataset => {
217
  generatedDataset = dataset;
218
  displayResults(dataset);
219
 
220
- // Update status and re-enable button
 
221
  statusText.textContent = 'Génération terminée avec succès !';
222
- setTimeout(() => {
223
- statusContainer.style.display = 'none';
224
- generateBtn.disabled = false;
225
- }, 3000);
226
  })
227
  .catch(error => {
228
- console.error('Error generating questions:', error);
229
- statusText.textContent = `Erreur: ${error.message}`;
230
- generateBtn.disabled = false;
231
  });
232
  }
233
 
 
132
  .badge-factual { background-color: #3498db; }
133
  .badge-conceptual { background-color: #9b59b6; }
134
  .badge-applied { background-color: #e67e22; }
135
+
136
+ .progress-container {
137
+ margin-top: 15px;
138
+ background-color: #ecf0f1;
139
+ border-radius: 4px;
140
+ height: 20px;
141
+ overflow: hidden;
142
+ }
143
+ .progress-bar {
144
+ height: 100%;
145
+ background-color: #3498db;
146
+ transition: width 0.3s ease;
147
+ display: flex;
148
+ align-items: center;
149
+ justify-content: center;
150
+ color: white;
151
+ font-size: 12px;
152
+ font-weight: bold;
153
+ }
154
+ .progress-bar.complete {
155
+ background-color: #27ae60;
156
+ }
157
+ .progress-bar.error {
158
+ background-color: #e74c3c;
159
+ }
160
  </style>
161
  </head>
162
  <body>
 
168
  <button id="generateBtn">Générer des Questions</button>
169
 
170
  <div id="statusContainer" style="display: none;">
171
+ <div id="statusHeader">
172
+ <div class="loader"></div>
173
+ <span id="statusText">Initialisation de la génération...</span>
174
+ </div>
175
+
176
+ <div class="progress-container">
177
+ <div id="progressBar" class="progress-bar" style="width: 0%">0%</div>
178
+ </div>
179
+
180
+ <div id="statusDetails" style="margin-top: 10px;">
181
+ <div>Chunks traités: <span id="processedChunks">0</span>/<span id="totalChunks">0</span></div>
182
+ <div>Questions générées: <span id="questionsGenerated">0</span></div>
183
+ </div>
184
  </div>
185
  </div>
186
 
 
217
  const apiBaseUrl = window.location.origin; // Use the same origin for API calls
218
  let generatedDataset = null;
219
  let downloadUrl = '';
220
+ let statusCheckInterval = null;
221
 
222
  document.getElementById('generateBtn').addEventListener('click', startGeneration);
223
  document.getElementById('downloadBtn').addEventListener('click', downloadDataset);
224
 
225
+ // Check for ongoing generation when the page loads
226
+ window.addEventListener('load', checkOngoingGeneration);
227
+
228
/**
 * On page load, ask the server for the generation status and restore the UI:
 * resume polling if a run is in progress, or show the finished run's results.
 */
function checkOngoingGeneration() {
    fetch(`${apiBaseUrl}/generation-status`)
        .then(response => {
            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }
            return response.json();
        })
        .then(status => {
            if (status.is_running) {
                // A run is in progress: keep the button disabled, as
                // startGeneration does, and resume polling.
                document.getElementById('generateBtn').disabled = true;
                setupStatusMonitoring();
                updateStatusDisplay(status);
                return;
            }

            if (status.completed) {
                // Nothing left to poll for. Show the final status and load
                // the results exactly once; starting the poller here would
                // trigger a second download and render.
                document.getElementById('statusContainer').style.display = 'block';
                updateStatusDisplay(status);

                if (status.result_file) {
                    downloadUrl = `/download/${status.result_file}`;
                    loadResults();
                }
            }
        })
        .catch(error => {
            console.error('Error checking generation status:', error);
        });
}
246
+
247
  function startGeneration() {
248
  const generateBtn = document.getElementById('generateBtn');
249
  const statusContainer = document.getElementById('statusContainer');
 
250
  const resultsContainer = document.getElementById('resultsContainer');
251
 
252
  // Disable button and show status
253
  generateBtn.disabled = true;
254
  statusContainer.style.display = 'block';
 
255
  resultsContainer.style.display = 'none';
256
 
257
+ // Call the API endpoint to start generation
258
  fetch(`${apiBaseUrl}/generate-questions`)
259
  .then(response => {
260
  if (!response.ok) {
 
263
  return response.json();
264
  })
265
  .then(data => {
266
+ console.log('Generation started:', data);
 
267
 
268
+ // Setup status monitoring
269
+ setupStatusMonitoring();
270
  })
271
+ .catch(error => {
272
+ console.error('Error starting generation:', error);
273
+ document.getElementById('statusText').textContent = `Erreur: ${error.message}`;
274
+ generateBtn.disabled = false;
275
+ });
276
+ }
277
+
278
/**
 * (Re)start periodic polling of the generation status and make the status
 * panel visible. Any previously running poller is cancelled first.
 */
function setupStatusMonitoring() {
    const POLL_INTERVAL_MS = 5000;
    const statusPanel = document.getElementById('statusContainer');

    // Drop the previous timer, if any, so only one poller is ever active
    if (statusCheckInterval) {
        clearInterval(statusCheckInterval);
    }

    statusPanel.style.display = 'block';

    // Poll on a fixed schedule, and once right away so the panel does not
    // show stale values until the first tick
    statusCheckInterval = setInterval(checkGenerationStatus, POLL_INTERVAL_MS);
    checkGenerationStatus();
}
293
+
294
/**
 * Poll the server once for the generation status, refresh the status panel,
 * and stop polling as soon as the run has ended (completed, failed, or idle).
 */
function checkGenerationStatus() {
    // Cancel the poller started by setupStatusMonitoring
    const stopPolling = () => {
        clearInterval(statusCheckInterval);
        statusCheckInterval = null;
    };

    fetch(`${apiBaseUrl}/generation-status`)
        .then(response => {
            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }
            return response.json();
        })
        .then(status => {
            updateStatusDisplay(status);

            // If there was an error, stop checking and let the user retry
            if (status.error) {
                stopPolling();
                document.getElementById('generateBtn').disabled = false;
                return;
            }

            // If generation is completed, stop checking and load results
            if (status.completed) {
                stopPolling();
                if (status.result_file) {
                    downloadUrl = `/download/${status.result_file}`;
                    loadResults();
                } else {
                    // No file to fetch: do not request "/download/null"
                    document.getElementById('generateBtn').disabled = false;
                }
                return;
            }

            // Neither running, completed nor failed (e.g. the server lost its
            // in-memory status): polling would never end, so stop here.
            if (!status.is_running) {
                stopPolling();
                document.getElementById('generateBtn').disabled = false;
            }
        })
        .catch(error => {
            // Transient failures keep the poller alive; the next tick retries
            console.error('Error checking status:', error);
        });
}
317
+
318
/**
 * Render a status object from /generation-status into the status panel:
 * headline text, chunk/question counters and the progress bar.
 *
 * @param {Object} status - Status payload with the fields is_running,
 *   completed, error, processed_chunks, total_chunks, questions_generated.
 */
function updateStatusDisplay(status) {
    const statusText = document.getElementById('statusText');
    const progressBar = document.getElementById('progressBar');
    const processedChunks = document.getElementById('processedChunks');
    const totalChunks = document.getElementById('totalChunks');
    const questionsGenerated = document.getElementById('questionsGenerated');

    const hasError = Boolean(status.error);
    const isComplete = !hasError && Boolean(status.completed);

    // Set the state classes explicitly on every update so a new run does
    // not keep the colour of a previous failed or finished run.
    progressBar.classList.toggle('error', hasError);
    progressBar.classList.toggle('complete', isComplete);

    // Update headline text
    if (hasError) {
        statusText.textContent = `Erreur: ${status.error}`;
    } else if (isComplete) {
        statusText.textContent = 'Génération terminée avec succès !';
    } else if (status.is_running) {
        statusText.textContent = 'Génération des questions en cours...';
    }

    // Update progress data
    processedChunks.textContent = status.processed_chunks;
    totalChunks.textContent = status.total_chunks;
    questionsGenerated.textContent = status.questions_generated;

    // Calculate and update progress percentage; with no chunks known yet
    // the bar is reset to 0% instead of keeping a stale value.
    const percentage = status.total_chunks > 0
        ? Math.round((status.processed_chunks / status.total_chunks) * 100)
        : 0;
    progressBar.style.width = `${percentage}%`;
    progressBar.textContent = `${percentage}%`;
}
348
+
349
/**
 * Download the generated dataset from `downloadUrl`, render it, and
 * re-enable the generate button whether or not the download succeeded.
 * Does nothing when no download URL is known yet.
 */
function loadResults() {
    if (!downloadUrl) return;

    const statusText = document.getElementById('statusText');
    const generateBtn = document.getElementById('generateBtn');

    // Fetch the generated dataset
    fetch(`${apiBaseUrl}${downloadUrl}`)
        .then(response => {
            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }
            return response.json();
        })
        .then(dataset => {
            generatedDataset = dataset;
            displayResults(dataset);

            // Update status
            statusText.textContent = 'Génération terminée avec succès !';
            generateBtn.disabled = false;
        })
        .catch(error => {
            console.error('Error loading results:', error);
            // Surface the failure and unlock the button; otherwise the page
            // stays stuck with a disabled button and no feedback.
            statusText.textContent = `Erreur: ${error.message}`;
            generateBtn.disabled = false;
        });
}
373