Update app.py
Browse files
app.py
CHANGED
|
@@ -5,54 +5,42 @@ import os
|
|
| 5 |
import json
|
| 6 |
from datetime import datetime
|
| 7 |
from google import genai
|
| 8 |
-
from
|
| 9 |
-
import
|
| 10 |
import uuid
|
|
|
|
| 11 |
|
| 12 |
app = Flask(__name__)
|
| 13 |
|
| 14 |
-
#
|
| 15 |
# Read the API key from the environment so the secret is never committed.
# The key that used to be hard-coded on this line is exposed in the history
# and should be revoked. An empty string means "not configured".
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
|
| 16 |
-
MODEL_ID = "gemini-2.0-flash
|
| 17 |
UPLOAD_FOLDER = 'uploads'
|
| 18 |
RESULTS_FOLDER = 'results'
|
| 19 |
|
| 20 |
-
# One SafetySetting per harm category, each with the BLOCK_NONE threshold.
# The category order matches the original literal list.
safety_settings = [
    types.SafetySetting(category=harm_category, threshold="BLOCK_NONE")
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]
|
| 38 |
-
|
| 39 |
# Créer les dossiers s'ils n'existent pas
|
| 40 |
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
| 41 |
os.makedirs(RESULTS_FOLDER, exist_ok=True)
|
| 42 |
|
| 43 |
-
# Définition
|
| 44 |
-
class TranslationPair(
|
| 45 |
-
fang: str
|
| 46 |
-
francais: str
|
| 47 |
|
| 48 |
-
class SyntheticDataResponse(
|
| 49 |
-
request_number: int
|
| 50 |
-
generated_pairs:
|
| 51 |
-
timestamp: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
# Stockage des tâches en cours
|
| 54 |
-
tasks = {}
|
| 55 |
-
|
| 56 |
class TaskManager:
|
| 57 |
def __init__(self):
|
| 58 |
self.tasks = {}
|
|
@@ -62,11 +50,11 @@ class TaskManager:
|
|
| 62 |
'status': 'running',
|
| 63 |
'progress': 0,
|
| 64 |
'total': 470,
|
| 65 |
-
'results_file': f'results_{task_id}.json',
|
| 66 |
'start_time': datetime.now(),
|
| 67 |
'errors': [],
|
| 68 |
'last_update': datetime.now(),
|
| 69 |
-
'all_data': []
|
| 70 |
}
|
| 71 |
|
| 72 |
def update_progress(self, task_id, progress, data=None):
|
|
@@ -90,50 +78,23 @@ class TaskManager:
|
|
| 90 |
|
| 91 |
task_manager = TaskManager()
|
| 92 |
|
| 93 |
-
def parse_response_to_pairs(response_text, request_num):
    """Extract Fang/French sentence pairs from a plain-text model response.

    Relevant lines look like ``Fang: ...`` or ``Français: ...``. Label
    matching is case-insensitive, the cedilla in "Français" is optional, and
    whitespace around the colon is ignored (so ``francais : ...`` is accepted
    just like ``français : ...``). Lines with any other label, or without a
    colon, are skipped. A pair is emitted as soon as both a non-empty Fang
    value and a non-empty French value have been collected.

    Args:
        response_text: Raw text returned by the model. ``None`` or an empty
            string yields an empty pair list instead of raising.
        request_num: Number of the request, echoed back in the result.

    Returns:
        A dict with ``request_number``, ``generated_pairs`` (a list of
        ``{"fang": ..., "francais": ...}`` dicts) and ``timestamp`` (the
        current local time in ISO 8601 format).
    """
    pairs = []
    current_fang = ""
    current_francais = ""

    for raw_line in (response_text or "").split('\n'):
        label, separator, value = raw_line.partition(':')
        if not separator:
            # No colon: this cannot be a "label: value" line.
            continue

        label = label.strip().lower()
        value = value.strip()
        if label == 'fang':
            current_fang = value
        elif label in ('français', 'francais'):
            current_francais = value
        else:
            continue

        # Emit the pair once both halves are present, then start a new one.
        if current_fang and current_francais:
            pairs.append({
                "fang": current_fang,
                "francais": current_francais
            })
            current_fang = ""
            current_francais = ""

    return {
        "request_number": request_num,
        "generated_pairs": pairs,
        "timestamp": datetime.now().isoformat()
    }
|
| 122 |
-
|
| 123 |
def generate_synthetic_data(file_path, task_id):
|
| 124 |
-
"""Fonction qui exécute les 470 requêtes en arrière-plan avec sortie JSON"""
|
| 125 |
try:
|
| 126 |
-
# Initialiser le client Google AI
|
| 127 |
client = genai.Client(api_key=GOOGLE_API_KEY)
|
| 128 |
|
| 129 |
# Uploader le fichier
|
| 130 |
-
|
|
|
|
| 131 |
|
| 132 |
-
# Prompt
|
| 133 |
-
prompt = """
|
| 134 |
-
Une
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
| 137 |
|
| 138 |
# Fichier de résultats JSON
|
| 139 |
results_file = os.path.join(RESULTS_FOLDER, f'results_{task_id}.json')
|
|
@@ -144,51 +105,86 @@ def generate_synthetic_data(file_path, task_id):
|
|
| 144 |
"task_id": task_id,
|
| 145 |
"start_time": datetime.now().isoformat(),
|
| 146 |
"total_requests": 470,
|
| 147 |
-
"model_used": MODEL_ID
|
|
|
|
| 148 |
},
|
| 149 |
"requests": [],
|
| 150 |
"summary": {
|
| 151 |
"total_pairs": 0,
|
| 152 |
"completed_requests": 0,
|
|
|
|
| 153 |
"errors": []
|
| 154 |
}
|
| 155 |
}
|
| 156 |
|
| 157 |
for i in range(470):
|
| 158 |
try:
|
| 159 |
-
|
|
|
|
|
|
|
| 160 |
response = client.models.generate_content(
|
| 161 |
model=MODEL_ID,
|
| 162 |
-
contents=[
|
| 163 |
-
config=
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
)
|
| 168 |
)
|
| 169 |
|
|
|
|
| 170 |
try:
|
| 171 |
-
#
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
all_results["requests"].append(request_data)
|
| 183 |
all_results["summary"]["total_pairs"] += request_data["pairs_count"]
|
| 184 |
all_results["summary"]["completed_requests"] += 1
|
| 185 |
|
| 186 |
-
except json.JSONDecodeError:
|
| 187 |
-
#
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
# Sauvegarder après chaque requête
|
| 194 |
with open(results_file, 'w', encoding='utf-8') as f:
|
|
@@ -197,14 +193,25 @@ def generate_synthetic_data(file_path, task_id):
|
|
| 197 |
# Mettre à jour le progrès
|
| 198 |
task_manager.update_progress(task_id, i + 1)
|
| 199 |
|
| 200 |
-
print(f"Requête {i+1}/470 complétée")
|
| 201 |
|
| 202 |
-
# Pause pour
|
| 203 |
-
time.sleep(
|
| 204 |
|
| 205 |
except Exception as e:
|
|
|
|
| 206 |
error_msg = f"Erreur requête {i+1}: {str(e)}"
|
| 207 |
task_manager.add_error(task_id, error_msg)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
all_results["summary"]["errors"].append({
|
| 209 |
"request_number": i + 1,
|
| 210 |
"error": error_msg,
|
|
@@ -216,16 +223,29 @@ def generate_synthetic_data(file_path, task_id):
|
|
| 216 |
json.dump(all_results, f, ensure_ascii=False, indent=2)
|
| 217 |
|
| 218 |
print(error_msg)
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
-
# Finaliser le fichier JSON
|
| 221 |
all_results["metadata"]["end_time"] = datetime.now().isoformat()
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
with open(results_file, 'w', encoding='utf-8') as f:
|
| 225 |
json.dump(all_results, f, ensure_ascii=False, indent=2)
|
| 226 |
|
| 227 |
task_manager.complete_task(task_id)
|
| 228 |
print(f"Tâche {task_id} terminée avec succès")
|
|
|
|
|
|
|
| 229 |
|
| 230 |
except Exception as e:
|
| 231 |
error_msg = f"Erreur générale: {str(e)}"
|
|
@@ -267,7 +287,8 @@ def upload_file():
|
|
| 267 |
|
| 268 |
return jsonify({
|
| 269 |
'task_id': task_id,
|
| 270 |
-
'message': 'Traitement démarré en arrière-plan'
|
|
|
|
| 271 |
})
|
| 272 |
|
| 273 |
@app.route('/status/<task_id>')
|
|
@@ -283,7 +304,8 @@ def get_status(task_id):
|
|
| 283 |
'percentage': round((task['progress'] / task['total']) * 100, 2),
|
| 284 |
'errors_count': len(task['errors']),
|
| 285 |
'start_time': task['start_time'].strftime('%Y-%m-%d %H:%M:%S'),
|
| 286 |
-
'last_update': task['last_update'].strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
| 287 |
})
|
| 288 |
|
| 289 |
@app.route('/download/<task_id>')
|
|
@@ -361,7 +383,7 @@ def list_tasks():
|
|
| 361 |
|
| 362 |
@app.route('/cleanup')
|
| 363 |
def cleanup_temp_files():
|
| 364 |
-
"""Nettoyer les fichiers temporaires
|
| 365 |
try:
|
| 366 |
temp_files_deleted = 0
|
| 367 |
for filename in os.listdir(RESULTS_FOLDER):
|
|
@@ -378,7 +400,7 @@ def cleanup_temp_files():
|
|
| 378 |
|
| 379 |
@app.route('/preview/<task_id>')
|
| 380 |
def preview_results(task_id):
|
| 381 |
-
"""Aperçu des résultats JSON
|
| 382 |
task = task_manager.get_task(task_id)
|
| 383 |
if not task:
|
| 384 |
return jsonify({'error': 'Tâche non trouvée'}), 404
|
|
@@ -392,12 +414,17 @@ def preview_results(task_id):
|
|
| 392 |
with open(results_file, 'r', encoding='utf-8') as f:
|
| 393 |
data = json.load(f)
|
| 394 |
|
| 395 |
-
# Retourner un aperçu des données
|
| 396 |
preview = {
|
| 397 |
"metadata": data.get("metadata", {}),
|
| 398 |
"summary": data.get("summary", {}),
|
| 399 |
"sample_requests": data.get("requests", [])[:3], # 3 premiers échantillons
|
| 400 |
-
"total_requests": len(data.get("requests", []))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
}
|
| 402 |
|
| 403 |
return jsonify(preview)
|
|
@@ -405,9 +432,40 @@ def preview_results(task_id):
|
|
| 405 |
except Exception as e:
|
| 406 |
return jsonify({'error': f'Erreur lors de la lecture du fichier: {str(e)}'}), 500
|
| 407 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
if __name__ == '__main__':
    # Print the startup banner, then start Flask's built-in server.
    # NOTE(review): debug=True is hard-coded; confirm this entry point is
    # only used for local development.
    startup_messages = (
        ("🚀 Démarrage du serveur...",),
        ("📂 Dossiers créés:", UPLOAD_FOLDER, RESULTS_FOLDER),
        ("🌐 Application disponible sur: http://localhost:5000",),
        ("📊 Sortie JSON activée",),
    )
    for message_parts in startup_messages:
        print(*message_parts)
    app.run(threaded=True, debug=True)
|
|
|
|
| 5 |
import json
|
| 6 |
from datetime import datetime
|
| 7 |
from google import genai
|
| 8 |
+
from pydantic import BaseModel, Field
|
| 9 |
+
import enum
|
| 10 |
import uuid
|
| 11 |
+
from typing import List
|
| 12 |
|
| 13 |
app = Flask(__name__)
|
| 14 |
|
| 15 |
+
# Configuration
# Read the API key from the environment so the secret is never committed.
# The key that used to be hard-coded here is exposed in the history and
# should be revoked. An empty string means "not configured".
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
MODEL_ID = "gemini-2.0-flash"  # Model recommended by the documentation
UPLOAD_FOLDER = 'uploads'
RESULTS_FOLDER = 'results'
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
# Créer les dossiers s'ils n'existent pas
|
| 22 |
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
| 23 |
os.makedirs(RESULTS_FOLDER, exist_ok=True)
|
| 24 |
|
| 25 |
+
# Définition des schémas Pydantic selon la documentation
|
| 26 |
+
# One aligned sentence pair: a Fang sentence and its French translation.
# Documented with comments rather than a docstring, because Pydantic copies a
# model's docstring into the generated JSON schema.
class TranslationPair(BaseModel):
    # `...` marks a field as required, exactly like omitting the default.
    fang: str = Field(..., description="Phrase en langue fang")
    francais: str = Field(..., description="Traduction française de la phrase")
|
| 29 |
|
| 30 |
+
# Structured payload requested from the model for one generation call.
# Documented with comments rather than a docstring, because Pydantic copies a
# model's docstring into the generated JSON schema.
class SyntheticDataResponse(BaseModel):
    # Titles are declared on each Field. The former inner `Config.fields`
    # mapping is not a valid configuration key in Pydantic v2 (it only
    # triggers a warning), so the titles it listed were never applied.
    # Schema property order follows the declaration order of the fields below.
    request_number: int = Field(
        title="Numéro de requête",
        description="Numéro de la requête",
    )
    generated_pairs: List[TranslationPair] = Field(
        title="Paires générées",
        description="Liste des paires de traduction générées",
    )
    timestamp: str = Field(
        title="Horodatage",
        description="Horodatage de la génération",
    )
|
| 42 |
|
| 43 |
# Stockage des tâches en cours
|
|
|
|
|
|
|
| 44 |
class TaskManager:
|
| 45 |
def __init__(self):
|
| 46 |
self.tasks = {}
|
|
|
|
| 50 |
'status': 'running',
|
| 51 |
'progress': 0,
|
| 52 |
'total': 470,
|
| 53 |
+
'results_file': f'results_{task_id}.json',
|
| 54 |
'start_time': datetime.now(),
|
| 55 |
'errors': [],
|
| 56 |
'last_update': datetime.now(),
|
| 57 |
+
'all_data': []
|
| 58 |
}
|
| 59 |
|
| 60 |
def update_progress(self, task_id, progress, data=None):
|
|
|
|
| 78 |
|
| 79 |
task_manager = TaskManager()
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
def generate_synthetic_data(file_path, task_id):
|
| 82 |
+
"""Fonction qui exécute les 470 requêtes en arrière-plan avec sortie JSON structurée"""
|
| 83 |
try:
|
| 84 |
+
# Initialiser le client Google AI selon la documentation
|
| 85 |
client = genai.Client(api_key=GOOGLE_API_KEY)
|
| 86 |
|
| 87 |
# Uploader le fichier
|
| 88 |
+
with open(file_path, 'rb') as f:
|
| 89 |
+
uploaded_file = client.files.upload(file=f)
|
| 90 |
|
| 91 |
+
# Prompt optimisé pour la génération de données synthétiques
|
| 92 |
+
prompt = """À partir du contenu de ce fichier, génère exactement 400 nouvelles paires de phrases :
|
| 93 |
+
- Une phrase en langue fang
|
| 94 |
+
- Sa traduction en français
|
| 95 |
+
|
| 96 |
+
Varie les structures grammaticales, les contextes et le vocabulaire pour créer des données d'entraînement diversifiées.
|
| 97 |
+
Assure-toi que chaque paire soit cohérente et naturelle dans les deux langues."""
|
| 98 |
|
| 99 |
# Fichier de résultats JSON
|
| 100 |
results_file = os.path.join(RESULTS_FOLDER, f'results_{task_id}.json')
|
|
|
|
| 105 |
"task_id": task_id,
|
| 106 |
"start_time": datetime.now().isoformat(),
|
| 107 |
"total_requests": 470,
|
| 108 |
+
"model_used": MODEL_ID,
|
| 109 |
+
"schema_version": "1.0"
|
| 110 |
},
|
| 111 |
"requests": [],
|
| 112 |
"summary": {
|
| 113 |
"total_pairs": 0,
|
| 114 |
"completed_requests": 0,
|
| 115 |
+
"failed_requests": 0,
|
| 116 |
"errors": []
|
| 117 |
}
|
| 118 |
}
|
| 119 |
|
| 120 |
for i in range(470):
|
| 121 |
try:
|
| 122 |
+
print(f"Traitement de la requête {i+1}/470...")
|
| 123 |
+
|
| 124 |
+
# Faire la requête avec schéma JSON selon la documentation
|
| 125 |
response = client.models.generate_content(
|
| 126 |
model=MODEL_ID,
|
| 127 |
+
contents=[uploaded_file, prompt],
|
| 128 |
+
config={
|
| 129 |
+
'response_mime_type': 'application/json',
|
| 130 |
+
'response_schema': SyntheticDataResponse,
|
| 131 |
+
}
|
|
|
|
| 132 |
)
|
| 133 |
|
| 134 |
+
# Parser la réponse avec le schéma Pydantic
|
| 135 |
try:
|
| 136 |
+
# Utiliser la méthode .parsed pour récupérer l'objet structuré
|
| 137 |
+
if hasattr(response, 'parsed') and response.parsed:
|
| 138 |
+
structured_data = response.parsed
|
| 139 |
+
request_data = {
|
| 140 |
+
"request_number": i + 1,
|
| 141 |
+
"timestamp": datetime.now().isoformat(),
|
| 142 |
+
"response": {
|
| 143 |
+
"request_number": structured_data.request_number,
|
| 144 |
+
"generated_pairs": [
|
| 145 |
+
{"fang": pair.fang, "francais": pair.francais}
|
| 146 |
+
for pair in structured_data.generated_pairs
|
| 147 |
+
],
|
| 148 |
+
"timestamp": structured_data.timestamp
|
| 149 |
+
},
|
| 150 |
+
"pairs_count": len(structured_data.generated_pairs),
|
| 151 |
+
"status": "success"
|
| 152 |
+
}
|
| 153 |
+
else:
|
| 154 |
+
# Fallback : parser manuellement la réponse JSON
|
| 155 |
+
response_json = json.loads(response.text)
|
| 156 |
+
request_data = {
|
| 157 |
+
"request_number": i + 1,
|
| 158 |
+
"timestamp": datetime.now().isoformat(),
|
| 159 |
+
"response": response_json,
|
| 160 |
+
"pairs_count": len(response_json.get("generated_pairs", [])),
|
| 161 |
+
"status": "success"
|
| 162 |
+
}
|
| 163 |
|
| 164 |
all_results["requests"].append(request_data)
|
| 165 |
all_results["summary"]["total_pairs"] += request_data["pairs_count"]
|
| 166 |
all_results["summary"]["completed_requests"] += 1
|
| 167 |
|
| 168 |
+
except (json.JSONDecodeError, AttributeError) as parse_error:
|
| 169 |
+
# En cas d'erreur de parsing, sauvegarder la réponse brute
|
| 170 |
+
error_data = {
|
| 171 |
+
"request_number": i + 1,
|
| 172 |
+
"timestamp": datetime.now().isoformat(),
|
| 173 |
+
"raw_response": response.text,
|
| 174 |
+
"pairs_count": 0,
|
| 175 |
+
"status": "parse_error",
|
| 176 |
+
"error": str(parse_error)
|
| 177 |
+
}
|
| 178 |
+
all_results["requests"].append(error_data)
|
| 179 |
+
all_results["summary"]["failed_requests"] += 1
|
| 180 |
+
|
| 181 |
+
error_msg = f"Erreur de parsing requête {i+1}: {str(parse_error)}"
|
| 182 |
+
task_manager.add_error(task_id, error_msg)
|
| 183 |
+
all_results["summary"]["errors"].append({
|
| 184 |
+
"request_number": i + 1,
|
| 185 |
+
"error": error_msg,
|
| 186 |
+
"timestamp": datetime.now().isoformat()
|
| 187 |
+
})
|
| 188 |
|
| 189 |
# Sauvegarder après chaque requête
|
| 190 |
with open(results_file, 'w', encoding='utf-8') as f:
|
|
|
|
| 193 |
# Mettre à jour le progrès
|
| 194 |
task_manager.update_progress(task_id, i + 1)
|
| 195 |
|
| 196 |
+
print(f"Requête {i+1}/470 complétée avec {request_data.get('pairs_count', 0)} paires")
|
| 197 |
|
| 198 |
+
# Pause pour respecter les limites de l'API
|
| 199 |
+
time.sleep(2) # Réduit à 2 secondes selon les bonnes pratiques
|
| 200 |
|
| 201 |
except Exception as e:
|
| 202 |
+
# Gestion des erreurs de requête
|
| 203 |
error_msg = f"Erreur requête {i+1}: {str(e)}"
|
| 204 |
task_manager.add_error(task_id, error_msg)
|
| 205 |
+
|
| 206 |
+
error_data = {
|
| 207 |
+
"request_number": i + 1,
|
| 208 |
+
"timestamp": datetime.now().isoformat(),
|
| 209 |
+
"pairs_count": 0,
|
| 210 |
+
"status": "request_error",
|
| 211 |
+
"error": error_msg
|
| 212 |
+
}
|
| 213 |
+
all_results["requests"].append(error_data)
|
| 214 |
+
all_results["summary"]["failed_requests"] += 1
|
| 215 |
all_results["summary"]["errors"].append({
|
| 216 |
"request_number": i + 1,
|
| 217 |
"error": error_msg,
|
|
|
|
| 223 |
json.dump(all_results, f, ensure_ascii=False, indent=2)
|
| 224 |
|
| 225 |
print(error_msg)
|
| 226 |
+
|
| 227 |
+
# Pause plus longue en cas d'erreur
|
| 228 |
+
time.sleep(5)
|
| 229 |
|
| 230 |
+
# Finaliser le fichier JSON avec statistiques complètes
|
| 231 |
all_results["metadata"]["end_time"] = datetime.now().isoformat()
|
| 232 |
+
start_time = datetime.fromisoformat(all_results["metadata"]["start_time"])
|
| 233 |
+
duration = (datetime.now() - start_time).total_seconds()
|
| 234 |
+
all_results["metadata"]["duration_seconds"] = duration
|
| 235 |
+
all_results["metadata"]["duration_minutes"] = round(duration / 60, 2)
|
| 236 |
+
|
| 237 |
+
# Statistiques finales
|
| 238 |
+
all_results["summary"]["success_rate"] = round(
|
| 239 |
+
(all_results["summary"]["completed_requests"] / 470) * 100, 2
|
| 240 |
+
)
|
| 241 |
|
| 242 |
with open(results_file, 'w', encoding='utf-8') as f:
|
| 243 |
json.dump(all_results, f, ensure_ascii=False, indent=2)
|
| 244 |
|
| 245 |
task_manager.complete_task(task_id)
|
| 246 |
print(f"Tâche {task_id} terminée avec succès")
|
| 247 |
+
print(f"Total de paires générées: {all_results['summary']['total_pairs']}")
|
| 248 |
+
print(f"Taux de succès: {all_results['summary']['success_rate']}%")
|
| 249 |
|
| 250 |
except Exception as e:
|
| 251 |
error_msg = f"Erreur générale: {str(e)}"
|
|
|
|
| 287 |
|
| 288 |
return jsonify({
|
| 289 |
'task_id': task_id,
|
| 290 |
+
'message': 'Traitement démarré en arrière-plan',
|
| 291 |
+
'estimated_duration': '15-20 minutes'
|
| 292 |
})
|
| 293 |
|
| 294 |
@app.route('/status/<task_id>')
|
|
|
|
| 304 |
'percentage': round((task['progress'] / task['total']) * 100, 2),
|
| 305 |
'errors_count': len(task['errors']),
|
| 306 |
'start_time': task['start_time'].strftime('%Y-%m-%d %H:%M:%S'),
|
| 307 |
+
'last_update': task['last_update'].strftime('%Y-%m-%d %H:%M:%S'),
|
| 308 |
+
'estimated_remaining': max(0, (task['total'] - task['progress']) * 2) if task['status'] == 'running' else 0
|
| 309 |
})
|
| 310 |
|
| 311 |
@app.route('/download/<task_id>')
|
|
|
|
| 383 |
|
| 384 |
@app.route('/cleanup')
|
| 385 |
def cleanup_temp_files():
|
| 386 |
+
"""Nettoyer les fichiers temporaires"""
|
| 387 |
try:
|
| 388 |
temp_files_deleted = 0
|
| 389 |
for filename in os.listdir(RESULTS_FOLDER):
|
|
|
|
| 400 |
|
| 401 |
@app.route('/preview/<task_id>')
|
| 402 |
def preview_results(task_id):
|
| 403 |
+
"""Aperçu des résultats JSON avec statistiques"""
|
| 404 |
task = task_manager.get_task(task_id)
|
| 405 |
if not task:
|
| 406 |
return jsonify({'error': 'Tâche non trouvée'}), 404
|
|
|
|
| 414 |
with open(results_file, 'r', encoding='utf-8') as f:
|
| 415 |
data = json.load(f)
|
| 416 |
|
| 417 |
+
# Retourner un aperçu enrichi des données
|
| 418 |
preview = {
|
| 419 |
"metadata": data.get("metadata", {}),
|
| 420 |
"summary": data.get("summary", {}),
|
| 421 |
"sample_requests": data.get("requests", [])[:3], # 3 premiers échantillons
|
| 422 |
+
"total_requests": len(data.get("requests", [])),
|
| 423 |
+
"structure_info": {
|
| 424 |
+
"schema_used": "SyntheticDataResponse",
|
| 425 |
+
"fields": ["request_number", "generated_pairs", "timestamp"],
|
| 426 |
+
"pair_structure": ["fang", "francais"]
|
| 427 |
+
}
|
| 428 |
}
|
| 429 |
|
| 430 |
return jsonify(preview)
|
|
|
|
| 432 |
except Exception as e:
|
| 433 |
return jsonify({'error': f'Erreur lors de la lecture du fichier: {str(e)}'}), 500
|
| 434 |
|
| 435 |
+
@app.route('/schema')
def get_schema_info():
    """Return a static JSON description of the structured-output schema.

    The payload lists the fields of the two response models together with the
    model id and response settings. It is written out by hand here, not
    derived from the model classes.
    """
    pair_fields = {
        "fang": "string - Phrase en langue fang",
        "francais": "string - Traduction française",
    }
    response_fields = {
        "request_number": "integer - Numéro de la requête",
        "generated_pairs": "array[TranslationPair] - Liste des paires générées",
        "timestamp": "string - Horodatage ISO 8601",
    }
    api_configuration = {
        "model": MODEL_ID,
        "response_mime_type": "application/json",
        "structured_output": True,
    }

    return jsonify({
        "schema_version": "1.0",
        "models": {
            "TranslationPair": {"fields": pair_fields},
            "SyntheticDataResponse": {"fields": response_fields},
        },
        "api_configuration": api_configuration,
    })
|
| 463 |
+
|
| 464 |
if __name__ == '__main__':
    # Print the startup banner, then start Flask's built-in server.
    # NOTE(review): debug=True is hard-coded; confirm this entry point is
    # only used for local development.
    startup_messages = (
        ("🚀 Démarrage du serveur avec configuration Gemini API optimisée...",),
        ("📂 Dossiers créés:", UPLOAD_FOLDER, RESULTS_FOLDER),
        ("🌐 Application disponible sur: http://localhost:5000",),
        ("📊 Sortie JSON structurée activée avec schémas Pydantic",),
        ("🔧 Modèle utilisé:", MODEL_ID),
        ("📋 Endpoint de schéma disponible: /schema",),
    )
    for message_parts in startup_messages:
        print(*message_parts)
    app.run(threaded=True, debug=True)
|