Trae Assistant committed on
Commit
d8467fb
·
1 Parent(s): 243bd47

feat: optimize functionality, add file upload, improve UI and i18n

Browse files
Files changed (4) hide show
  1. .gitignore +5 -0
  2. Dockerfile +11 -2
  3. app.py +81 -2
  4. templates/index.html +78 -13
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ instance/
2
+ __pycache__/
3
+ *.pyc
4
+ .env
5
+ test_api.py
Dockerfile CHANGED
@@ -1,22 +1,31 @@
1
  # Use an official Python runtime as a parent image
2
  FROM python:3.11-slim
3
 
 
 
 
4
  # Set the working directory in the container
5
  WORKDIR /app
6
 
7
  # Copy the current directory contents into the container at /app
8
- COPY . /app
9
 
10
  # Install any needed packages specified in requirements.txt
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
 
 
 
 
 
 
13
  # Make port 7860 available to the world outside this container
14
  EXPOSE 7860
15
 
16
  # Define environment variable
17
  ENV FLASK_APP=app.py
18
  ENV PYTHONUNBUFFERED=1
 
19
 
20
  # Run app.py when the container launches
21
- # Using python directly for simplicity with SQLite and ensuring single worker
22
  CMD ["python", "app.py"]
 
1
  # Use an official Python runtime as a parent image
2
  FROM python:3.11-slim
3
 
4
+ # Create a non-root user
5
+ RUN useradd -m -u 1000 user
6
+
7
  # Set the working directory in the container
8
  WORKDIR /app
9
 
10
  # Copy the current directory contents into the container at /app
11
+ COPY --chown=user . /app
12
 
13
  # Install any needed packages specified in requirements.txt
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
16
+ # Create instance directory and set permissions
17
+ RUN mkdir -p instance && chown -R user:user instance
18
+
19
+ # Switch to non-root user
20
+ USER user
21
+
22
  # Make port 7860 available to the world outside this container
23
  EXPOSE 7860
24
 
25
  # Define environment variable
26
  ENV FLASK_APP=app.py
27
  ENV PYTHONUNBUFFERED=1
28
+ ENV PATH="/home/user/.local/bin:$PATH"
29
 
30
  # Run app.py when the container launches
 
31
  CMD ["python", "app.py"]
app.py CHANGED
@@ -4,15 +4,19 @@ import sqlite3
4
  import requests
5
  import datetime
6
  import time
 
 
7
  from flask import Flask, render_template, request, jsonify, g
 
8
  from dotenv import load_dotenv
9
 
10
  # Load env
11
  load_dotenv()
12
 
13
  app = Flask(__name__, instance_relative_config=True)
14
- app.config['SECRET_KEY'] = 'dev-secret-key-eval-matrix'
15
  app.config['DATABASE'] = os.path.join(app.instance_path, 'eval_matrix.db')
 
16
 
17
  # Ensure instance folder exists
18
  try:
@@ -21,7 +25,7 @@ except OSError:
21
  pass
22
 
23
  # SiliconFlow Config
24
- SILICONFLOW_API_KEY = "sk-vimuseiptfbomzegyuvmebjzooncsqbyjtlddrfodzcdskgi"
25
  SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1/chat/completions"
26
  # Using Qwen 2.5 7B Instruct as the default judge/worker
27
  DEFAULT_MODEL = "Qwen/Qwen2.5-7B-Instruct"
@@ -208,6 +212,69 @@ def handle_test_cases(id):
208
  cur = db.execute('SELECT * FROM test_cases WHERE test_set_id = ?', (id,))
209
  return jsonify([dict(row) for row in cur.fetchall()])
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  # Evaluation Execution
212
  @app.route('/api/run_eval', methods=['POST'])
213
  def run_eval():
@@ -282,6 +349,18 @@ def get_run_details(id):
282
  "results": [dict(row) for row in results]
283
  })
284
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  if __name__ == '__main__':
286
  with app.app_context():
287
  init_db()
 
4
  import requests
5
  import datetime
6
  import time
7
+ import csv
8
+ import io
9
  from flask import Flask, render_template, request, jsonify, g
10
+ from werkzeug.utils import secure_filename
11
  from dotenv import load_dotenv
12
 
13
  # Load env
14
  load_dotenv()
15
 
16
  app = Flask(__name__, instance_relative_config=True)
17
+ app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'dev-secret-key-eval-matrix')
18
  app.config['DATABASE'] = os.path.join(app.instance_path, 'eval_matrix.db')
19
+ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB Max Upload
20
 
21
  # Ensure instance folder exists
22
  try:
 
25
  pass
26
 
27
  # SiliconFlow Config
28
+ SILICONFLOW_API_KEY = os.getenv("SILICONFLOW_API_KEY", "sk-vimuseiptfbomzegyuvmebjzooncsqbyjtlddrfodzcdskgi")
29
  SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1/chat/completions"
30
  # Using Qwen 2.5 7B Instruct as the default judge/worker
31
  DEFAULT_MODEL = "Qwen/Qwen2.5-7B-Instruct"
 
212
  cur = db.execute('SELECT * FROM test_cases WHERE test_set_id = ?', (id,))
213
  return jsonify([dict(row) for row in cur.fetchall()])
214
 
215
+ @app.route('/api/test_sets/<int:id>/import', methods=['POST'])
216
+ def import_test_cases(id):
217
+ if 'file' not in request.files:
218
+ return jsonify({"error": "No file part"}), 400
219
+ file = request.files['file']
220
+ if file.filename == '':
221
+ return jsonify({"error": "No selected file"}), 400
222
+
223
+ if file:
224
+ filename = secure_filename(file.filename)
225
+ db = get_db()
226
+ count = 0
227
+
228
+ try:
229
+ # Parse File
230
+ if filename.endswith('.csv'):
231
+ stream = io.StringIO(file.stream.read().decode("UTF8"), newline=None)
232
+ csv_input = csv.DictReader(stream)
233
+ # Check headers
234
+ if not 'prompt' in csv_input.fieldnames:
235
+ return jsonify({"error": "CSV must have a 'prompt' column"}), 400
236
+
237
+ cases = []
238
+ for row in csv_input:
239
+ cases.append((
240
+ id,
241
+ row.get('prompt'),
242
+ row.get('expected_output', ''),
243
+ row.get('criteria', '')
244
+ ))
245
+
246
+ if cases:
247
+ db.executemany('INSERT INTO test_cases (test_set_id, prompt, expected_output, criteria) VALUES (?, ?, ?, ?)', cases)
248
+ db.commit()
249
+ count = len(cases)
250
+
251
+ elif filename.endswith('.json'):
252
+ data = json.load(file)
253
+ if not isinstance(data, list):
254
+ return jsonify({"error": "JSON must be a list of objects"}), 400
255
+
256
+ cases = []
257
+ for item in data:
258
+ if 'prompt' in item:
259
+ cases.append((
260
+ id,
261
+ item.get('prompt'),
262
+ item.get('expected_output', ''),
263
+ item.get('criteria', '')
264
+ ))
265
+
266
+ if cases:
267
+ db.executemany('INSERT INTO test_cases (test_set_id, prompt, expected_output, criteria) VALUES (?, ?, ?, ?)', cases)
268
+ db.commit()
269
+ count = len(cases)
270
+ else:
271
+ return jsonify({"error": "Unsupported file type. Use .csv or .json"}), 400
272
+
273
+ except Exception as e:
274
+ return jsonify({"error": str(e)}), 500
275
+
276
+ return jsonify({"status": "success", "count": count})
277
+
278
  # Evaluation Execution
279
  @app.route('/api/run_eval', methods=['POST'])
280
  def run_eval():
 
349
  "results": [dict(row) for row in results]
350
  })
351
 
352
+ @app.errorhandler(413)
353
+ def request_entity_too_large(error):
354
+ return jsonify({"error": "File too large"}), 413
355
+
356
+ @app.errorhandler(500)
357
+ def internal_error(error):
358
+ return jsonify({"error": "Internal Server Error"}), 500
359
+
360
+ @app.errorhandler(404)
361
+ def not_found(error):
362
+ return jsonify({"error": "Not Found"}), 404
363
+
364
  if __name__ == '__main__':
365
  with app.app_context():
366
  init_db()
templates/index.html CHANGED
@@ -13,10 +13,11 @@
13
  body { background-color: #f3f4f6; }
14
  .fade-enter-active, .fade-leave-active { transition: opacity 0.3s ease; }
15
  .fade-enter-from, .fade-leave-to { opacity: 0; }
 
16
  </style>
17
  </head>
18
  <body>
19
- <div id="app" class="min-h-screen flex flex-col md:flex-row">
20
  <!-- Sidebar -->
21
  <aside class="bg-white w-full md:w-64 border-r border-gray-200 flex flex-col">
22
  <div class="p-6 border-b border-gray-100">
@@ -224,15 +225,26 @@
224
  <!-- Manage Cases Modal -->
225
  <div v-if="showManageCasesModal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50">
226
  <div class="bg-white p-6 rounded-xl w-[800px] h-[600px] flex flex-col">
227
- <h3 class="text-lg font-bold mb-4">Manage Cases: ${ activeSet.name }</h3>
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  <!-- Add Case Form -->
230
  <div class="grid grid-cols-3 gap-2 mb-4 bg-gray-50 p-3 rounded">
231
- <textarea v-model="newCase.prompt" placeholder="Prompt" class="border p-2 rounded text-sm h-20"></textarea>
232
- <textarea v-model="newCase.expected_output" placeholder="Expected Output (Optional)" class="border p-2 rounded text-sm h-20"></textarea>
233
  <div class="flex flex-col gap-2">
234
- <textarea v-model="newCase.criteria" placeholder="Criteria (e.g. Concise)" class="border p-2 rounded text-sm h-12"></textarea>
235
- <button @click="addCase" class="bg-green-600 text-white px-2 py-1 rounded text-sm">Add Case</button>
236
  </div>
237
  </div>
238
 
@@ -246,7 +258,7 @@
246
  </div>
247
 
248
  <div class="mt-4 flex justify-end">
249
- <button @click="showManageCasesModal = false" class="text-gray-500">Close</button>
250
  </div>
251
  </div>
252
  </div>
@@ -270,6 +282,7 @@
270
  const activeSetCases = ref([]);
271
  const newCase = ref({ prompt: '', expected_output: '', criteria: '' });
272
  const activeRun = ref(null);
 
273
 
274
  // Fetch Data
275
  const fetchData = async () => {
@@ -289,9 +302,11 @@
289
  const initData = async () => {
290
  await axios.post('/api/init');
291
  await fetchData();
 
292
  };
293
 
294
  const createTestSet = async () => {
 
295
  await axios.post('/api/test_sets', newSet.value);
296
  showCreateSetModal.value = false;
297
  newSet.value = { name: '', description: '' };
@@ -306,20 +321,66 @@
306
  };
307
 
308
  const addCase = async () => {
 
309
  await axios.post(`/api/test_sets/${activeSet.value.id}/cases`, newCase.value);
310
  const res = await axios.get(`/api/test_sets/${activeSet.value.id}/cases`);
311
  activeSetCases.value = res.data;
312
  newCase.value = { prompt: '', expected_output: '', criteria: '' };
313
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
 
315
  const startRun = async (set) => {
316
- if(!confirm(`Run evaluation for ${set.name}? This will use SiliconFlow API.`)) return;
317
 
318
- // Optimistic update
319
- const res = await axios.post('/api/run_eval', { test_set_id: set.id, model_name: 'Qwen/Qwen2.5-7B-Instruct' });
320
- alert('Evaluation started! ID: ' + res.data.run_id);
321
- currentView.value = 'runs';
322
- fetchData();
 
 
 
 
323
  };
324
 
325
  const viewRunDetails = async (id) => {
@@ -418,10 +479,14 @@
418
  activeSetCases,
419
  newCase,
420
  activeRun,
 
421
  initData,
422
  createTestSet,
423
  manageCases,
424
  addCase,
 
 
 
425
  startRun,
426
  viewRunDetails,
427
  calculateGlobalAvg,
 
13
  body { background-color: #f3f4f6; }
14
  .fade-enter-active, .fade-leave-active { transition: opacity 0.3s ease; }
15
  .fade-enter-from, .fade-leave-to { opacity: 0; }
16
+ [v-cloak] { display: none; }
17
  </style>
18
  </head>
19
  <body>
20
+ <div id="app" v-cloak class="min-h-screen flex flex-col md:flex-row">
21
  <!-- Sidebar -->
22
  <aside class="bg-white w-full md:w-64 border-r border-gray-200 flex flex-col">
23
  <div class="p-6 border-b border-gray-100">
 
225
  <!-- Manage Cases Modal -->
226
  <div v-if="showManageCasesModal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50">
227
  <div class="bg-white p-6 rounded-xl w-[800px] h-[600px] flex flex-col">
228
+ <div class="flex justify-between items-center mb-4">
229
+ <h3 class="text-lg font-bold">管理用例: ${ activeSet.name }</h3>
230
+ <div class="flex gap-2">
231
+ <input type="file" ref="fileInput" @change="handleFileUpload" accept=".csv,.json" class="hidden">
232
+ <button @click="triggerUpload" class="bg-blue-600 text-white px-3 py-1 rounded hover:bg-blue-700 text-sm">
233
+ <i class="fa-solid fa-upload"></i> 导入用例 (CSV/JSON)
234
+ </button>
235
+ <button @click="downloadTemplate" class="border border-gray-300 text-gray-600 px-3 py-1 rounded hover:bg-gray-50 text-sm">
236
+ <i class="fa-solid fa-download"></i> 模板
237
+ </button>
238
+ </div>
239
+ </div>
240
 
241
  <!-- Add Case Form -->
242
  <div class="grid grid-cols-3 gap-2 mb-4 bg-gray-50 p-3 rounded">
243
+ <textarea v-model="newCase.prompt" placeholder="提示词 (Prompt)" class="border p-2 rounded text-sm h-20"></textarea>
244
+ <textarea v-model="newCase.expected_output" placeholder="预期输出 (Expected Output - 选填)" class="border p-2 rounded text-sm h-20"></textarea>
245
  <div class="flex flex-col gap-2">
246
+ <textarea v-model="newCase.criteria" placeholder="评测标准 (Criteria)" class="border p-2 rounded text-sm h-12"></textarea>
247
+ <button @click="addCase" class="bg-green-600 text-white px-2 py-1 rounded text-sm">添加用例</button>
248
  </div>
249
  </div>
250
 
 
258
  </div>
259
 
260
  <div class="mt-4 flex justify-end">
261
+ <button @click="showManageCasesModal = false" class="text-gray-500">关闭</button>
262
  </div>
263
  </div>
264
  </div>
 
282
  const activeSetCases = ref([]);
283
  const newCase = ref({ prompt: '', expected_output: '', criteria: '' });
284
  const activeRun = ref(null);
285
+ const fileInput = ref(null);
286
 
287
  // Fetch Data
288
  const fetchData = async () => {
 
302
  const initData = async () => {
303
  await axios.post('/api/init');
304
  await fetchData();
305
+ alert('重置成功');
306
  };
307
 
308
  const createTestSet = async () => {
309
+ if(!newSet.value.name) return alert('请输入名称');
310
  await axios.post('/api/test_sets', newSet.value);
311
  showCreateSetModal.value = false;
312
  newSet.value = { name: '', description: '' };
 
321
  };
322
 
323
  const addCase = async () => {
324
+ if(!newCase.value.prompt) return alert('提示词不能为空');
325
  await axios.post(`/api/test_sets/${activeSet.value.id}/cases`, newCase.value);
326
  const res = await axios.get(`/api/test_sets/${activeSet.value.id}/cases`);
327
  activeSetCases.value = res.data;
328
  newCase.value = { prompt: '', expected_output: '', criteria: '' };
329
  };
330
+
331
+ const triggerUpload = () => {
332
+ fileInput.value.click();
333
+ };
334
+
335
+ const handleFileUpload = async (event) => {
336
+ const file = event.target.files[0];
337
+ if (!file) return;
338
+
339
+ const formData = new FormData();
340
+ formData.append('file', file);
341
+
342
+ try {
343
+ const res = await axios.post(`/api/test_sets/${activeSet.value.id}/import`, formData, {
344
+ headers: { 'Content-Type': 'multipart/form-data' }
345
+ });
346
+ alert(`成功导入 ${res.data.count} 条用例`);
347
+ // Refresh cases
348
+ const casesRes = await axios.get(`/api/test_sets/${activeSet.value.id}/cases`);
349
+ activeSetCases.value = casesRes.data;
350
+ } catch (e) {
351
+ alert('导入失败: ' + (e.response?.data?.error || e.message));
352
+ }
353
+ // Reset input
354
+ event.target.value = '';
355
+ };
356
+
357
+ const downloadTemplate = () => {
358
+ const csvContent = "prompt,expected_output,criteria\n示例问题,示例预期回答,示例评分标准";
359
+ const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
360
+ const link = document.createElement("a");
361
+ if (link.download !== undefined) {
362
+ const url = URL.createObjectURL(blob);
363
+ link.setAttribute("href", url);
364
+ link.setAttribute("download", "template.csv");
365
+ link.style.visibility = 'hidden';
366
+ document.body.appendChild(link);
367
+ link.click();
368
+ document.body.removeChild(link);
369
+ }
370
+ };
371
 
372
  const startRun = async (set) => {
373
+ if(!confirm(`确认开始运行评测 "${set.name}"? 这将消耗 API Token。`)) return;
374
 
375
+ try {
376
+ // Optimistic update
377
+ const res = await axios.post('/api/run_eval', { test_set_id: set.id, model_name: 'Qwen/Qwen2.5-7B-Instruct' });
378
+ alert('评测已开始! ID: ' + res.data.run_id);
379
+ currentView.value = 'runs';
380
+ fetchData();
381
+ } catch (e) {
382
+ alert('启动失败: ' + (e.response?.data?.error || e.message));
383
+ }
384
  };
385
 
386
  const viewRunDetails = async (id) => {
 
479
  activeSetCases,
480
  newCase,
481
  activeRun,
482
+ fileInput,
483
  initData,
484
  createTestSet,
485
  manageCases,
486
  addCase,
487
+ triggerUpload,
488
+ handleFileUpload,
489
+ downloadTemplate,
490
  startRun,
491
  viewRunDetails,
492
  calculateGlobalAvg,