Spaces:

duqing026
/

eval-matrix-agent

Sleeping

App Files Files Community

Trae Assistant committed on Feb 10

Commit

d8467fb

1 Parent(s): 243bd47

feat: optimize functionality, add file upload, improve UI and i18n

Browse files

Files changed (4) hide show

.gitignore +5 -0
Dockerfile +11 -2
app.py +81 -2
templates/index.html +78 -13

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+instance/
+__pycache__/
+*.pyc
+.env
+test_api.py

Dockerfile CHANGED Viewed

@@ -1,22 +1,31 @@
 # Use an official Python runtime as a parent image
 FROM python:3.11-slim
 # Set the working directory in the container
 WORKDIR /app
 # Copy the current directory contents into the container at /app
-COPY . /app
 # Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 # Make port 7860 available to the world outside this container
 EXPOSE 7860
 # Define environment variable
 ENV FLASK_APP=app.py
 ENV PYTHONUNBUFFERED=1
 # Run app.py when the container launches
-# Using python directly for simplicity with SQLite and ensuring single worker
 CMD ["python", "app.py"]

 # Use an official Python runtime as a parent image
 FROM python:3.11-slim
+# Create a non-root user (UID 1000) so the app does not run as root
+RUN useradd -m -u 1000 user
 # Set the working directory in the container
 WORKDIR /app
+# Copy only the dependency manifest first: the pip layer below is then reused
+# from the build cache until requirements.txt itself changes
+COPY requirements.txt /app/requirements.txt
 # Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy the current directory contents into the container at /app
+COPY --chown=user . /app
+# Create instance directory and set permissions
+RUN mkdir -p instance && chown -R user:user instance
+# Switch to non-root user
+USER user
 # Make port 7860 available to the world outside this container
 EXPOSE 7860
 # Define environment variable
 ENV FLASK_APP=app.py
 ENV PYTHONUNBUFFERED=1
+ENV PATH="/home/user/.local/bin:$PATH"
 # Run app.py when the container launches
 CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -4,15 +4,19 @@ import sqlite3
 import requests
 import datetime
 import time
 from flask import Flask, render_template, request, jsonify, g
 from dotenv import load_dotenv
 # Load env
 load_dotenv()
 app = Flask(__name__, instance_relative_config=True)
-app.config['SECRET_KEY'] = 'dev-secret-key-eval-matrix'
 app.config['DATABASE'] = os.path.join(app.instance_path, 'eval_matrix.db')
 # Ensure instance folder exists
 try:
@@ -21,7 +25,7 @@ except OSError:
     pass
 # SiliconFlow Config
-SILICONFLOW_API_KEY = "sk-vimuseiptfbomzegyuvmebjzooncsqbyjtlddrfodzcdskgi"
 SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1/chat/completions"
 # Using Qwen 2.5 7B Instruct as the default judge/worker
 DEFAULT_MODEL = "Qwen/Qwen2.5-7B-Instruct"
@@ -208,6 +212,69 @@ def handle_test_cases(id):
     cur = db.execute('SELECT * FROM test_cases WHERE test_set_id = ?', (id,))
     return jsonify([dict(row) for row in cur.fetchall()])
 # Evaluation Execution
 @app.route('/api/run_eval', methods=['POST'])
 def run_eval():
@@ -282,6 +349,18 @@ def get_run_details(id):
         "results": [dict(row) for row in results]
     })
 if __name__ == '__main__':
     with app.app_context():
         init_db()

 import requests
 import datetime
 import time
+import csv
+import io
 from flask import Flask, render_template, request, jsonify, g
+from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
 # Load env
+# load_dotenv() runs before the os.getenv() lookups below.
 load_dotenv()
 app = Flask(__name__, instance_relative_config=True)
+# NOTE(review): when SECRET_KEY is not set in the environment this falls back
+# to a fixed value that is visible in source - confirm every real deployment
+# sets SECRET_KEY.
+app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'dev-secret-key-eval-matrix')
+# The database file lives under the Flask instance path.
 app.config['DATABASE'] = os.path.join(app.instance_path, 'eval_matrix.db')
+app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB Max Upload
 # Ensure instance folder exists
 try:
     pass
 # SiliconFlow Config
+# The API key comes only from the environment; no credential is embedded in
+# source. With the variable unset the key is an empty string, so API requests
+# will fail authentication until SILICONFLOW_API_KEY is provided.
+SILICONFLOW_API_KEY = os.getenv("SILICONFLOW_API_KEY", "")
 SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1/chat/completions"
 # Using Qwen 2.5 7B Instruct as the default judge/worker
 DEFAULT_MODEL = "Qwen/Qwen2.5-7B-Instruct"
     cur = db.execute('SELECT * FROM test_cases WHERE test_set_id = ?', (id,))
     return jsonify([dict(row) for row in cur.fetchall()])
+def _cases_from_csv(text, test_set_id):
+    """Parse CSV text into rows ready for insertion into ``test_cases``.
+
+    Args:
+        text: Decoded CSV content whose first row is the header.
+        test_set_id: Value stored as ``test_set_id`` on every returned row.
+
+    Returns:
+        A list of ``(test_set_id, prompt, expected_output, criteria)`` tuples.
+        Rows whose ``prompt`` cell is missing or blank are skipped.
+
+    Raises:
+        ValueError: If there is no header row or it lacks a ``prompt`` column.
+        csv.Error: If the csv module cannot parse the content.
+    """
+    reader = csv.DictReader(io.StringIO(text, newline=None))
+    # fieldnames is None for an empty file, so test it before the membership check.
+    if not reader.fieldnames or 'prompt' not in reader.fieldnames:
+        raise ValueError("CSV must have a 'prompt' column")
+    cases = []
+    for row in reader:
+        prompt = row.get('prompt')
+        if not prompt or not prompt.strip():
+            continue
+        # DictReader fills cells missing from a short row with None.
+        cases.append((
+            test_set_id,
+            prompt,
+            row.get('expected_output') or '',
+            row.get('criteria') or '',
+        ))
+    return cases
+def _cases_from_json(text, test_set_id):
+    """Parse JSON text (a list of objects) into rows for ``test_cases``.
+
+    Args:
+        text: Decoded JSON content.
+        test_set_id: Value stored as ``test_set_id`` on every returned row.
+
+    Returns:
+        A list of ``(test_set_id, prompt, expected_output, criteria)`` tuples.
+        List items that are not objects, or whose ``prompt`` is not a
+        non-blank string, are skipped.
+
+    Raises:
+        ValueError: If the text is not valid JSON (``json.JSONDecodeError`` is
+            a ``ValueError``) or the top-level value is not a list.
+    """
+    # Imported locally so this helper does not depend on a module-level import.
+    import json
+    data = json.loads(text)
+    if not isinstance(data, list):
+        raise ValueError("JSON must be a list of objects")
+    def as_text(value):
+        # Only text can be bound to the INSERT; serialise anything else.
+        if value is None:
+            return ''
+        if isinstance(value, str):
+            return value
+        return json.dumps(value, ensure_ascii=False)
+    cases = []
+    for item in data:
+        # On a plain string, "'prompt' in item" would be a substring test.
+        if not isinstance(item, dict):
+            continue
+        prompt = item.get('prompt')
+        if not isinstance(prompt, str) or not prompt.strip():
+            continue
+        cases.append((
+            test_set_id,
+            prompt,
+            as_text(item.get('expected_output')),
+            as_text(item.get('criteria')),
+        ))
+    return cases
+@app.route('/api/test_sets/<int:id>/import', methods=['POST'])
+def import_test_cases(id):
+    """Bulk-import test cases into test set ``id`` from an uploaded file.
+
+    Expects a multipart upload with a ``file`` part whose name ends in
+    ``.csv`` or ``.json`` (case-insensitive). Content is decoded as UTF-8 and
+    a leading byte-order mark is ignored.
+
+    Returns:
+        ``{"status": "success", "count": n}`` on success;
+        ``{"error": ...}`` with 400 for a missing, unsupported or unparsable
+        file, or with 500 when the database insert fails.
+    """
+    if 'file' not in request.files:
+        return jsonify({"error": "No file part"}), 400
+    file = request.files['file']
+    if not file.filename:
+        return jsonify({"error": "No selected file"}), 400
+    # The upload is never written to disk, so the raw name is only inspected
+    # for its extension. secure_filename() drops non-ASCII characters and can
+    # reduce a name such as "用例.csv" to "csv", losing the extension.
+    extension = os.path.splitext(file.filename)[1].lower()
+    parsers = {'.csv': _cases_from_csv, '.json': _cases_from_json}
+    parse = parsers.get(extension)
+    if parse is None:
+        return jsonify({"error": "Unsupported file type. Use .csv or .json"}), 400
+    try:
+        # utf-8-sig strips the BOM that spreadsheet exports commonly prepend.
+        text = file.stream.read().decode('utf-8-sig')
+    except UnicodeDecodeError:
+        return jsonify({"error": "File must be UTF-8 encoded"}), 400
+    try:
+        cases = parse(text, id)
+    except (ValueError, csv.Error) as exc:
+        return jsonify({"error": str(exc)}), 400
+    if cases:
+        db = get_db()
+        try:
+            db.executemany('INSERT INTO test_cases (test_set_id, prompt, expected_output, criteria) VALUES (?, ?, ?, ?)', cases)
+            db.commit()
+        except sqlite3.Error as exc:
+            # Discard any partially applied batch before reporting the failure.
+            db.rollback()
+            return jsonify({"error": str(exc)}), 500
+    return jsonify({"status": "success", "count": len(cases)})
 # Evaluation Execution
 @app.route('/api/run_eval', methods=['POST'])
 def run_eval():
         "results": [dict(row) for row in results]
     })
+@app.errorhandler(413)
+def request_entity_too_large(error):
+    """Answer HTTP 413 with a JSON error body instead of the default page."""
+    payload = {"error": "File too large"}
+    return jsonify(payload), 413
+@app.errorhandler(500)
+def internal_error(error):
+    """Answer HTTP 500 with a generic JSON error body."""
+    payload = {"error": "Internal Server Error"}
+    return jsonify(payload), 500
+@app.errorhandler(404)
+def not_found(error):
+    """Answer HTTP 404 with a JSON error body."""
+    payload = {"error": "Not Found"}
+    return jsonify(payload), 404
 if __name__ == '__main__':
     with app.app_context():
         init_db()

templates/index.html CHANGED Viewed

@@ -13,10 +13,11 @@
         body { background-color: #f3f4f6; }
         .fade-enter-active, .fade-leave-active { transition: opacity 0.3s ease; }
         .fade-enter-from, .fade-leave-to { opacity: 0; }
     </style>
 </head>
 <body>
-    <div id="app" class="min-h-screen flex flex-col md:flex-row">
         <!-- Sidebar -->
         <aside class="bg-white w-full md:w-64 border-r border-gray-200 flex flex-col">
             <div class="p-6 border-b border-gray-100">
@@ -224,15 +225,26 @@
             <!-- Manage Cases Modal -->
             <div v-if="showManageCasesModal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50">
                 <div class="bg-white p-6 rounded-xl w-[800px] h-[600px] flex flex-col">
-                    <h3 class="text-lg font-bold mb-4">Manage Cases: ${ activeSet.name }</h3>
                     <!-- Add Case Form -->
                     <div class="grid grid-cols-3 gap-2 mb-4 bg-gray-50 p-3 rounded">
-                        <textarea v-model="newCase.prompt" placeholder="Prompt" class="border p-2 rounded text-sm h-20"></textarea>
-                        <textarea v-model="newCase.expected_output" placeholder="Expected Output (Optional)" class="border p-2 rounded text-sm h-20"></textarea>
                         <div class="flex flex-col gap-2">
-                            <textarea v-model="newCase.criteria" placeholder="Criteria (e.g. Concise)" class="border p-2 rounded text-sm h-12"></textarea>
-                            <button @click="addCase" class="bg-green-600 text-white px-2 py-1 rounded text-sm">Add Case</button>
                         </div>
                     </div>
@@ -246,7 +258,7 @@
                     </div>
                     <div class="mt-4 flex justify-end">
-                        <button @click="showManageCasesModal = false" class="text-gray-500">Close</button>
                     </div>
                 </div>
             </div>
@@ -270,6 +282,7 @@
                 const activeSetCases = ref([]);
                 const newCase = ref({ prompt: '', expected_output: '', criteria: '' });
                 const activeRun = ref(null);
                 // Fetch Data
                 const fetchData = async () => {
@@ -289,9 +302,11 @@
                 const initData = async () => {
                     await axios.post('/api/init');
                     await fetchData();
                 };
                 const createTestSet = async () => {
                     await axios.post('/api/test_sets', newSet.value);
                     showCreateSetModal.value = false;
                     newSet.value = { name: '', description: '' };
@@ -306,20 +321,66 @@
                 };
                 const addCase = async () => {
                     await axios.post(`/api/test_sets/${activeSet.value.id}/cases`, newCase.value);
                     const res = await axios.get(`/api/test_sets/${activeSet.value.id}/cases`);
                     activeSetCases.value = res.data;
                     newCase.value = { prompt: '', expected_output: '', criteria: '' };
                 };
                 const startRun = async (set) => {
-                    if(!confirm(`Run evaluation for ${set.name}? This will use SiliconFlow API.`)) return;
-                    // Optimistic update
-                    const res = await axios.post('/api/run_eval', { test_set_id: set.id, model_name: 'Qwen/Qwen2.5-7B-Instruct' });
-                    alert('Evaluation started! ID: ' + res.data.run_id);
-                    currentView.value = 'runs';
-                    fetchData();
                 };
                 const viewRunDetails = async (id) => {
@@ -418,10 +479,14 @@
                     activeSetCases,
                     newCase,
                     activeRun,
                     initData,
                     createTestSet,
                     manageCases,
                     addCase,
                     startRun,
                     viewRunDetails,
                     calculateGlobalAvg,

         body { background-color: #f3f4f6; }
         .fade-enter-active, .fade-leave-active { transition: opacity 0.3s ease; }
         .fade-enter-from, .fade-leave-to { opacity: 0; }
+        [v-cloak] { display: none; }
     </style>
 </head>
 <body>
+    <div id="app" v-cloak class="min-h-screen flex flex-col md:flex-row">
         <!-- Sidebar -->
         <aside class="bg-white w-full md:w-64 border-r border-gray-200 flex flex-col">
             <div class="p-6 border-b border-gray-100">
             <!-- Manage Cases Modal -->
             <div v-if="showManageCasesModal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50">
                 <div class="bg-white p-6 rounded-xl w-[800px] h-[600px] flex flex-col">
+                    <div class="flex justify-between items-center mb-4">
+                        <h3 class="text-lg font-bold">管理用例: ${ activeSet.name }</h3>
+                        <div class="flex gap-2">
+                             <input type="file" ref="fileInput" @change="handleFileUpload" accept=".csv,.json" class="hidden">
+                             <button @click="triggerUpload" class="bg-blue-600 text-white px-3 py-1 rounded hover:bg-blue-700 text-sm">
+                                <i class="fa-solid fa-upload"></i> 导入用例 (CSV/JSON)
+                             </button>
+                             <button @click="downloadTemplate" class="border border-gray-300 text-gray-600 px-3 py-1 rounded hover:bg-gray-50 text-sm">
+                                <i class="fa-solid fa-download"></i> 模板
+                             </button>
+                        </div>
+                    </div>
                     <!-- Add Case Form -->
                     <div class="grid grid-cols-3 gap-2 mb-4 bg-gray-50 p-3 rounded">
+                        <textarea v-model="newCase.prompt" placeholder="提示词 (Prompt)" class="border p-2 rounded text-sm h-20"></textarea>
+                        <textarea v-model="newCase.expected_output" placeholder="预期输出 (Expected Output - 选填)" class="border p-2 rounded text-sm h-20"></textarea>
                         <div class="flex flex-col gap-2">
+                            <textarea v-model="newCase.criteria" placeholder="评测标准 (Criteria)" class="border p-2 rounded text-sm h-12"></textarea>
+                            <button @click="addCase" class="bg-green-600 text-white px-2 py-1 rounded text-sm">添加用例</button>
                         </div>
                     </div>
                     </div>
                     <div class="mt-4 flex justify-end">
+                        <button @click="showManageCasesModal = false" class="text-gray-500">关闭</button>
                     </div>
                 </div>
             </div>
                 const activeSetCases = ref([]);
                 const newCase = ref({ prompt: '', expected_output: '', criteria: '' });
                 const activeRun = ref(null);
+                const fileInput = ref(null);
                 // Fetch Data
                 const fetchData = async () => {
                 const initData = async () => {
+                    // POST /api/init, then reload the lists via fetchData().
                     await axios.post('/api/init');
                     await fetchData();
+                    // Confirmation shown once both calls resolve (text: "reset successful").
+                    alert('重置成功');
                 };
                 const createTestSet = async () => {
+                    if(!newSet.value.name) return alert('请输入名称');
                     await axios.post('/api/test_sets', newSet.value);
                     showCreateSetModal.value = false;
                     newSet.value = { name: '', description: '' };
                 };
                 const addCase = async () => {
+                    // Client-side guard: an empty prompt is rejected before any request
+                    // is sent (alert text: "prompt must not be empty").
+                    if(!newCase.value.prompt) return alert('提示词不能为空');
+                    // Create the case on the active set, then re-fetch that set's cases.
                     await axios.post(`/api/test_sets/${activeSet.value.id}/cases`, newCase.value);
                     const res = await axios.get(`/api/test_sets/${activeSet.value.id}/cases`);
                     activeSetCases.value = res.data;
+                    // Clear the form for the next entry.
                     newCase.value = { prompt: '', expected_output: '', criteria: '' };
                 };
+                const triggerUpload = () => {
+                    // Forward the button click to the hidden <input type="file"> held in the fileInput ref.
+                    const picker = fileInput.value;
+                    picker.click();
+                };
+                const handleFileUpload = async (event) => {
+                    // Send the chosen file to the active set's import endpoint, report
+                    // the outcome, and reload that set's cases on success.
+                    const input = event.target;
+                    const selected = input.files[0];
+                    if (!selected) return;
+                    const payload = new FormData();
+                    payload.append('file', selected);
+                    const requestOptions = { headers: { 'Content-Type': 'multipart/form-data' } };
+                    try {
+                        const uploadRes = await axios.post(`/api/test_sets/${activeSet.value.id}/import`, payload, requestOptions);
+                        alert(`成功导入 ${uploadRes.data.count} 条用例`);
+                        // Reload the case list so the modal shows the imported rows
+                        const { data: refreshed } = await axios.get(`/api/test_sets/${activeSet.value.id}/cases`);
+                        activeSetCases.value = refreshed;
+                    } catch (err) {
+                        const reason = err.response?.data?.error || err.message;
+                        alert('导入失败: ' + reason);
+                    }
+                    // Clear the input so picking the same file again fires @change
+                    input.value = '';
+                };
+                const downloadTemplate = () => {
+                    // Build a one-row sample CSV in memory and hand it to the browser as
+                    // a download named template.csv.
+                    const csvContent = "prompt,expected_output,criteria\n示例问题,示例预期回答,示例评分标准";
+                    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
+                    const link = document.createElement("a");
+                    // Only proceed where the anchor "download" attribute is supported.
+                    if (link.download !== undefined) {
+                        const url = URL.createObjectURL(blob);
+                        link.setAttribute("href", url);
+                        link.setAttribute("download", "template.csv");
+                        link.style.visibility = 'hidden';
+                        document.body.appendChild(link);
+                        link.click();
+                        document.body.removeChild(link);
+                        // Release the blob URL; deferred one tick so the download started
+                        // by click() has already picked it up.
+                        setTimeout(() => URL.revokeObjectURL(url), 0);
+                    }
+                };
                 const startRun = async (set) => {
+                    // Ask for confirmation first (the prompt warns that the run consumes API tokens).
+                    if(!confirm(`确认开始运行评测 "${set.name}"? 这将消耗 API Token。`)) return;
+                    try {
+                        // Start the run on the backend; the model name is fixed here.
+                        const res = await axios.post('/api/run_eval', { test_set_id: set.id, model_name: 'Qwen/Qwen2.5-7B-Instruct' });
+                        alert('评测已开始! ID: ' + res.data.run_id);
+                        currentView.value = 'runs';
+                        // fetchData() is not awaited, so the view switches before the refresh completes.
+                        fetchData();
+                    } catch (e) {
+                         // Prefer the server-provided error text, falling back to the axios message.
+                         alert('启动失败: ' + (e.response?.data?.error || e.message));
+                    }
                 };
                 const viewRunDetails = async (id) => {
                     activeSetCases,
                     newCase,
                     activeRun,
+                    fileInput,
                     initData,
                     createTestSet,
                     manageCases,
                     addCase,
+                    triggerUpload,
+                    handleFileUpload,
+                    downloadTemplate,
                     startRun,
                     viewRunDetails,
                     calculateGlobalAvg,