# Trae Assistant
# Initial commit
# 3e57f30
import os
import json
import random
import csv
import io
from datetime import datetime, timedelta
from flask import Flask, request, jsonify, render_template
# pandas is optional: when it cannot be imported (e.g. a Python 3.14 env),
# the module runs on the standard-library code paths instead.
try:
    import pandas as pd
except ImportError:
    HAS_PANDAS = False
    print("Warning: Pandas not found. Running in fallback mode.")
else:
    HAS_PANDAS = True
# Flask application object; request bodies are capped at 50 MB.
app = Flask(__name__)
app.config.update(MAX_CONTENT_LENGTH=50 * 1024 * 1024)

# Configuration
UPLOAD_FOLDER = '/tmp'
# File extensions (lower-case, without the dot) accepted for upload.
ALLOWED_EXTENSIONS = {'csv'}
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively; a name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
def generate_demo_data_raw():
    """Build a synthetic purchase-to-pay event log as a list of row dicts.

    Fifty cases (``CASE-001`` .. ``CASE-050``) are produced. Each case walks
    the standard activity sequence with two random deviations:

    * 20% of cases skip "Approve Requisition";
    * 10% of cases get a "Reject Invoice" / "Receive Invoice" rework loop
      right after the first "Receive Invoice".

    Every row has the keys ``case_id``, ``activity`` and ``timestamp``
    (formatted ``%Y-%m-%d %H:%M:%S``). Within a case, consecutive events are
    spaced 60 to 2880 minutes apart.
    """
    standard_flow = (
        "Create Purchase Requisition",
        "Approve Requisition",
        "Create Purchase Order",
        "Receive Goods",
        "Receive Invoice",
        "Match Invoice",
        "Pay Invoice",
        "Close Case",
    )

    records = []
    for number in range(1, 51):
        case_id = f"CASE-{number:03d}"
        # Each case starts somewhere in the last 1-30 days.
        clock = datetime.now() - timedelta(days=random.randint(1, 30))

        steps = list(standard_flow)
        if random.random() < 0.2:
            steps.remove("Approve Requisition")
        if random.random() < 0.1:
            # Splice the rework loop in directly after "Receive Invoice".
            after_invoice = steps.index("Receive Invoice") + 1
            steps[after_invoice:after_invoice] = ["Reject Invoice", "Receive Invoice"]

        for step in steps:
            clock = clock + timedelta(minutes=random.randint(60, 2880))
            records.append({
                "case_id": case_id,
                "activity": step,
                "timestamp": clock.strftime("%Y-%m-%d %H:%M:%S"),
            })
    return records
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
@app.route('/api/demo', methods=['GET'])
def get_demo_data():
    """Serve a freshly generated demo event log as CSV text.

    The rows come from ``generate_demo_data_raw()``. They are serialized with
    pandas when it is available, otherwise with the standard-library ``csv``
    module.

    Returns:
        A ``(body, status, headers)`` tuple. The body is the CSV text and the
        response is explicitly labelled ``text/csv``; returning the bare
        string would make Flask send it with its default HTML content type.
    """
    rows = generate_demo_data_raw()

    if HAS_PANDAS:
        csv_text = pd.DataFrame(rows).to_csv(index=False)
    else:
        # Manual CSV generation
        buffer = io.StringIO()
        if rows:
            writer = csv.DictWriter(buffer, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows(rows)
        csv_text = buffer.getvalue()

    return csv_text, 200, {"Content-Type": "text/csv; charset=utf-8"}
def _find_column(columns, keywords):
    """Return the first name in *columns* that contains any of *keywords*.

    Matching is a case-insensitive substring test. Returns ``None`` when no
    column matches.
    """
    return next(
        (col for col in columns if any(word in col.lower() for word in keywords)),
        None,
    )


def _detect_columns(columns):
    """Guess the (case, activity, timestamp) column names from *columns*.

    Each element of the returned tuple is ``None`` when nothing matched.
    """
    return (
        _find_column(columns, ('case', 'id')),
        _find_column(columns, ('activity', 'event', 'name')),
        _find_column(columns, ('time', 'date')),
    )


def _parse_timestamp(raw):
    """Best-effort conversion of a CSV cell to a naive ``datetime``.

    ISO-8601 is tried first (with 'Z' rewritten to '+00:00'), then the
    ``%Y-%m-%d %H:%M:%S`` format. A value that cannot be parsed (including
    ``None`` from a short row) falls back to the current time, so one bad
    cell does not abort the whole analysis.
    """
    try:
        parsed = datetime.fromisoformat(raw.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError):
        try:
            parsed = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")
        except (TypeError, ValueError):
            return datetime.now()

    # Normalize offset-aware values to naive UTC. Aware and naive datetimes
    # cannot be compared or subtracted, so a file mixing both styles would
    # otherwise fail while sorting.
    offset = parsed.utcoffset()
    if offset is not None:
        parsed = (parsed - offset).replace(tzinfo=None)
    return parsed


def _format_graph(nodes, links, total_cases):
    """Convert the accumulated node/link statistics into ECharts structures.

    Args:
        nodes: activity name -> {"count", "in_degree", "out_degree"}.
        links: (source, target) -> {"count", "total_duration"} (hours).
        total_cases: number of cases, used to scale link widths.

    Returns:
        ``(echarts_nodes, echarts_links)`` as lists of dicts.
    """
    max_count = max((info["count"] for info in nodes.values()), default=0)

    echarts_nodes = []
    for name, info in nodes.items():
        no_incoming = info["in_degree"] == 0
        no_outgoing = info["out_degree"] == 0

        if not no_incoming and not no_outgoing:
            color = "#5470c6"
        elif no_incoming:
            color = "#91cc75"
        else:
            color = "#ee6666"

        if no_incoming:
            category = "Start"
        elif no_outgoing:
            category = "End"
        else:
            category = "Activity"

        # Scale symbols between 20 and 60 relative to the busiest activity.
        symbol_size = 20 + (info["count"] / max_count) * 40 if max_count > 0 else 30
        echarts_nodes.append({
            "name": name,
            "value": info["count"],
            "symbolSize": symbol_size,
            "itemStyle": {"color": color},
            "category": category,
        })

    echarts_links = []
    for (source, target), info in links.items():
        avg_duration = info["total_duration"] / info["count"]
        echarts_links.append({
            "source": source,
            "target": target,
            "value": info["count"],
            "label": {
                "show": True,
                "formatter": f"{info['count']} ({avg_duration:.1f}h)"
            },
            "lineStyle": {
                "width": 1 + (info["count"] / total_cases) * 5,
                "curveness": 0.2
            }
        })

    return echarts_nodes, echarts_links


@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Analyze an uploaded CSV event log and return a process graph as JSON.

    The upload is expected in the ``file`` form field. Columns for the case
    id, activity and timestamp are detected by substring match on the header
    names. Events are grouped per case, ordered by timestamp, and folded into
    node/link statistics by ``process_case_events``.

    Returns:
        On success, JSON with ``nodes`` and ``links`` (ECharts graph data)
        and ``stats`` (``total_cases``, ``total_events``,
        ``avg_events_per_case``).
        On invalid input, ``{"error": ...}`` with status 400.
        On any unexpected failure, ``{"error": ...}`` with status 500.
    """
    try:
        if 'file' not in request.files:
            return jsonify({"error": "没有上传文件"}), 400

        file = request.files['file']
        if file.filename == '':
            return jsonify({"error": "未选择文件"}), 400

        # Reject unsupported uploads explicitly instead of falling through
        # without producing a proper client error.
        if not file or not allowed_file(file.filename):
            return jsonify({"error": "不支持的文件类型, 仅支持 CSV 文件"}), 400

        nodes = {}
        links = {}

        # Use Pandas if available, otherwise fallback
        if HAS_PANDAS:
            try:
                df = pd.read_csv(file)
            except Exception as e:
                return jsonify({"error": f"CSV读取失败: {str(e)}"}), 400

            df.columns = [c.lower() for c in df.columns]
            case_col, act_col, time_col = _detect_columns(list(df.columns))
            if not (case_col and act_col and time_col):
                return jsonify({"error": "缺少必要列: 需包含 CaseID, Activity, Timestamp"}), 400

            try:
                df[time_col] = pd.to_datetime(df[time_col])
            except Exception:
                return jsonify({"error": "时间戳格式无效"}), 400

            df = df.sort_values(by=[case_col, time_col])
            cases = df.groupby(case_col)
            total_cases = len(cases)
            total_events = len(df)

            for _, group in cases:
                events = group.to_dict('records')
                process_case_events(events, nodes, links, act_col, time_col)
        else:
            # --- FALLBACK IMPLEMENTATION (Standard Lib) ---
            try:
                text = file.stream.read().decode("UTF8")
            except UnicodeDecodeError as e:
                # An undecodable upload is a client error, not a server fault.
                return jsonify({"error": f"CSV读取失败: {str(e)}"}), 400

            reader = csv.DictReader(io.StringIO(text, newline=None))
            rows = list(reader)
            if not rows:
                return jsonify({"error": "空文件"}), 400

            # Detect columns (original header spelling is kept as the row key)
            case_key, act_key, time_key = _detect_columns(reader.fieldnames)
            if not (case_key and act_key and time_key):
                return jsonify({"error": "缺少必要列: 需包含 CaseID, Activity, Timestamp"}), 400

            # Group by Case
            case_map = {}
            for row in rows:
                case_map.setdefault(row[case_key], []).append(row)

            total_cases = len(case_map)
            total_events = len(rows)

            # Sort and Process
            for events in case_map.values():
                for event in events:
                    event['_dt'] = _parse_timestamp(event[time_key])
                events.sort(key=lambda item: item['_dt'])
                process_case_events(events, nodes, links, act_key, '_dt')

        # --- COMMON FORMATTING ---
        echarts_nodes, echarts_links = _format_graph(nodes, links, total_cases)

        return jsonify({
            "nodes": echarts_nodes,
            "links": echarts_links,
            "stats": {
                "total_cases": total_cases,
                "total_events": total_events,
                "avg_events_per_case": round(total_events / total_cases, 1) if total_cases else 0
            }
        })

    except Exception as e:
        # Top-level boundary: log the traceback and report a generic 500.
        import traceback
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
def process_case_events(events, nodes, links, act_key, time_key):
    """Fold one case's time-ordered events into the shared graph statistics.

    Args:
        events: list of event mappings for a single case, already sorted.
        nodes: dict updated in place; activity name ->
            {"count", "in_degree", "out_degree"}.
        links: dict updated in place; (source, target) ->
            {"count", "total_duration"} with the duration in hours.
        act_key: key of the activity name within each event.
        time_key: key of the timestamp within each event.

    Returns:
        None. ``nodes`` and ``links`` are mutated.
    """
    def node_for(name):
        return nodes.setdefault(name, {"count": 0, "in_degree": 0, "out_degree": 0})

    def as_datetime(value):
        # Handle pandas timestamp vs python datetime
        return value.to_pydatetime() if hasattr(value, 'to_pydatetime') else value

    last_pos = len(events) - 1
    for pos, event in enumerate(events):
        source = event[act_key]
        node_for(source)["count"] += 1

        if pos >= last_pos:
            # The final event of a case has no outgoing transition.
            continue

        follower = events[pos + 1]
        target = follower[act_key]

        started = as_datetime(event[time_key])
        finished = as_datetime(follower[time_key])
        hours = (finished - started).total_seconds() / 3600.0

        edge = links.setdefault((source, target), {"count": 0, "total_duration": 0.0})
        edge["count"] += 1
        edge["total_duration"] += hours

        nodes[source]["out_degree"] += 1
        node_for(target)["in_degree"] += 1
if __name__ == '__main__':
    # The server listens on all interfaces, and the Werkzeug debugger lets a
    # browser client execute arbitrary code. Debug mode is therefore off
    # unless explicitly requested, e.g. FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)