Spaces:

duqing026
/

process-xray-pro

Sleeping

Trae Assistant

Initial commit

3e57f30 27 days ago

10.4 kB

	import os
	import json
	import random
	import csv
	import io
	from datetime import datetime, timedelta
	from flask import Flask, request, jsonify, render_template

	# pandas is optional.  If the import fails (e.g. in a Python 3.14
	# environment), HAS_PANDAS is set to False so the code that checks the
	# flag can take its standard-library path instead.
	try:
	    import pandas as pd
	except ImportError:
	    HAS_PANDAS = False
	    print("Warning: Pandas not found. Running in fallback mode.")
	else:
	    HAS_PANDAS = True

	# Configuration
	UPLOAD_FOLDER = '/tmp'
	ALLOWED_EXTENSIONS = {'csv'}

	# Flask application object; MAX_CONTENT_LENGTH sets a 50MB max upload size.
	app = Flask(__name__)
	app.config.update(MAX_CONTENT_LENGTH=50 * 1024 * 1024)

	def allowed_file(filename, *, allowed_extensions=None):
	    """Return True when *filename* ends in an accepted extension.

	    The extension is the text after the last ``.`` and is compared
	    case-insensitively.

	    Args:
	        filename: Name supplied with the upload.  ``None`` or an empty
	            string is reported as not allowed instead of raising.
	        allowed_extensions: Optional collection of lower-case extensions
	            (without the dot).  When omitted, the module-level
	            ``ALLOWED_EXTENSIONS`` is used.

	    Returns:
	        bool: whether the extension is accepted.
	    """
	    if not filename:
	        return False
	    if allowed_extensions is None:
	        allowed_extensions = ALLOWED_EXTENSIONS
	    # rpartition yields an empty separator when the name contains no dot.
	    _, dot, extension = filename.rpartition('.')
	    return bool(dot) and extension.lower() in allowed_extensions

	def generate_demo_data_raw():
	    """Build a random purchase-to-pay event log as a list of dicts.

	    Fifty cases (``CASE-001`` .. ``CASE-050``) are produced.  Each case
	    starts 1-30 days before now and walks the standard activity path, with
	    a 20% chance of skipping the approval step and a 10% chance of an
	    invoice reject/re-receive loop.  Consecutive events are 60-2880
	    minutes apart.

	    Returns:
	        list[dict]: rows with the keys ``case_id``, ``activity`` and
	        ``timestamp`` (formatted ``%Y-%m-%d %H:%M:%S``), in that order.
	    """
	    standard_path = (
	        "Create Purchase Requisition",
	        "Approve Requisition",
	        "Create Purchase Order",
	        "Receive Goods",
	        "Receive Invoice",
	        "Match Invoice",
	        "Pay Invoice",
	        "Close Case",
	    )

	    rows = []
	    for case_number in range(1, 51):
	        case_label = f"CASE-{case_number:03d}"
	        clock = datetime.now() - timedelta(days=random.randint(1, 30))

	        steps = list(standard_path)

	        # Variant 1: the requisition is never approved.
	        if random.random() < 0.2:
	            steps.remove("Approve Requisition")

	        # Variant 2: the invoice is rejected once and received again.
	        if random.random() < 0.1:
	            after_invoice = steps.index("Receive Invoice") + 1
	            steps[after_invoice:after_invoice] = ["Reject Invoice", "Receive Invoice"]

	        for step in steps:
	            clock = clock + timedelta(minutes=random.randint(60, 2880))
	            rows.append({
	                "case_id": case_label,
	                "activity": step,
	                "timestamp": clock.strftime("%Y-%m-%d %H:%M:%S"),
	            })
	    return rows

	@app.route('/')
	def index():
	    """Render and return the ``index.html`` template for the root URL."""
	    page = render_template('index.html')
	    return page

	@app.route('/api/demo', methods=['GET'])
	def get_demo_data():
	    """Return a freshly generated demo event log as CSV text.

	    The rows come from ``generate_demo_data_raw()``.  They are serialised
	    with pandas when it is available and with the ``csv`` module otherwise.

	    Returns:
	        A ``(body, status, headers)`` tuple: the CSV text, 200, and a
	        ``Content-Type`` header declaring ``text/csv``.
	    """
	    rows = generate_demo_data_raw()

	    if HAS_PANDAS:
	        csv_text = pd.DataFrame(rows).to_csv(index=False)
	    else:
	        # Manual CSV generation
	        buffer = io.StringIO()
	        if rows:
	            writer = csv.DictWriter(buffer, fieldnames=list(rows[0]))
	            writer.writeheader()
	            writer.writerows(rows)
	        csv_text = buffer.getvalue()

	    # A bare string return is labelled text/html by Flask; declare the real
	    # media type so clients and proxies treat the body as CSV.
	    return csv_text, 200, {"Content-Type": "text/csv; charset=utf-8"}

	def _detect_columns(columns):
	    """Pick the case-id, activity and timestamp columns by keyword.

	    Each role takes the first column (in the given order) whose lower-cased
	    name contains one of the role's keywords.  A column already claimed by
	    an earlier role is skipped, so one column (e.g. ``case_name``) cannot
	    be used as both the case id and the activity.

	    Returns:
	        tuple: ``(case, activity, time)`` column names; a role with no
	        matching column is ``None``.
	    """
	    role_keywords = (
	        ('case', 'id'),
	        ('activity', 'event', 'name'),
	        ('time', 'date'),
	    )
	    chosen = []
	    for keywords in role_keywords:
	        match = next(
	            (
	                col for col in columns
	                if col not in chosen and any(k in col.lower() for k in keywords)
	            ),
	            None,
	        )
	        chosen.append(match)
	    return tuple(chosen)


	def _parse_event_time(raw):
	    """Parse one timestamp cell for the standard-library CSV path.

	    ISO format is tried first (every ``Z`` is rewritten to ``+00:00``),
	    then the ``%Y-%m-%d %H:%M:%S`` layout.

	    Raises:
	        ValueError: the text matches neither layout.
	        TypeError: the cell is not a string (e.g. ``None`` for a short row).
	    """
	    try:
	        return datetime.fromisoformat(raw.replace('Z', '+00:00'))
	    except (ValueError, AttributeError):
	        return datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")


	def _build_graph_payload(nodes, links, total_cases, total_events):
	    """Turn the aggregated node/link counters into the response payload.

	    Returns a dict with ``nodes`` and ``links`` lists (ECharts graph series
	    items) and a ``stats`` summary.
	    """
	    max_count = max((info["count"] for info in nodes.values()), default=0)

	    echarts_nodes = []
	    for name, info in nodes.items():
	        # No incoming transitions -> start node; no outgoing -> end node.
	        if info["in_degree"] == 0:
	            color, category = "#91cc75", "Start"
	        elif info["out_degree"] == 0:
	            color, category = "#ee6666", "End"
	        else:
	            color, category = "#5470c6", "Activity"

	        # Scale the symbol between 20 and 60 by relative frequency.
	        symbol_size = 20 + (info["count"] / max_count) * 40 if max_count > 0 else 30
	        echarts_nodes.append({
	            "name": name,
	            "value": info["count"],
	            "symbolSize": symbol_size,
	            "itemStyle": {"color": color},
	            "category": category,
	        })

	    echarts_links = []
	    for (source, target), info in links.items():
	        avg_duration = info["total_duration"] / info["count"]
	        echarts_links.append({
	            "source": source,
	            "target": target,
	            "value": info["count"],
	            "label": {
	                "show": True,
	                "formatter": f"{info['count']} ({avg_duration:.1f}h)"
	            },
	            "lineStyle": {
	                # Links only exist when at least one case was processed,
	                # so total_cases is non-zero here.
	                "width": 1 + (info["count"] / total_cases) * 5,
	                "curveness": 0.2
	            }
	        })

	    return {
	        "nodes": echarts_nodes,
	        "links": echarts_links,
	        "stats": {
	            "total_cases": total_cases,
	            "total_events": total_events,
	            "avg_events_per_case": round(total_events / total_cases, 1) if total_cases else 0
	        }
	    }


	@app.route('/api/analyze', methods=['POST'])
	def analyze():
	    """Analyse an uploaded CSV event log and return its process graph.

	    The upload is read from the ``file`` form field.  Case-id, activity
	    and timestamp columns are detected by keyword, events are grouped per
	    case and sorted by time, and ``process_case_events`` accumulates node
	    and link counters.  pandas is used when available; otherwise the
	    ``csv`` module is used.

	    Returns:
	        A JSON response with ``nodes``, ``links`` and ``stats`` on success.
	        A JSON ``{"error": ...}`` body with status 400 for a missing file,
	        empty filename, unsupported extension, unreadable CSV, missing
	        columns, or invalid timestamps, and status 500 for any other
	        failure (the traceback is printed).
	    """
	    try:
	        if 'file' not in request.files:
	            return jsonify({"error": "没有上传文件"}), 400

	        file = request.files['file']
	        if file.filename == '':
	            return jsonify({"error": "未选择文件"}), 400

	        # Reject unsupported uploads explicitly; without this branch the
	        # view produced no response for them.
	        if not (file and allowed_file(file.filename)):
	            return jsonify({"error": "不支持的文件类型, 仅支持 CSV 文件"}), 400

	        # Process Logic
	        nodes = {}
	        links = {}

	        # Use Pandas if available, otherwise fallback
	        if HAS_PANDAS:
	            try:
	                df = pd.read_csv(file)
	            except Exception as e:
	                return jsonify({"error": f"CSV读取失败: {str(e)}"}), 400

	            cols = [c.lower() for c in df.columns]
	            df.columns = cols

	            case_col, act_col, time_col = _detect_columns(cols)
	            if not (case_col and act_col and time_col):
	                return jsonify({"error": "缺少必要列: 需包含 CaseID, Activity, Timestamp"}), 400

	            try:
	                df[time_col] = pd.to_datetime(df[time_col])
	            except Exception:
	                return jsonify({"error": "时间戳格式无效"}), 400

	            df = df.sort_values(by=[case_col, time_col])

	            cases = df.groupby(case_col)
	            total_cases = len(cases)
	            total_events = len(df)

	            for _, group in cases:
	                process_case_events(group.to_dict('records'), nodes, links, act_col, time_col)

	        else:
	            # --- FALLBACK IMPLEMENTATION (Standard Lib) ---
	            stream = io.StringIO(file.stream.read().decode("UTF8"), newline=None)
	            reader = csv.DictReader(stream)
	            rows = list(reader)

	            if not rows:
	                return jsonify({"error": "空文件"}), 400

	            case_key, act_key, time_key = _detect_columns(reader.fieldnames)
	            if not (case_key and act_key and time_key):
	                return jsonify({"error": "缺少必要列: 需包含 CaseID, Activity, Timestamp"}), 400

	            # Group by Case
	            case_map = {}
	            for row in rows:
	                case_map.setdefault(row[case_key], []).append(row)

	            total_cases = len(case_map)
	            total_events = len(rows)

	            # Sort and Process
	            for events in case_map.values():
	                try:
	                    for event in events:
	                        event['_dt'] = _parse_event_time(event[time_key])
	                except (ValueError, TypeError):
	                    # Same response as the pandas path; substituting the
	                    # current time would silently corrupt the durations.
	                    return jsonify({"error": "时间戳格式无效"}), 400

	                events.sort(key=lambda x: x['_dt'])
	                process_case_events(events, nodes, links, act_key, '_dt')

	        # --- COMMON FORMATTING ---
	        return jsonify(_build_graph_payload(nodes, links, total_cases, total_events))

	    except Exception as e:
	        import traceback
	        traceback.print_exc()
	        return jsonify({"error": str(e)}), 500

	def process_case_events(events, nodes, links, act_key, time_key):
	    """Fold one case's time-ordered events into the shared graph counters.

	    ``nodes`` and ``links`` are updated in place.  Every event increments
	    its activity's ``count``.  Every consecutive pair of events increments
	    the ``(activity, next_activity)`` link's ``count``, adds the elapsed
	    hours to its ``total_duration``, and increments the source's
	    ``out_degree`` and the target's ``in_degree``.

	    Args:
	        events: Sorted list of event mappings for a single case.
	        nodes: Dict of activity name -> counter dict.
	        links: Dict of ``(source, target)`` -> counter dict.
	        act_key: Key of the activity name inside each event.
	        time_key: Key of the timestamp inside each event.
	    """
	    def counters_for(label):
	        # Register the activity on first sight and hand back its counters.
	        return nodes.setdefault(label, {"count": 0, "in_degree": 0, "out_degree": 0})

	    final_pos = len(events) - 1
	    for pos, event in enumerate(events):
	        label = event[act_key]

	        # Update Node
	        source = counters_for(label)
	        source["count"] += 1

	        # Update Link (the last event has no successor)
	        if pos >= final_pos:
	            continue

	        successor = events[pos + 1]
	        next_label = successor[act_key]

	        # Duration in hours
	        started = event[time_key]
	        ended = successor[time_key]

	        # Handle pandas timestamp vs python datetime
	        if hasattr(started, 'to_pydatetime'):
	            started = started.to_pydatetime()
	        if hasattr(ended, 'to_pydatetime'):
	            ended = ended.to_pydatetime()

	        hours = (ended - started).total_seconds() / 3600.0

	        edge = links.setdefault((label, next_label), {"count": 0, "total_duration": 0.0})
	        edge["count"] += 1
	        edge["total_duration"] += hours

	        source["out_degree"] += 1
	        counters_for(next_label)["in_degree"] += 1

	if __name__ == '__main__':
	    # The server binds to every interface, and Flask's debug mode enables an
	    # interactive debugger that can execute arbitrary code.  Keep debug off
	    # by default and opt in explicitly (FLASK_DEBUG=1) for local development.
	    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
	    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)