Spaces:

Kanompung
/

Unicorm

Sleeping

App Files Files Community

Unicorm / app.py

Kanompung

Update app.py

256e92e verified 7 months ago

raw

history blame contribute delete

23 kB

	import gradio as gr
	import cv2
	import threading
	import time
	import os
	from datetime import datetime
	import tempfile
	import math
	import pandas as pd
	import uuid
	from openai import OpenAI
	import os
	from dotenv import load_dotenv

	# Pull settings from a local .env file (if one exists) before the environment is read below.
	load_dotenv()

	# OpenAI Client Setup
	# The key is read from the API_KEY environment variable; os.getenv returns None when it is unset.
	# NOTE(review): no base_url is passed here, while LLM_Summarize requests the
	# "typhoon-v2.1-12b-instruct" model — confirm the endpoint is configured elsewhere
	# (e.g. through the environment) or add it explicitly.
	client = OpenAI(
	    api_key=os.getenv("API_KEY")
	)

	# System prompt (written in Thai) sent as the "system" message by LLM_Summarize; it asks the model for a Markdown movement-analysis report.
	SYSTEM_PROMPT = """
	คุณเป็นผู้ช่วยวิเคราะห์วิดีโอขั้นสูง ที่เชี่ยวชาญการแปลผลการจำแนกการกระทำของมนุษย์จากแบบจำลอง VideoMAE โดยมีหน้าที่วิเคราะห์ผลลัพธ์และให้คำอธิบายเชิงลึก พร้อมข้อเสนอแนะสำหรับผู้ใช้งาน

	## โครงสร้างข้อมูลนำเข้า
	คุณจะได้รับข้อมูลต่อไปนี้:
	1. ค่าความเชื่อมั่นของแต่ละการกระทำ (12 ประเภท)
	2. คะแนนความมั่นใจในการตรวจจับการเคลื่อนไหว
	3. ตัวอย่างเฟรมจากวิดีโอ
	4. ผลการเปรียบเทียบการกระทำคู่ตรงข้าม

	## ส่วนประกอบการวิเคราะห์

	### 1. ภาพรวมสรุปผล
	- สรุปผลการวิเคราะห์โดยย่อ
	- ระบุว่าพบการเคลื่อนไหวสำคัญหรือไม่
	- เน้นการกระทำที่เด่นชัดที่สุด

	### 2. วิเคราะห์การกระทำแบบละเอียด
	สำหรับแต่ละคู่การกระทำ (6 คู่):
	- เปรียบเทียบคะแนนความเชื่อมั่น
	- อธิบายความหมายของการกระทำที่เด่นกว่า
	- เสนอผลกระทบในทางปฏิบัติ

	### 3. การประเมินพลวัตการเคลื่อนไหว
	วิเคราะห์รูปแบบรวมเพื่อระบุ:
	- ทิศทางการเคลื่อนที่ (ไปข้างหน้า/ถอยหลัง, ขึ้น/ลง)
	- แนวโน้มการขยาย/หดตัว
	- การเปลี่ยนแปลงความเร็วและแรงกด

	### 4. ข้อเสนอแนะเชิงมืออาชีพ
	ปรับตามบริบทการใช้งาน:
	- การวิเคราะห์กีฬา: ข้อเสนอแนะเพื่อปรับปรุงเทคนิค
	- กล้องรักษาความปลอดภัย: ระบุการเคลื่อนไหวที่น่าสงสัย
	- สุขภาพ/สรีรศาสตร์: แนะนำการปรับท่าทาง
	- ข้อเสนอแนะทั่วไปเพื่อการเคลื่อนไหวที่มีประสิทธิภาพ

	### 5. หมายเหตุทางเทคนิค
	- ระบุข้อจำกัด (เช่น คะแนนความเชื่อมั่นต่ำ, สัญญาณรบกวน)
	- เงื่อนไขวิดีโอที่เหมาะสมสำหรับการวิเคราะห์ที่แม่นยำ
	- ความสัมพันธ์ที่น่าสนใจระหว่างการกระทำต่างๆ

	### 6. การสร้างรายงาน
	จัดรูปแบบเนื้อหาเป็น Markdown พร้อม:
	- หัวข้อที่ชัดเจน
	- ข้อความเน้นสำหรับผลลัพธ์สำคัญ
	- ภาษาที่เหมาะสมกับกลุ่มผู้ใช้งาน

	---

	## รูปแบบผลลัพธ์

	```markdown
	# รายงานวิเคราะห์การเคลื่อนไหวจากวิดีโอ

	## ภาพรวมสรุปผล
	[สรุปผลการวิเคราะห์โดยย่อ]

	## การวิเคราะห์การกระทำ
	### 1. [ชื่อคู่การกระทำ 1]
	- ผลการเปรียบเทียบ: [การกระทำ 1] ได้ X.XX เทียบกับ [การกระทำ 2] ได้ X.XX
	- การตีความ: [คำอธิบายความหมายของการเคลื่อนไหว]
	- ผลลัพธ์ที่อาจเกิดขึ้น: [ผลกระทบในทางปฏิบัติ]

	[... ทำซ้ำสำหรับทุกคู่การกระทำ]

	## โปรไฟล์การเคลื่อนไหวรวม
	[การวิเคราะห์แบบบูรณาการจากทุกการกระทำ]

	## ข้อเสนอแนะ
	1. [ข้อเสนอแนะแรก]
	2. [ข้อเสนอแนะที่สอง]
	3. [ข้อเสนอแนะที่สาม]

	## หมายเหตุทางเทคนิค
	[ข้อสังเกตเกี่ยวกับผลการวิเคราะห์]
	```

	---

	## ตัวอย่างผลลัพธ์ (ภาษาไทย)

	```markdown
	# รายงานวิเคราะห์การเคลื่อนไหวจากวิดีโอ

	## ภาพรวมสรุปผล
	การวิเคราะห์พบรูปแบบการเคลื่อนไหวไปข้างหน้าอย่างชัดเจน (คะแนน 0.87) ร่วมกับการเพิ่มแรงกด (0.82) สอดคล้องกับท่าเตะในกีฬามวยไทย

	## การวิเคราะห์การกระทำ
	### 1. การเคลื่อนไปข้างหน้า vs การถอยหลัง
	- ผลการเปรียบเทียบ: เคลื่อนไปข้างหน้าได้ 0.87 เทียบกับถอยหลังได้ 0.12
	- การตีความ: ผู้ถูกวิเคราะห์เคลื่อนที่เข้าหาคู่ต่อสู้อย่างเด่นชัด
	- ผลลัพธ์ที่อาจเกิดขึ้น: แสดงถึงกลยุทธ์การบุกที่รุนแรง

	[...]

	## โปรไฟล์การเคลื่อนไหวรวม
	พบว่า:
	- การเคลื่อนที่ไปข้างหน้าอย่างรวดเร็ว
	- การถ่ายเทน้ำหนักตัวมีจังหวะสม่ำเสมอ
	- การเร่งความเร็วในช่วงชกต่อย

	## ข้อเสนอแนะ
	1. ฝึกการเคลื่อนที่ถอยหลังเพื่อการป้องกัน
	2. ปรับการทรงตัวระหว่างเตะเพื่อความมั่นคง
	3. ระวังการเปิดช่องว่างเมื่อโจมตี

	## หมายเหตุทางเทคนิค
	ความเชื่อมั่นโดยรวมสูง (0.92) แสงจากด้านข้างอาจทำให้การคำนวณการไหลของภาพคลาดเคลื่อนเล็กน้อย
	"""

	# Action labels offered in the UI checkboxes and used, in this order, as the
	# one-hot columns of the exported CSV. They appear to form six opposing pairs.
	ACTION_COLS = [
	    'Advancing',
	    'Retreating',
	    'Enclosing',
	    'Spreading',
	    'Rising',
	    'Descending',
	    'Directing',
	    'Indirecting',
	    'Increasing Pressure',
	    'Decreasing Pressure',
	    'Acceleration',
	    'Decelerating',
	]

	# Module-wide recording flag polled by record_video, and the thread running it.
	recording = False
	recording_thread = None

	# ================= AI SECTION ==================
	def format_time(seconds):
	    """Render a duration given in seconds as a zero-padded ``MM:SS`` string.

	    Fractional seconds are truncated. Minutes are not wrapped at 60, so
	    3600 seconds renders as ``60:00``.
	    """
	    whole_minutes, remainder = divmod(seconds, 60)
	    return f"{int(whole_minutes):02d}:{int(remainder):02d}"

	def parse_time_str(tstr):
	    """Convert an ``MM:SS`` string into a total number of seconds.

	    Raises ``ValueError`` when the text does not consist of exactly two
	    integer fields separated by a colon.
	    """
	    minutes_text, seconds_text = tstr.split(":")
	    return int(minutes_text) * 60 + int(seconds_text)

	def send_to_api(file_path, start_sec, end_sec, actions):
	    """Log the clip that would be submitted for analysis.

	    This only prints the clip path, its time window and its action labels;
	    no request is made from this function.
	    """
	    messages = (
	        f"📤 Sending {file_path} to API...",
	        f"⏱ Clip time: {start_sec}-{end_sec} sec",
	        f"📝 Actions in this clip: {actions}",
	    )
	    for message in messages:
	        print(message)

	def record_video(actions_state, is_recording, recording_start_time):
	    """Record the default webcam into consecutive 5-second MP4 clips.

	    Runs until the module-level ``recording`` flag becomes false. Each clip
	    is written to the system temp directory under a timestamped name. After
	    a clip is closed, the action labels whose timestamp falls inside that
	    clip's window are collected and handed to ``send_to_api``.

	    Args:
	        actions_state: list of dicts with a ``"timestamp"`` (``MM:SS``) and
	            an ``"actions"`` list, as built by ``add_action``.
	        is_recording: unused; kept for interface compatibility.
	        recording_start_time: unused; kept for interface compatibility.

	    NOTE(review): the clip being written when recording is stopped is saved
	    but never passed to ``send_to_api`` — confirm that this is intended.
	    """
	    global recording
	    clip_seconds = 5

	    cap = cv2.VideoCapture(0)
	    try:
	        # Without a camera every read fails; bail out instead of producing
	        # empty clips in a busy loop.
	        if not cap.isOpened():
	            print("⚠️ Could not open webcam (device 0); recording aborted.")
	            return

	        fps = int(cap.get(cv2.CAP_PROP_FPS) or 24)
	        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

	        clip_idx = 0
	        while recording:
	            filename = os.path.join(
	                tempfile.gettempdir(),
	                f"{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4",
	            )
	            out = cv2.VideoWriter(filename, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
	            try:
	                clip_start = time.time()
	                while time.time() - clip_start < clip_seconds and recording:
	                    ret, frame = cap.read()
	                    if ret:
	                        out.write(frame)
	                    else:
	                        # Back off briefly so failed reads do not spin the CPU.
	                        time.sleep(0.01)
	            finally:
	                # Always finalize the file, even if a read or write raised.
	                out.release()

	            elapsed_start = clip_idx * clip_seconds
	            elapsed_end = elapsed_start + clip_seconds
	            clip_idx += 1

	            # Gather the labels stamped inside this clip's window.
	            labels = []
	            for item in actions_state:
	                if elapsed_start <= parse_time_str(item["timestamp"]) < elapsed_end:
	                    labels.extend(item["actions"])
	            # De-duplicate while keeping first-seen order (deterministic output).
	            actions_in_clip = list(dict.fromkeys(labels))

	            if recording:
	                send_to_api(filename, elapsed_start, elapsed_end, actions_in_clip)
	    finally:
	        # Release the camera on every exit path, including exceptions.
	        cap.release()

	def toggle_recording(state, is_recording, recording_start_time, actions_state, accept_visible):
	    """Start or stop the background recording thread from the record button.

	    ``state`` is the button's current label. When it is ``"Start"`` a
	    recorder thread is launched (unless one is already flagged as running);
	    otherwise the recording flag is cleared and the thread is joined.

	    Returns a 4-tuple: the new button label, the recording flag, the start
	    time (``None`` when stopped) and a visibility update for the accept
	    button (hidden while recording, shown once stopped).
	    """
	    global recording, recording_thread

	    if state != "Start":
	        # Stop path: signal the worker loop and wait for it to finish.
	        recording = False
	        if recording_thread is not None:
	            recording_thread.join()
	        return "Start", False, None, gr.update(visible=True)

	    if not recording:
	        recording = True
	        # The worker receives a shallow copy of the action list taken now.
	        worker_args = (actions_state.copy(), is_recording, recording_start_time)
	        recording_thread = threading.Thread(target=record_video, args=worker_args)
	        recording_thread.start()
	    return "Stop", True, time.time(), gr.update(visible=False)

	# ================= HUMAN FEEDBACK SECTION ==================
	def add_action(selected, actions_state, is_recording, recording_start_time, edit_index):
	    """Append the selected action labels to the action list with a timestamp.

	    The timestamp is the time elapsed since ``recording_start_time`` while
	    recording, or ``00:00`` otherwise. Nothing is appended when ``selected``
	    is empty. ``actions_state`` is modified in place; ``edit_index`` is not
	    read.

	    Returns ``(actions_state, None, None)``.
	    """
	    currently_timed = is_recording and recording_start_time
	    elapsed = time.time() - recording_start_time if currently_timed else 0

	    if selected:
	        entry = {
	            "id": str(uuid.uuid4()),
	            "timestamp": format_time(elapsed),
	            "actions": selected
	        }
	        actions_state.append(entry)

	    return actions_state, None, None

	def clear_actions():
	    """Return reset values: an empty action list followed by two ``None`` values."""
	    emptied_actions = []
	    return emptied_actions, None, None

	def render_table(actions_state, edit_index):
	    """Build the read-only actions table from the current action list.

	    Each row holds the entry's position, its timestamp and its labels
	    joined with ``", "``. ``edit_index`` is accepted but not read.
	    """
	    rows = [
	        [position, entry["timestamp"], ", ".join(entry["actions"])]
	        for position, entry in enumerate(actions_state)
	    ]
	    return gr.Dataframe(
	        headers=["Index", "Timestamp", "Actions"],
	        value=rows,
	        interactive=False,
	        wrap=True
	    )

	def start_edit(idx, actions_state, new_actions=None):
	    """Select a row for editing and optionally overwrite its action labels.

	    ``idx`` is converted with ``int()``. When it addresses an existing row,
	    returns ``(row, labels_of_that_row, actions_state)``; if ``new_actions``
	    is not ``None`` it replaces that row's labels first (in place). For an
	    out-of-range or unconvertible index, or any other error, returns
	    ``(None, [], actions_state)``.
	    """
	    nothing_selected = (None, [], actions_state)
	    try:
	        row = int(idx)
	        if not (0 <= row < len(actions_state)):
	            return nothing_selected
	        # When new_actions is supplied, save it straight away.
	        if new_actions is not None:
	            actions_state[row]["actions"] = new_actions
	        return row, actions_state[row]["actions"], actions_state
	    except Exception:
	        return nothing_selected


	def clean_markdown_response(response_text):
	    """Strip Markdown code-fence markers from an LLM response.

	    Removes an opening fence (three backticks, optionally followed by
	    ``markdown`` or ``md`` in any letter case) found at the start of a line,
	    and a closing fence found at the end of a line, then trims surrounding
	    whitespace. Both patterns run in multi-line mode, so fences on any line
	    are removed, not only the outermost pair.

	    Args:
	        response_text: the raw text returned by the model.

	    Returns:
	        The text without fence markers and without leading/trailing
	        whitespace.
	    """
	    import re

	    # Opening fence: ```markdown, ```md, or a bare ```.
	    # The alternation and the flag combination must use a plain "|"; a
	    # backslash-escaped pipe would match a literal "|" in the pattern and is
	    # a syntax error between the two flags.
	    response_text = re.sub(
	        r'^```(?:markdown|md)?\s*\n?',
	        '',
	        response_text,
	        flags=re.IGNORECASE | re.MULTILINE,
	    )
	    # Closing fence, together with the newline that precedes it.
	    response_text = re.sub(r'\n?```\s*$', '', response_text, flags=re.MULTILINE)

	    # Drop whatever whitespace is left around the content.
	    return response_text.strip()

	def LLM_Summarize(actions_state):
	    """Ask the chat model for a Markdown summary of the recorded actions.

	    The action list is turned into a one-hot CSV (one row per entry, one
	    column per label in ``ACTION_COLS``) and sent as the user message, with
	    ``SYSTEM_PROMPT`` as the system message, to the
	    ``typhoon-v2.1-12b-instruct`` model through the module-level ``client``.

	    Args:
	        actions_state: list of dicts with ``"timestamp"`` (``MM:SS``),
	            ``"actions"`` and, optionally, ``"id"``.

	    Returns:
	        The model's reply with code-fence markers removed; a fixed notice
	        when there are no actions; or an error message string if anything
	        fails (this function does not raise).
	    """
	    try:
	        # One row per recorded entry, with a 0/1 column for every known label.
	        rows = []
	        for item in actions_state:
	            row = {
	                "id": item.get("id", str(uuid.uuid4())),
	                "Filename": "webcam_clip",
	                "Time": "t" + item["timestamp"].replace(":", "")
	            }
	            for act in ACTION_COLS:
	                row[act] = 1 if act in item["actions"] else 0
	            rows.append(row)

	        if not rows:
	            return "No actions recorded yet. Start recording and add some actions to generate a summary."

	        # Fix the column order before serializing for the model.
	        columns = ["id", "Filename", "Time"] + ACTION_COLS
	        csv_data = pd.DataFrame(rows)[columns].to_csv(index=False)

	        # The system prompt travels as its own message, so it is not
	        # concatenated into the user content as well.
	        response = client.chat.completions.create(
	            model="typhoon-v2.1-12b-instruct",  # Updated to use Typhoon model
	            messages=[
	                {"role": "system", "content": SYSTEM_PROMPT},
	                {"role": "user", "content": f"Here is the CSV data to analyze:\n\n{csv_data}"}
	            ],
	            max_tokens=2000,  # Adjust as needed
	            temperature=0.7  # Adjust for creativity vs consistency
	        )

	        # Clean the response to remove markdown code block markers
	        raw_response = response.choices[0].message.content
	        return clean_markdown_response(raw_response)

	    except Exception as e:
	        # UI boundary: surface the failure as text instead of raising.
	        return f"Error generating summary: {str(e)}\n\nPlease check your OpenAI API key and connection."

	def accept_labels(actions_state):
	    """Write the recorded actions to a one-hot CSV file and return its path.

	    Each entry becomes one row with ``id``, ``Filename``, ``Time`` (the
	    timestamp prefixed with ``t`` and with the colon removed) and a 0/1
	    column for every label in ``ACTION_COLS``. The file is created in the
	    system temp directory under a timestamped name.

	    Args:
	        actions_state: list of dicts with ``"timestamp"`` (``MM:SS``),
	            ``"actions"`` and, optionally, ``"id"``.

	    Returns:
	        Path of the written CSV file. An empty action list produces a file
	        containing only the header row.
	    """
	    columns = ["id", "Filename", "Time"] + ACTION_COLS

	    rows = []
	    for item in actions_state:
	        row = {
	            "id": item.get("id", str(uuid.uuid4())),
	            "Filename": "webcam_clip",
	            "Time": "t" + item["timestamp"].replace(":", "")
	        }
	        for act in ACTION_COLS:
	            row[act] = 1 if act in item["actions"] else 0
	        rows.append(row)

	    # Passing the columns to the constructor fixes their order and also
	    # works for an empty list; selecting them afterwards raises KeyError
	    # when the frame has no columns at all.
	    df = pd.DataFrame(rows, columns=columns)

	    out_path = os.path.join(tempfile.gettempdir(), f"actions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
	    df.to_csv(out_path, index=False)
	    return out_path

	# ================= UI ==================
	def main():
	    """Build the Gradio interface, wire its event handlers and launch the app.

	    The page has a webcam preview with record / add-action buttons, a
	    side column holding the action checkboxes and the actions table, a
	    full-screen modal for the AI summary, and a row for editing an existing
	    table entry.

	    NOTE(review): the headings mention "MentaLLaMA-chat-7B" while
	    LLM_Summarize requests a different model name — confirm which is right.
	    """
	    # Custom CSS: button colours, the full-screen modal (hidden until the
	    # "show" class is added from JavaScript) and the Markdown area inside it.
	    with gr.Blocks(css="""
	#video-container video { width: 100% !important; border: 2px solid #ccc; border-radius: 10px; }
	button { font-size: 1.2em; padding: 1em; border-radius: 8px; }
	#record-btn { background-color: #4CAF50; color: white; }
	#add-action-btn { background-color: #2196F3; color: white; }
	#clear-action-btn { background-color: #f44336; color: white; }
	#accept-btn { background-color: #ff9800; color: white; }
	.gr-button { min-width: 120px; }

	/* Fullscreen modal */
	#modal {
	display: none;
	position: fixed;
	z-index: 9999;
	top: 0;
	left: 0;
	width: 100vw;
	height: 100vh;
	background-color: rgba(0, 0, 0, 0.9);
	padding: 2rem;
	box-sizing: border-box;
	overflow: auto;
	}

	#modal.show {
	display: flex;
	align-items: center;
	justify-content: center;
	}

	#modal-content {
	background-color: #1e1e1e;
	border-radius: 16px;
	padding: 2rem;
	width: 80vw;
	max-width: none;
	max-height: 80vh;
	box-sizing: border-box;
	color: white;
	box-shadow: 0 0 30px rgba(0, 0, 0, 0.5);
	display: flex;
	flex-direction: column;
	overflow: hidden;
	}

	#modal-content textarea {
	width: 100% !important;
	height: 60vh !important;
	font-family: Arial, sans-serif !important;
	font-size: 1rem !important;
	background-color: #2a2a2a;
	color: white;
	border-radius: 12px;
	padding: 1rem;
	border: none;
	resize: none;
	box-sizing: border-box;
	flex-grow: 1;
	overflow-y: auto;
	line-height: 1.5;
	}

	#close-button {
	margin-top: 1rem;
	padding: 0.75rem 1.5rem;
	font-size: 1rem;
	background-color: #444;
	color: white;
	border: none;
	border-radius: 10px;
	cursor: pointer;
	align-self: center;
	}

	#close-button:hover {
	background-color: #555;
	}

	.modal-markdown {
	background-color: #2a2a2a;
	padding: 1rem;
	border-radius: 12px;
	max-height: 60vh;
	overflow-y: auto;
	color: white;
	font-family: Arial, sans-serif;
	line-height: 1.6;
	}

	.modal-markdown h1,
	.modal-markdown h2,
	.modal-markdown h3 {
	margin-top: 0;
	margin-bottom: 0.5rem;
	color: #fff;
	}

	.modal-markdown p {
	margin-bottom: 0.8rem;
	}

	.modal-markdown ul,
	.modal-markdown ol {
	margin-bottom: 0.8rem;
	padding-left: 1.5rem;
	}

	.modal-markdown strong {
	color: #4CAF50;
	}

	.modal-markdown code {
	background-color: #3a3a3a;
	padding: 0.2rem 0.4rem;
	border-radius: 4px;
	font-family: 'Courier New', monospace;
	}
	""") as demo:
	        gr.Markdown("## 🎥 Live Webcam + 📝 Actions + 🤖 AI Summary (MentaLLaMA-chat-7B)")

	        # Main layout: preview and recording controls on the left (wider),
	        # action selection and table on the right.
	        with gr.Row():
	            with gr.Column(scale=2):
	                webcam = gr.Video(label="Webcam Preview", elem_id="video-container", sources="webcam")
	                with gr.Row():
	                    record_button = gr.Button("Start", elem_id="record-btn")
	                    add_action_btn = gr.Button("➕ Add Actions", elem_id="add-action-btn")
	            with gr.Column(scale=1):
	                gr.Markdown("### 📝 Actions Table")
	                action_multiselect = gr.CheckboxGroup(choices=ACTION_COLS, label="เลือก Actions")
	                clear_action_btn = gr.Button("🗑️ ลบ Actions ทั้งหมด", elem_id="clear-action-btn")
	                actions_table = gr.Dataframe(headers=["Index", "Timestamp", "Actions"], value=[], interactive=False, wrap=True)
	                # Hidden at first; toggle_recording reveals it when recording stops.
	                accept_btn = gr.Button("✅ Accept & Generate AI Summary", visible=False, elem_id="accept-btn")
	                download_file = gr.File(label="Download CSV", visible=False)

	        # Modal components for LLM Summary
	        with gr.Group(elem_id="modal") as modal:
	            with gr.Column(elem_id="modal-content"):
	                gr.Markdown("### 🤖 AI Analysis Summary (MentaLLaMA-chat-7B)")
	                llm_summary = gr.Markdown(
	                    value="AI summary will appear here...",
	                    elem_classes=["modal-markdown"]
	                )
	                close_btn = gr.Button("Close", elem_id="close-button")

	        # State variables
	        actions_state = gr.State([])
	        is_recording = gr.State(False)
	        recording_start_time = gr.State(None)
	        edit_index = gr.State(None)
	        # NOTE(review): edit_actions is not referenced by any handler below.
	        edit_actions = gr.State(None)

	        # Event handlers
	        # The record button's own label is passed in as the first input and
	        # rewritten by the handler ("Start" <-> "Stop").
	        record_button.click(
	            toggle_recording,
	            inputs=[record_button, is_recording, recording_start_time, actions_state, accept_btn],
	            outputs=[record_button, is_recording, recording_start_time, accept_btn]
	        )

	        # Add the ticked labels, then redraw the table.
	        add_action_btn.click(
	            add_action,
	            inputs=[action_multiselect, actions_state, is_recording, recording_start_time, edit_index],
	            outputs=[actions_state, action_multiselect, edit_index]
	        ).then(
	            render_table,
	            inputs=[actions_state, edit_index],
	            outputs=actions_table
	        )

	        # Empty the action list, then redraw the table.
	        clear_action_btn.click(
	            clear_actions,
	            outputs=[actions_state, action_multiselect, edit_index]
	        ).then(
	            render_table,
	            inputs=[actions_state, edit_index],
	            outputs=actions_table
	        )

	        # Accept button - show modal with LLM summary and generate CSV
	        # Chain: summary into the modal -> CSV written -> download widget shown.
	        accept_btn.click(
	            fn=LLM_Summarize,
	            inputs=actions_state,
	            outputs=llm_summary,
	            js="() => { document.getElementById('modal').classList.add('show'); return []; }"
	        ).then(
	            accept_labels,
	            inputs=actions_state,
	            outputs=download_file
	        ).then(
	            lambda: gr.update(visible=True),
	            outputs=download_file
	        )

	        # Close button - hide modal
	        # Handled purely in the browser; no Python function is attached.
	        close_btn.click(
	            fn=None,
	            outputs=None,
	            js="() => { document.getElementById('modal').classList.remove('show'); return []; }"
	        )

	        # Edit functionality
	        with gr.Row():
	            edit_row = gr.Number(label="📝 Edit Row (0-based)", precision=0)
	            edit_multiselect = gr.CheckboxGroup(choices=ACTION_COLS, label="แก้ไข Actions")

	        # Choosing a row loads its current labels into the edit checkboxes.
	        edit_row.change(
	            lambda idx, actions_state: start_edit(idx, actions_state),
	            inputs=[edit_row, actions_state],
	            outputs=[edit_index, edit_multiselect, actions_state]
	        )

	        # Save automatically whenever the edit multiselect changes.
	        edit_multiselect.change(
	            lambda new_actions, edit_index, actions_state: start_edit(edit_index, actions_state, new_actions),
	            inputs=[edit_multiselect, edit_index, actions_state],
	            outputs=[edit_index, edit_multiselect, actions_state]
	        ).then(
	            render_table,
	            inputs=[actions_state, edit_index],
	            outputs=actions_table
	        )
	    demo.launch()

	if __name__ == "__main__":
	main()