BenliusYang commited on
Commit
f4eb238
·
verified ·
1 Parent(s): 6160b5a

Create gradio_ui6.1.py

Browse files
Files changed (1) hide show
  1. gradio_ui6.1.py +188 -0
gradio_ui6.1.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import ffmpeg
3
+ from funasr import AutoModel
4
+ from moviepy.editor import VideoFileClip
5
+ import os
6
+ import subprocess
7
+ import base64
8
+ from PIL import Image
9
+ import io
10
+ import uuid
11
+ import shutil
12
+ import glob
13
+ from openai import OpenAI
14
+
15
# Initialise the funasr speech pipeline: paraformer-zh ASR plus a VAD model
# and a punctuation-restoration model.
model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc-c")

# SECURITY FIX: the OpenAI API key was hard-coded in source (and therefore
# published with this file).  Read it from the environment instead; the old
# inline key must be considered compromised and revoked.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Paths of every intermediate artefact (downloaded video, segments, wav files,
# extracted frames) so clear_temp_files() can remove them in one sweep.
temp_files = []
22
+
23
+ def clear_directory(dir_path):
24
+ if os.path.exists(dir_path):
25
+ shutil.rmtree(dir_path)
26
+ os.makedirs(dir_path, exist_ok=True)
27
+
28
def segment_video(video_path, segment_length):
    """Cut the video at *video_path* into consecutive clips of *segment_length* seconds.

    Each clip is written to a uniquely named mp4 via ffmpeg, registered in the
    global ``temp_files`` list for later cleanup, and the list of clip paths
    is returned in chronological order.
    """
    # Probe the duration once, then release the moviepy reader before slicing.
    with VideoFileClip(video_path) as source:
        duration = int(source.duration)
    clip_paths = []
    for begin in range(0, duration, segment_length):
        finish = min(begin + segment_length, duration)
        clip_path = f"segment_{uuid.uuid4()}.mp4"
        ffmpeg.input(video_path, ss=begin, to=finish).output(clip_path).run()
        clip_paths.append(clip_path)
        temp_files.append(clip_path)
    return clip_paths
39
+
40
def extract_audio(segment_path):
    """Demux the audio track of *segment_path* into a fresh .wav file.

    The output path is appended to the global ``temp_files`` list for cleanup
    and returned to the caller.
    """
    wav_path = f"extracted_audio_{uuid.uuid4()}.wav"
    ffmpeg.input(segment_path).output(wav_path).run()
    temp_files.append(wav_path)
    return wav_path
45
+
46
def audio_to_text_with_funasr(audio_path):
    """Transcribe the audio file at *audio_path* with the module-level funasr model.

    Returns the recognised text segments joined with spaces, or the fallback
    message '无法识别音频内容' when recognition yields nothing usable.
    """
    results = model.generate(input=audio_path, batch_size_s=300, hotword='魔搭')
    if not (isinstance(results, list) and results):
        return '无法识别音频内容'
    return " ".join(entry.get('text', '') for entry in results)
53
+
54
def process_text_with_openai(text):
    """Send transcript *text* to GPT-4 for key-point extraction plus a 1-5 sentiment score.

    The system prompt pins the model to a very terse answer; the stripped
    reply is printed for debugging and returned.
    """
    analysis_prompt = "作为一名综合语言处理专家,您的任务是对课堂录音的文本信息进行分析。关键信息提取: 识别并提取重概念和关键点。情感分析: 分析文本情绪,判断文本所传达的情绪状态,估计分值(1-5).我会直接给到你需要处理的文本,你会直接针对文本进行处理,然后返回结果,只输出关键信息和情感分析的直接结果。输出的结果要十分精简。"
    completion = client.chat.completions.create(
        model='gpt-4-0125-preview',
        messages=[
            {"role": "system", "content": analysis_prompt},
            {"role": "user", "content": text},
        ],
        temperature=0.1,
    )
    reply = completion.choices[0].message.content.strip()
    print(reply)
    return reply
69
+
70
def resize_and_encode_image_to_base64(image_path, output_size=(512, 512)):
    """Downscale the image at *image_path* to fit within *output_size* and
    return it as a base64-encoded JPEG string.

    Fixes over the original:
      * ``Image.ANTIALIAS`` was deprecated in Pillow 9.1 and removed in
        Pillow 10 — use the LANCZOS resampling filter (the same algorithm)
        in a version-portable way.
      * JPEG cannot store alpha or palette images, so convert to RGB before
        saving; the original would raise OSError on RGBA/P input.
    """
    # Pillow >= 9.1 exposes filters on Image.Resampling; older versions on Image.
    lanczos = getattr(Image, "Resampling", Image).LANCZOS
    with Image.open(image_path) as img:
        img.thumbnail(output_size, lanczos)  # in-place, keeps aspect ratio
        if img.mode != "RGB":
            img = img.convert("RGB")
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='JPEG')
    return base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
77
+
78
def describe_image_with_openai(base64_image):
    """Ask GPT-4 vision to rate the classroom emotion/interaction in *base64_image*.

    The prompt instructs the model to reply with the 1-5 ratings only.
    The stripped reply is printed for debugging and returned.
    """
    rating_prompt = "你是一位专业的课堂状态分析员。你会按照以下步骤完成任务。首先,面部分析识别情绪:1.识别图片中的面部表情。2.将面部表情与相应的基本情绪关联起来。3.注意面部表情的细微差别,这些可能指示更复杂的情绪状态。其次,身体姿态分析互动关系:1.观察并报告图像中个体的身体语言和姿态。2.根据姿态推断互动的性质(例如,协作、对抗)。3.考虑个体之间的距离和方向,以了解互动关系。最后,反馈:直接给出对于图片中情绪状态和互动关系的评分(1-5的范围),不需要给到分析过程。你返回给我的结果只是评分,其他信息都不需要"
    user_content = [
        {"type": "text", "text": rating_prompt},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
    ]
    vision_response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[{"role": "user", "content": user_content}],
        max_tokens=300,
    )
    rating = vision_response.choices[0].message.content.strip()
    print(rating)
    return rating
96
+
97
+
98
def video_to_images(segment_path, interval, start_time):
    """Sample one frame every *interval* seconds from the clip at *segment_path*.

    Each sampled frame is saved under a per-call ``frames/<uuid>/`` directory,
    base64-encoded as a downscaled JPEG, and scored by the vision model.
    *start_time* (seconds from the start of the full video) is used to build a
    human-readable timestamp for each frame.

    Returns a list of ``(base64_image, timestamp, description)`` tuples.

    Fix over the original: the ``VideoFileClip`` was created but never closed,
    leaking the underlying ffmpeg reader for every segment; a ``with`` block
    now guarantees it is released.
    """
    images_descriptions = []
    frames_dir = os.path.join("frames", str(uuid.uuid4()))
    os.makedirs(frames_dir, exist_ok=True)
    with VideoFileClip(segment_path) as clip:
        for i in range(0, int(clip.duration), interval):
            img_path = os.path.join(frames_dir, f"frame_at_{i}.jpg")
            clip.save_frame(img_path, t=i)
            base64_image = resize_and_encode_image_to_base64(img_path)
            description = describe_image_with_openai(base64_image)
            # Timestamp relative to the whole video, not just this segment.
            minutes = (start_time + i) // 60
            seconds = (start_time + i) % 60
            timestamp = f"{minutes}分{seconds}秒"
            images_descriptions.append((base64_image, timestamp, description))
            temp_files.append(img_path)
    return images_descriptions
114
+
115
def download_video(video_url):
    """Fetch *video_url* with ffmpeg, re-encoding to a fast low-quality H.264 mp4.

    The local path is registered in ``temp_files`` for cleanup and returned.
    Raises subprocess.CalledProcessError if ffmpeg exits non-zero.
    """
    local_path = f"downloaded_video_{uuid.uuid4()}.mp4"
    ffmpeg_cmd = [
        "ffmpeg", "-y", "-i", video_url,
        "-vcodec", "libx264", "-crf", "28", "-preset", "ultrafast",
        local_path,
    ]
    subprocess.run(ffmpeg_cmd, check=True)
    temp_files.append(local_path)
    return local_path
124
+
125
def clear_temp_files():
    """Delete every file recorded in the global ``temp_files`` list, then reset
    the list so the same paths are not removed twice."""
    for temp_path in temp_files:
        if os.path.exists(temp_path):
            os.remove(temp_path)
    del temp_files[:]
130
+
131
def process_video(video_url, segment_minutes, image_interval_seconds):
    """Gradio callback: download a video, split it into segments, and build an
    HTML report combining a GPT summary of each segment's speech with scored
    frame snapshots.

    Parameters
    ----------
    video_url : str
        Any input URL ffmpeg can read.
    segment_minutes : int
        Length of each analysed segment, in minutes.
    image_interval_seconds : int
        Seconds between sampled frames inside a segment.

    Returns
    -------
    str
        A self-contained HTML fragment (frames inlined as base64 JPEGs).

    Fixes over the original:
      * the total duration is now probed once, with the clip closed
        afterwards, instead of re-opening an unclosed ``VideoFileClip`` on
        every loop iteration;
      * ``clear_temp_files()`` was placed after ``return`` and never ran —
        cleanup now happens in a ``finally`` block;
      * the data URI declared ``image/png`` for JPEG-encoded data; it now
        says ``image/jpeg``.
    """
    clear_directory("frames")
    clear_directory("segments")
    try:
        video_path = download_video(video_url)
        segment_length = segment_minutes * 60
        segments = segment_video(video_path, segment_length)
        with VideoFileClip(video_path) as probe:
            total_duration = int(probe.duration)

        html_results = []
        for i, segment_path in enumerate(segments):
            start_time = i * segment_length
            end_time = min((i + 1) * segment_length, total_duration)
            audio_path = extract_audio(segment_path)
            text = audio_to_text_with_funasr(audio_path)
            processed_text = process_text_with_openai(text)
            images_descriptions = video_to_images(segment_path, image_interval_seconds, start_time)

            title = f"第 {start_time//60} 分钟到 {end_time//60} 分钟的内容"
            images_html = ''.join([f'<div style="flex-basis: calc(33.333% - 10px); margin-right: 10px; margin-bottom: 10px; text-align: center;"><img src="data:image/jpeg;base64,{image[0]}" style="width: 100%;" /><p style="margin-top: 5px;">{image[1]} - {image[2]}</p></div>' for image in images_descriptions])

            segment_html = f'''
            <div style="margin-bottom: 40px;">
                <h2 style="text-align: center;">{title}</h2>
                <div style="display: flex; flex-direction: row; gap: 20px;">
                    <div style="flex: 1; padding: 10px; border: 1px solid #ccc; border-radius: 5px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                        <p style="margin: 0;">{processed_text}</p>
                    </div>
                    <div style="flex: 2; padding: 10px; border: 1px solid #ccc; border-radius: 5px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); display: flex; flex-wrap: wrap; align-items: flex-start; gap: 10px;">
                        {images_html}
                    </div>
                </div>
            </div>
            '''

            html_results.append(segment_html)

        return '<div>' + ''.join(html_results) + '</div>'
    finally:
        # Frames are embedded as base64 in the returned HTML, so the on-disk
        # media (download, segments, wavs, frames) can always be deleted.
        clear_temp_files()
171
+
172
# ---- Gradio UI -------------------------------------------------------------
with gr.Blocks() as blocks_interface:
    gr.Markdown("视频内容浏览工具")
    gr.Markdown("请输入视频地址并选择时间间隔来提取和分析视频内容。")
    with gr.Row():
        video_url = gr.Textbox(label="视频URL", placeholder="请输入视频URL")
        segment_minutes = gr.Slider(minimum=1, maximum=10, value=5, label="分段间隔(分钟)", step=1)
        image_interval_seconds = gr.Slider(minimum=10, maximum=60, value=30, label="图像提取间隔(秒)", step=5)
    submit_button = gr.Button("提交")
    output_container = gr.HTML()

    # Run the full pipeline on click and render the returned HTML report.
    submit_button.click(
        process_video,
        inputs=[video_url, segment_minutes, image_interval_seconds],
        outputs=output_container,
    )

blocks_interface.launch()