Yzy00518 committed on
Commit
19f880d
·
verified ·
1 Parent(s): 0541339

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +245 -0
app.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import pickle
4
+ import requests
5
+ import time
6
+ import re
7
+ from huggingface_hub import hf_hub_download, list_repo_files
8
+
9
# Resolve the application data root relative to this file; the base-motion
# pickles and rendered mp4s are expected under src/app.
app_root = os.path.dirname(os.path.abspath(__file__))
app_root = os.path.join(app_root, "src/app")

# Hugging Face Hub repository holding the model weights and demo assets.
repo_id = "Yzy00518/motionReFit"
13
+
14
# Curated subset of demo motions: maps the six-digit dataset code of a
# base-motion asset to the human-readable label the file is renamed to after
# download.  Only assets whose code appears here are kept.
# NOTE(review): 'kneling' below looks like a typo for 'kneeling', but the
# value becomes a filename on disk, so it is left untouched here — confirm
# before changing.
selected_videos = {
    '000021': 'walking',
    '000472': 'writing something',
    '001454': 'walking backward',
    '002093': 'cleaning the window',
    '002550': 'walking in a Zig-Zag pattern',
    '003111': 'dancing',
    '003712': 'kneling down and crawling',
    '004163': 'flying like a bird',
    '004455': 'running in place',
    '004912': 'swimming',
    '005458': 'running forward',
    '005869': 'pick up something',
    '006662': 'falling down',
    '006979': 'punching',
    '007354': 'crawling',
    '007822': 'jumping on both sides',
    '008162': 'dancing like a robot',
    '009768': 'looking back',
    '010193': 'lifting something heavy',
    '013449': 'punching with fists',
    '013659': 'jumping jacks',
    '014920': 'walking in place',
    '015249': 'putting something on their face',
    '015729': 'cleaning the table',
}
40
+
41
def is_six_digit_filename(file_path):
    """Return True when the basename is exactly six digits plus one
    alphanumeric extension (e.g. ``000021.mp4``)."""
    name = os.path.basename(file_path)
    return re.fullmatch(r'\d{6}\.[a-zA-Z0-9]+', name) is not None
44
+
45
def extract_six_digit_code(file_path):
    """Return the first six-digit run found in the basename, or None."""
    found = re.search(r'(\d{6})', os.path.basename(file_path))
    if found is None:
        return None
    return found.group(1)
49
+
50
def rename_files_with_six_digit_code(file_path):
    """Rename a downloaded six-digit asset to its human-readable label.

    Returns:
        The new path after renaming when the code is in ``selected_videos``;
        the original path unchanged when the basename is not a six-digit
        filename; ``None`` when the code is not a selected video.

    Raises:
        ValueError: if the file extension is neither ``.mp4`` nor ``.pkl``.
    """
    if not is_six_digit_filename(file_path):
        return file_path
    six_digit_code = extract_six_digit_code(file_path)
    if six_digit_code not in selected_videos:
        return None
    new_name = selected_videos[six_digit_code]
    # BUG FIX: the original tested `'.mp4' in file_path`, a substring match
    # that can misfire when ".mp4"/".pkl" appears elsewhere in the path
    # (e.g. a directory name).  Check the actual extension instead.
    ext = os.path.splitext(file_path)[1]
    if ext not in ('.mp4', '.pkl'):
        raise ValueError(f"Invalid file extension: {file_path}")
    new_path = os.path.join(os.path.dirname(file_path), f"{new_name}{ext}")
    os.rename(file_path, new_path)
    print(f"Renamed: {file_path} -> {new_path}")
    return new_path
66
+
67
+
68
def download_files_from_huggingface(repo_id, repo_type, max_retries=3):
    """Download the needed repo files from the Hub into the current directory.

    Six-digit demo assets are only downloaded when their code appears in
    ``selected_videos``; after download they are renamed to their labels.
    Transient network errors are retried up to ``max_retries`` times with a
    linear backoff; a file that still fails is skipped (best-effort).
    """
    file_list = list_repo_files(repo_id=repo_id, repo_type=repo_type)
    for file in file_list:
        relative_path = os.path.dirname(file)
        local_path = os.path.join(os.getcwd(), relative_path)
        # exist_ok avoids the race between the existence check and creation.
        os.makedirs(local_path, exist_ok=True)

        # BUG FIX: the original tested `file not in selected_videos`, but the
        # dict is keyed by six-digit codes while `file` is a repo-relative
        # path, so EVERY six-digit asset (selected or not) was skipped.
        # Compare the extracted code instead.
        if is_six_digit_filename(file) and (extract_six_digit_code(file) not in selected_videos):
            print(f"Skipping: {file}")
            continue

        for attempt in range(max_retries):
            try:
                hf_hub_download(repo_id=repo_id,
                                filename=file,
                                local_dir=os.getcwd(),
                                local_dir_use_symlinks=False,
                                resume_download=True,)
                print(f"Successfully Download: {file}")
                rename_files_with_six_digit_code(os.path.join(local_path, os.path.basename(file)))
                break
            except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
                if attempt < max_retries - 1:
                    # Linear backoff: 2s, 4s, ...
                    wait_time = (attempt + 1) * 2
                    print(f"Download failed, retrying in {wait_time} seconds")
                    time.sleep(wait_time)
            except FileExistsError:
                # Already present locally; nothing to do.
                print(f"{file} Exists")
                break
98
+
99
# Fetch weights and demo assets BEFORE importing the model modules below,
# which expect those files to be on disk.
download_files_from_huggingface(repo_id, 'model')

# NOTE(review): wildcard import — presumably provides `models`, `device` and
# `diffuser` used further down; consider making it explicit.
from src.app.setup_models import *
from src.app.pipeline import pipeline
103
+
104
def select_and_show(data_id):
    """Return the path of the rendered base-motion video, or None if absent."""
    candidate = os.path.join(app_root, f"app_base_motion_mp4/{data_id}.mp4")
    if os.path.exists(candidate):
        return candidate
    return None
107
+
108
+
109
+ from openai import OpenAI
110
def __translate(text_raw, temperature=1.5):
    """Ask GPT-3.5 to classify the editing task and normalize the prompt.

    Args:
        text_raw: the user's free-form motion-editing instruction.
        temperature: sampling temperature passed to the chat completion.

    Returns:
        The raw model output, expected in the form ``task#text``.
    """
    MODEL_NAME = 'gpt-3.5-turbo-0125'
    MAX_TOKENS = 400

    # SECURITY(review): a hard-coded API key was committed here; it should be
    # revoked and supplied via the environment.  `setdefault` keeps the demo
    # working while no longer clobbering a user-provided key (the original
    # unconditionally overwrote OPENAI_API_KEY).
    os.environ.setdefault('OPENAI_API_KEY', 'sk-gCaeFye5rjfnhdvDDaE24205108b4bB1Bf1497D6A9EeB704')
    client = OpenAI(
        api_key=os.environ.get("OPENAI_API_KEY", "<your OpenAI API key if not set as env var>"),
        base_url='https://api.xty.app/v1'
    )

    content = [
        {
            "type": "text",
            "text": f"""
            You will be given a sentence about a person's action or description about their motion.
            First, determine which of the following three tasks best fits the input:
            regen (regeneration) (Use this option very often.) – Modify or generate a new version of the given action (e.g., 'a person is waving his hands' or 'a person kicks with his left foot').
            style_transfer – Add an emotional or stylistic tone to the given action. For example, 'Replace a person with a proud look using their upper body'. In this task, the output should include one of these adjectives (angry, sad, proud, old, or sexy) and should ONLY change the action style, NOT the action itself.
            adjustment – (Use this option very rarely.) Only choose this when the input clearly specifies explicit adjustments such as increasing or decreasing the motion amplitude or switching from clockwise to counterclockwise.
            Next, simplify and normalize the input text by retaining only the parts that emphasize how the action changes. Remove any details about the person's appearance, gender, body type, scenery, or objects they hold.
            Only for style_transfer tasks, format the output as:   Replace a person with a X look using their Y body. Here, X must be chosen from [angry, sad, proud, old, sexy] and Y is limited to either 'upper' or 'lower'.
            For other tasks, the format should be 'The person is doing X with their Y'. Here, X is the action and Y is the body part[upper body, lower body, left arm, right arm, both arms] used to perform the action.
            Finally, your output should follow the format:   task#text where 'task' is one of regen, style_transfer, or adjustment, and 'text' is the simplified and formatted result.
            REMEMBER TO USE THE '#' SYMBOL TO SEPARATE THE TASK AND TEXT. For example, if the task is regen and the text is 'a person is waving his hands', the output should be: regen#a person is waving his hands with their both arms
            REMEMBER YOU CAN ONLY CHOOSE ONE TASK. If the input contains multiple tasks, choose the one that best fits the input. If the input contains no tasks, choose regen.
            """
        },
    ]
    content.append({
        "type": "text",
        "text": f"""The sentence is: {text_raw}"""
    })

    messages = [{"role": "user", "content": content}]
    params = {"model": MODEL_NAME, "messages": messages, "max_tokens": MAX_TOKENS, "temperature": temperature}
    result = client.chat.completions.create(**params)

    translated_text = result.choices[0].message.content
    print("Translated Text:", translated_text)

    return translated_text
152
+
153
+
154
def translate(text_raw):
    """Turn a free-form instruction into a ``(task, text)`` pair via GPT.

    Retries up to six times, lowering the sampling temperature by 0.2 on each
    attempt to coax a well-formed ``task#text`` reply.

    Returns:
        ``(task, text)`` on success, ``(None, None)`` when every attempt fails.
    """
    tasks = ('regen', 'style_transfer', 'adjustment')
    separator = '#'
    temperature = 1.5
    for _ in range(6):
        result = __translate(text_raw, temperature)
        if separator in result:
            # BUG FIX: the original split twice and took index [1], which
            # silently dropped everything after a second '#'.  partition
            # keeps any further '#' inside the text portion intact.
            task, _, text = result.partition(separator)
            if task in tasks:
                print("=========GPT-3.5 Turbo successfully translated the text.===========")
                print("Task:", task)
                print("Text:", text)
                print("===================================================================")
                return task, text
        temperature -= 0.2
    return None, None
173
+
174
+
175
def inference_warpper(data_id, change_prompt):
    """Run the motion-editing pipeline for one base motion and instruction.

    Loads the pickled base motion, translates the instruction into a
    (task, text) pair, picks the task's model pair and invokes the pipeline.
    Returns the pipeline result, or None when translation fails.
    """

    def pick_models(task):
        # One generator/discriminator pair per editing task.
        key_map = {
            'regen': ('regen', 'regen_disc'),
            'style_transfer': ('style_transfer', 'style_transfer_disc'),
            'adjustment': ('adjustment', 'adjustment_disc'),
        }
        if task not in key_map:
            raise ValueError(f"Invalid task: {task}")
        gen_key, disc_key = key_map[task]
        return {'model': models[gen_key], 'disc_model': models[disc_key]}

    # Dropdown entries may carry a trailing 'fps' suffix; strip it.
    if data_id.endswith('fps'):
        data_id = data_id[:-3]
    with open(os.path.join(app_root, f"app_base_motion/{data_id}.pkl"), 'rb') as f:
        motion = pickle.load(f)

    task, text_ = translate(change_prompt)
    if task is None:
        print("GPT-3.5 Turbo failed to translate the text.")
        return None

    model = pick_models(task)
    data = {
        'source': motion,
        'text': text_,
        'prog_ind': None,
        'All_one_model': True,
        'model_type': task,
    }
    return pipeline(data, model, device, diffuser, SEQLEN=16, smplx_pth='deps/smplx')
206
+
207
def get_all_videos():
    """Return the sorted stem names of every rendered base-motion video."""
    video_dir = os.path.join(app_root, "app_base_motion_mp4")
    names = sorted(entry for entry in os.listdir(video_dir) if entry.endswith('.mp4'))
    return [os.path.splitext(name)[0] for name in names]
212
+
213
def bar_length(data_id):
    """Return the fixed display length for any motion.

    ``data_id`` is accepted for interface compatibility but is unused; every
    demo motion is currently treated as 4 units long.
    """
    return 4
215
+
216
# Gradio UI: pick a base motion, preview it, then edit it with a text
# instruction.  Layout reconstructed from the commit diff — TODO confirm
# which widgets sit inside the Row.
with gr.Blocks() as demo:
    # BUG FIX: Markdown requires a space after '###' for an ATX heading;
    # "###Select..." rendered as literal text.
    gr.Markdown("### Select a Raw Motion")
    all_videos = get_all_videos()

    with gr.Row():
        data_id_input = gr.Dropdown(
            label=f"motion can be selected({len(all_videos)} in total)",
            choices=all_videos,
            multiselect=False,
            allow_custom_value=True)
        show_video_button = gr.Button("Display Motion")
        video_output = gr.Video(label="Selected Motion")

    change_prompt_textbox = gr.Textbox(visible=True, label="Text Instruction")
    inference_button = gr.Button("Inference")
    output_file = gr.Video(label="Generated Video(truncated to 5sec due to the constraint of resource)")

    # Preview the selected raw motion.
    show_video_button.click(
        fn=select_and_show,
        inputs=[data_id_input],
        outputs=[video_output]
    )

    # Run the full editing pipeline on the selected motion.
    inference_button.click(
        fn=inference_warpper,
        inputs=[data_id_input, change_prompt_textbox],
        outputs=[output_file]
    )

demo.launch()