maryna7679 committed on
Commit
f7d2a44
·
0 Parent(s):

Project base (no login)

Browse files
.gitignore ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Covers JetBrains IDEs: IntelliJ, GoLand, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
2
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
3
+
4
+ # User-specific stuff
5
+ .idea/**/workspace.xml
6
+ .idea/**/tasks.xml
7
+ .idea/**/usage.statistics.xml
8
+ .idea/**/dictionaries
9
+ .idea/**/shelf
10
+
11
+ # AWS User-specific
12
+ .idea/**/aws.xml
13
+
14
+ # Generated files
15
+ .idea/**/contentModel.xml
16
+
17
+ # Sensitive or high-churn files
18
+ .idea/**/dataSources/
19
+ .idea/**/dataSources.ids
20
+ .idea/**/dataSources.local.xml
21
+ .idea/**/sqlDataSources.xml
22
+ .idea/**/dynamic.xml
23
+ .idea/**/uiDesigner.xml
24
+ .idea/**/dbnavigator.xml
25
+
26
+ # Gradle
27
+ .idea/**/gradle.xml
28
+ .idea/**/libraries
29
+
30
+ # Gradle and Maven with auto-import
31
+ # When using Gradle or Maven with auto-import, you should exclude module files,
32
+ # since they will be recreated, and may cause churn. Uncomment if using
33
+ # auto-import.
34
+ # .idea/artifacts
35
+ # .idea/compiler.xml
36
+ # .idea/jarRepositories.xml
37
+ # .idea/modules.xml
38
+ # .idea/*.iml
39
+ # .idea/modules
40
+ # *.iml
41
+ # *.ipr
42
+
43
+ # CMake
44
+ cmake-build-*/
45
+
46
+ # Mongo Explorer plugin
47
+ .idea/**/mongoSettings.xml
48
+
49
+ # File-based project format
50
+ *.iws
51
+
52
+ # IntelliJ
53
+ out/
54
+
55
+ # mpeltonen/sbt-idea plugin
56
+ .idea_modules/
57
+
58
+ # JIRA plugin
59
+ atlassian-ide-plugin.xml
60
+
61
+ # Cursive Clojure plugin
62
+ .idea/replstate.xml
63
+
64
+ # SonarLint plugin
65
+ .idea/sonarlint/
66
+ # see https://community.sonarsource.com/t/is-the-file-idea-idea-idea-sonarlint-xml-intended-to-be-under-source-control/121119
+ .idea/sonarlint.xml
67
+
68
+ # Crashlytics plugin (for Android Studio and IntelliJ)
69
+ com_crashlytics_export_strings.xml
70
+ crashlytics.properties
71
+ crashlytics-build.properties
72
+ fabric.properties
73
+
74
+ # Editor-based HTTP Client
75
+ .idea/httpRequests
76
+ http-client.private.env.json
77
+
78
+ # Android studio 3.1+ serialized cache file
79
+ .idea/caches/build_file_checksums.ser
80
+
81
+ # Apifox Helper cache
82
+ .idea/.cache/.Apifox_Helper
83
+ .idea/ApifoxUploaderProjectSetting.xml
84
+
85
+ # Project specific
86
+ key.json
87
+ venv
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/CaptionProject.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="inheritedJdk" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="31">
8
+ <item index="0" class="java.lang.String" itemvalue="httpx" />
9
+ <item index="1" class="java.lang.String" itemvalue="python-dateutil" />
10
+ <item index="2" class="java.lang.String" itemvalue="python-dotenv" />
11
+ <item index="3" class="java.lang.String" itemvalue="h11" />
12
+ <item index="4" class="java.lang.String" itemvalue="MarkupSafe" />
13
+ <item index="5" class="java.lang.String" itemvalue="requests" />
14
+ <item index="6" class="java.lang.String" itemvalue="Jinja2" />
15
+ <item index="7" class="java.lang.String" itemvalue="sniffio" />
16
+ <item index="8" class="java.lang.String" itemvalue="typing-inspection" />
17
+ <item index="9" class="java.lang.String" itemvalue="exceptiongroup" />
18
+ <item index="10" class="java.lang.String" itemvalue="eval_type_backport" />
19
+ <item index="11" class="java.lang.String" itemvalue="zipp" />
20
+ <item index="12" class="java.lang.String" itemvalue="certifi" />
21
+ <item index="13" class="java.lang.String" itemvalue="anyio" />
22
+ <item index="14" class="java.lang.String" itemvalue="urllib3" />
23
+ <item index="15" class="java.lang.String" itemvalue="itsdangerous" />
24
+ <item index="16" class="java.lang.String" itemvalue="Flask" />
25
+ <item index="17" class="java.lang.String" itemvalue="blinker" />
26
+ <item index="18" class="java.lang.String" itemvalue="annotated-types" />
27
+ <item index="19" class="java.lang.String" itemvalue="importlib_metadata" />
28
+ <item index="20" class="java.lang.String" itemvalue="pydantic" />
29
+ <item index="21" class="java.lang.String" itemvalue="six" />
30
+ <item index="22" class="java.lang.String" itemvalue="Werkzeug" />
31
+ <item index="23" class="java.lang.String" itemvalue="click" />
32
+ <item index="24" class="java.lang.String" itemvalue="pydantic_core" />
33
+ <item index="25" class="java.lang.String" itemvalue="colorama" />
34
+ <item index="26" class="java.lang.String" itemvalue="typing_extensions" />
35
+ <item index="27" class="java.lang.String" itemvalue="charset-normalizer" />
36
+ <item index="28" class="java.lang.String" itemvalue="mistralai" />
37
+ <item index="29" class="java.lang.String" itemvalue="httpcore" />
38
+ <item index="30" class="java.lang.String" itemvalue="idna" />
39
+ </list>
40
+ </value>
41
+ </option>
42
+ </inspection_tool>
43
+ </profile>
44
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (CaptionProject)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/CaptionProject.iml" filepath="$PROJECT_DIR$/.idea/CaptionProject.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="" />
5
+ </component>
6
+ </project>
README.md ADDED
Binary file (38 Bytes). View file
 
captions.jsonl ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {"clean_text": "measure", "start_time": 0.0, "user_id": 11, "signer": 39, "file": "Aj9SDSAOXf4", "end_time": 1.435, "url": "www.youtube.com/watch?v=Aj9SDSAOXf4", "text": "measure"}
2
+ {"clean_text": "apple", "start_time": 1.5, "user_id": 11, "signer": 39, "file": "Aj9SDSAOXf4", "end_time": 1.6, "url": "www.youtube.com/watch?v=Aj9SDSAOXf4", "text": "apple"}
3
+ {"clean_text": "test", "start_time": 0.0, "user_id": 11, "signer": 39, "file": "c2ORbHSQ5pw", "end_time": 1.435, "url": "www.youtube.com/watch?v=c2ORbHSQ5pw", "text": "test"}
4
+ {"clean_text": "aaaaaaaaaa", "start_time": 1.5, "user_id": 11, "signer": 39, "file": "c2ORbHSQ5pw", "end_time": 1.6, "url": "www.youtube.com/watch?v=c2ORbHSQ5pw", "text": "aaaaaaaaaa"}
5
+ {"clean_text": "bbbbbbbb", "start_time": 1.5, "user_id": 11, "signer": 39, "file": "c2ORbHSQ5pw", "end_time": 1.6, "url": "www.youtube.com/watch?v=c2ORbHSQ5pw", "text": "bbbbbbbb"}
captions2.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"clean_text":"measure","start_time":0.0,"user_id":11,"signer":39,"file":"a1NeXe8bhuo","end_time":1.435,"url":"www.youtube.com\/watch?v=a1NeXe8bhuo","text":"measure"}
2
+ {"clean_text":"apple","start_time":1.5,"user_id":11,"signer":39,"file":"a1NeXe8bhuo","end_time":1.6,"url":"www.youtube.com\/watch?v=a1NeXe8bhuo","text":"apple"}
db_upload.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import firebase_admin
2
+ from firebase_admin import db
3
+ import json
4
+
5
# One-off upload script: pushes the contents of videos.json to the "/Videos"
# node of the Firebase Realtime Database.

# The service-account key is read from the relative path "key.json".
cred_obj = firebase_admin.credentials.Certificate('key.json')
default_app = firebase_admin.initialize_app(
    cred_obj,
    {'databaseURL': "https://video-link-db-default-rtdb.europe-west1.firebasedatabase.app/"},
)
videos_ref = db.reference("/Videos")

# Parse the whole JSON document, then hand the resulting object to set().
with open("videos.json", "r") as f:
    file_contents = json.load(f)

videos_ref.set(file_contents)
main.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import re
4
+ import firebase_admin
5
+ from firebase_admin import db
6
+ import hashlib
7
+ import jsonlines
8
+ import json
9
+
10
# --- Firebase connection ---------------------------------------------------
# The service-account key is read from the relative path "key.json".
cred_obj = firebase_admin.credentials.Certificate('key.json')
default_app = firebase_admin.initialize_app(
    cred_obj,
    {'databaseURL': "https://video-link-db-default-rtdb.europe-west1.firebasedatabase.app/"},
)
videos_ref = db.reference("/Videos")

# --- Caption data ----------------------------------------------------------
# captions.jsonl holds one JSON object per line; all of it is kept in memory
# as a single DataFrame that the functions below read and modify.
with open("captions.jsonl") as file:
    captions = pd.read_json(file, lines=True)

# --- Playlist state --------------------------------------------------------
# Position inside video_links of the video currently shown.
current_video = 0
# The first element of the value stored under "/Videos" is skipped.
# NOTE(review): presumably the node is keyed from "1" (as in videos.json) and
# is returned as a list whose slot 0 is empty - confirm against the database.
video_links = videos_ref.get()[1:]

# --- Styling ---------------------------------------------------------------
# ".container" reserves a box whose height is 56.25% of its width (16:9) and
# ".video" stretches the embedded iframe over that whole box.
css = """
.container {
position: relative;
width: 100%;
height: 0;
padding-bottom: 56.25%;
}
.video {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
}"""
37
+
38
+
39
def auth_function(username, password):
    """Accept every login attempt.

    Used as the ``auth`` callback of ``app.launch``. Both arguments are
    ignored, so any username/password pair is let through.

    Returns:
        Always ``True``.
    """
    return True
41
+
42
+
43
def youtube_link_to_id(link):
    """Extract the video id from a YouTube link.

    Recognised forms, with or without a URL scheme:

    * ``.../watch?v=<id>`` - the ``v`` parameter may sit anywhere in the
      query string, and a ``#fragment`` is ignored;
    * ``youtu.be/<id>`` short links;
    * ``youtube.com/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>`` paths.

    Any other input falls back to the historical rule: the text between the
    first ``=`` and the following ``&``, or up to the end of the line when
    there is no ``&``.

    Args:
        link: URL (or scheme-less URL) as a string.

    Returns:
        The video id as a string.

    Raises:
        ValueError: If no id can be found in ``link``.
    """
    from urllib.parse import parse_qs, urlparse

    try:
        # A leading "//" makes urlparse treat scheme-less links such as
        # "www.youtube.com/watch?v=..." as host + path rather than path only.
        parts = urlparse(link if "://" in link else "//" + link)
        host = (parts.hostname or "").lower()
    except ValueError:
        # Malformed authority part: let the legacy rule below decide.
        parts = None

    if parts is not None:
        ids = parse_qs(parts.query).get("v")
        if ids:
            return ids[0]

        segments = [segment for segment in parts.path.split("/") if segment]
        if host in ("youtu.be", "www.youtu.be") and segments:
            return segments[0]
        if (
            host.endswith(("youtube.com", "youtube-nocookie.com"))
            and len(segments) >= 2
            and segments[0] in ("embed", "shorts", "live")
        ):
            return segments[1]

    # Legacy behaviour, kept verbatim so links that used to work still do.
    found = re.findall(r"=(.*?)&", link) or re.findall(r"=(.*)", link)
    if not found:
        raise ValueError(f"No YouTube video id found in link: {link!r}")
    return found[0]
48
+
49
+
50
def get_captions(video_id):
    """Return the caption rows of one video, shaped for the editor table.

    Rows of the module-level ``captions`` frame whose ``file`` value equals
    ``video_id`` are selected. Only the timing and text columns are kept and
    they are relabelled "Start", "Text" and "End" (in that order).
    """
    wanted = ['start_time', 'text', 'end_time']
    rows = captions.loc[captions['file'] == video_id, wanted]
    return rows.rename(
        columns={'start_time': "Start", 'text': "Text", 'end_time': "End"}
    )
56
+
57
+
58
def get_next_captions():
    """Return the editor-ready captions for the video at ``current_video``.

    Despite the name, this does not advance the position: it only reads the
    link that ``current_video`` currently selects in ``video_links``.
    """
    link = video_links[current_video]
    return get_captions(youtube_link_to_id(link))
63
+
64
+
65
def get_youtube_video(video_id):
    """Build the HTML snippet that embeds the YouTube player for a video.

    The markup uses the ``container`` and ``video`` CSS classes defined in
    the module-level ``css`` string.

    Args:
        video_id: YouTube video id; converted with ``str()`` before use.

    Returns:
        An HTML string with a ``<div>`` wrapping a single ``<iframe>``.
    """
    from urllib.parse import quote

    # The id ends up inside an HTML attribute. Percent-encoding everything
    # outside [A-Za-z0-9_.~-] keeps a crafted id from closing the attribute
    # or injecting markup; ordinary ids pass through unchanged.
    safe_id = quote(str(video_id), safe="")
    return f"""
<div class="container">
<iframe src="https://www.youtube.com/embed/{safe_id}" frameborder="0" allowfullscreen class="video"></iframe>
</div>"""
70
+
71
+
72
def get_next_youtube_video():
    """Advance to the following video and return its embed HTML.

    ``current_video`` is moved one step forward and wraps back to 0 once it
    reaches the length of ``video_links``.
    """
    global current_video
    following = current_video + 1
    current_video = 0 if following == len(video_links) else following
    return get_youtube_video(youtube_link_to_id(video_links[current_video]))
83
+
84
+
85
def refresh_components():
    """Switch to the next video and return ``(embed_html, captions_table)``.

    The video is advanced first on purpose: ``get_next_captions`` reads the
    position that ``get_next_youtube_video`` has just updated.
    """
    embed_html = get_next_youtube_video()
    return embed_html, get_next_captions()
89
+
90
+
91
def save(df):
    """Write the edited captions of the current video back and persist them.

    Only the rows of ``captions`` that belong to the video at
    ``current_video`` are updated. The edited table arrives with its own
    0..n-1 index, so values are written by position into those rows instead
    of being index-aligned against the whole ``captions`` frame; alignment
    would overwrite the first n rows and blank every other row.

    Args:
        df: Table from the caption editor with the columns "Start", "Text"
            and "End", one row per caption of the current video.

    Returns:
        "Save successful!" when the captions were updated and written to
        ``captions2.jsonl``, otherwise
        "Save failed: Incorrect input format".
    """
    try:
        # Convert everything before touching ``captions`` so that one bad
        # cell cannot leave a half-applied edit behind.
        starts = df['Start'].apply(float).to_numpy()
        ends = df['End'].apply(float).to_numpy()
    except (ValueError, TypeError):
        # TypeError covers cells that arrive as None.
        return "Save failed: Incorrect input format"
    texts = df['Text'].to_numpy()

    video_id = youtube_link_to_id(video_links[current_video])
    rows = captions['file'] == video_id
    if int(rows.sum()) != len(df):
        # The table no longer matches the stored captions row for row.
        return "Save failed: Incorrect input format"

    captions.loc[rows, 'start_time'] = starts
    captions.loc[rows, 'text'] = texts
    captions.loc[rows, 'end_time'] = ends
    captions.to_json('captions2.jsonl', orient='records', lines=True)
    return "Save successful!"
101
+
102
+
103
# Captions of the video shown at start-up; computed once and used both as the
# table's initial value and for its fixed row count.
initial_captions = get_captions(youtube_link_to_id(video_links[current_video]))

# Layout: caption table with its Save button on the left, embedded player
# with the Next button on the right.
with gr.Blocks(css=css) as app:
    gr.Markdown("## Caption Editor")
    with gr.Row():
        with gr.Column():
            caption_editor = gr.DataFrame(
                interactive=True,
                value=initial_captions,
                datatype=["number", "str", "number"],
                row_count=(initial_captions.shape[0], "fixed"),
                col_count=(3, "fixed"),
                column_widths=["20%", "60%", "20%"],
            )
            save_button = gr.Button("Save")
            save_result = gr.Markdown()
        with gr.Column():
            video_embed = gr.HTML(value=get_youtube_video(youtube_link_to_id(video_links[0])))
            next_video_button = gr.Button("Next")

    # "Next" swaps both the player and the table; "Save" reports its outcome
    # in the Markdown element under the table.
    next_video_button.click(fn=refresh_components, outputs=[video_embed, caption_editor])
    save_button.click(fn=save, inputs=caption_editor, outputs=save_result)

app.launch(auth=auth_function)
requirements.txt ADDED
Binary file (2.29 kB). View file
 
videos.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "1": "www.youtube.com/watch?v=c2ORbHSQ5pw",
3
+ "2": "www.youtube.com/watch?v=Aj9SDSAOXf4"
4
+ }