AmirMoris committed on
Commit
7d7aeee
·
verified ·
1 Parent(s): d9ca905

Upload 9 files

Browse files
Textual Dataset Generation/.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
Textual Dataset Generation/.idea/Textual Dataset Generation.iml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="JAVA_MODULE" version="4">
3
+ <component name="NewModuleRootManager" inherit-compiler-output="true">
4
+ <exclude-output />
5
+ <content url="file://$MODULE_DIR$" />
6
+ <orderEntry type="inheritedJdk" />
7
+ <orderEntry type="sourceFolder" forTests="false" />
8
+ </component>
9
+ </module>
Textual Dataset Generation/.idea/misc.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="true" project-jdk-name="11" project-jdk-type="JavaSDK">
4
+ <output url="file://$PROJECT_DIR$/out" />
5
+ </component>
6
+ </project>
Textual Dataset Generation/.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/Textual Dataset Generation.iml" filepath="$PROJECT_DIR$/.idea/Textual Dataset Generation.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
Textual Dataset Generation/.idea/workspace.xml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="788fdcfa-b47d-4627-847f-ec90b9a0d9dd" name="Changes" comment="" />
5
+ <option name="SHOW_DIALOG" value="false" />
6
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
7
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
8
+ <option name="LAST_RESOLUTION" value="IGNORE" />
9
+ </component>
10
+ <component name="ProjectColorInfo"><![CDATA[{
11
+ "associatedIndex": 5
12
+ }]]></component>
13
+ <component name="ProjectId" id="2hyRbehAgzDFNKMNHP7EtzBH3c2" />
14
+ <component name="ProjectViewState">
15
+ <option name="hideEmptyMiddlePackages" value="true" />
16
+ <option name="showLibraryContents" value="true" />
17
+ </component>
18
+ <component name="PropertiesComponent"><![CDATA[{
19
+ "keyToString": {
20
+ "Python.web scrap.executor": "Run",
21
+ "RunOnceActivity.OpenProjectViewOnStart": "true",
22
+ "RunOnceActivity.ShowReadmeOnStart": "true",
23
+ "kotlin-language-version-configured": "true",
24
+ "last_opened_file_path": "C:/Users/Amir/Desktop/Textual Dataset Generation",
25
+ "nodejs_package_manager_path": "npm",
26
+ "project.structure.last.edited": "SDKs",
27
+ "project.structure.proportion": "0.15",
28
+ "project.structure.side.proportion": "0.22758621",
29
+ "vue.rearranger.settings.migration": "true"
30
+ }
31
+ }]]></component>
32
+ <component name="RunManager">
33
+ <configuration name="web scrap" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
34
+ <module name="Textual Dataset Generation" />
35
+ <option name="ENV_FILES" value="" />
36
+ <option name="INTERPRETER_OPTIONS" value="" />
37
+ <option name="PARENT_ENVS" value="true" />
38
+ <envs>
39
+ <env name="PYTHONUNBUFFERED" value="1" />
40
+ </envs>
41
+ <option name="SDK_HOME" value="" />
42
+ <option name="SDK_NAME" value="Python 3.11 (base)" />
43
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
44
+ <option name="IS_MODULE_SDK" value="false" />
45
+ <option name="ADD_CONTENT_ROOTS" value="true" />
46
+ <option name="ADD_SOURCE_ROOTS" value="true" />
47
+ <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
48
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/web scrap.py" />
49
+ <option name="PARAMETERS" value="" />
50
+ <option name="SHOW_COMMAND_LINE" value="false" />
51
+ <option name="EMULATE_TERMINAL" value="false" />
52
+ <option name="MODULE_MODE" value="false" />
53
+ <option name="REDIRECT_INPUT" value="false" />
54
+ <option name="INPUT_FILE" value="" />
55
+ <method v="2" />
56
+ </configuration>
57
+ </component>
58
+ <component name="SharedIndexes">
59
+ <attachedChunks>
60
+ <set>
61
+ <option value="jdk-11.0.21-corretto-11.0.21-3183f394aec4-d55de845" />
62
+ </set>
63
+ </attachedChunks>
64
+ </component>
65
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
66
+ <component name="TaskManager">
67
+ <task active="true" id="Default" summary="Default task">
68
+ <changelist id="788fdcfa-b47d-4627-847f-ec90b9a0d9dd" name="Changes" comment="" />
69
+ <created>1718564403600</created>
70
+ <option name="number" value="Default" />
71
+ <option name="presentableId" value="Default" />
72
+ <updated>1718564403600</updated>
73
+ <workItem from="1718564404645" duration="101000" />
74
+ </task>
75
+ <servers />
76
+ </component>
77
+ <component name="TypeScriptGeneratedFilesManager">
78
+ <option name="version" value="3" />
79
+ </component>
80
+ <component name="com.intellij.coverage.CoverageDataManagerImpl">
81
+ <SUITE FILE_PATH="coverage/Textual_Dataset_Generation$web_scrap.coverage" NAME="web scrap Coverage Results" MODIFIED="1718564477196" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
82
+ </component>
83
+ </project>
Textual Dataset Generation/Dataset/input_dataset.csv ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ category,caption
2
+ shoes,a person sitting on top of a wooden bench
3
+ shoes,unpaired red Nike sneaker
4
+ shoes,brown Nike sneaker on yellow textile
5
+ shoes,unpaired maroon plimsoll on top of yellow textile
6
+ shoes,a person with their feet up in the air
7
+ shoes,white and blue nike air force 1 high
8
+ shoes,pair of white-and-orange athletic shoes on white box
9
+ shoes,green and black nike athletic shoe
10
+ shoes,"Various colorful sports shoes laid on sand beach background, studio shot, flat lay."
11
+ shoes,unpaired OFF WHITE X Nike Air Force 1 low-top sneaker
12
+ shoes,white and red nike athletic shoe
13
+ shoes,person wearing white Nike running shoes standing on black concrete path
14
+ shoes,unpaired gray Nike running shoe
15
+ shoes,pair of blue-and-pink floral almond-toe pumps
16
+ shoes,pair of gray running shoes
17
+ shoes,Close up of a young skater girl's feet and skateboard
18
+ shoes,white black and red nike air max 90
19
+ shoes,a pair of white and brown shoes on a table
20
+ shoes,unpaired red Air Jordan 12
21
+ sneakers,a man sitting on a brick wall next to a skateboard
22
+ sneakers,a person wearing a hat
23
+ sneakers,black white and red nike high top sneaker
24
+ sneakers,two people sitting on the ground with their legs crossed
25
+ sneakers,pair of black-white-and-red Air Jordan 1 shoes
26
+ sneakers,Nike shoe lot
27
+ sneakers,white nike air force 1 low
28
+ sneakers,person wearing white nike sneakers
29
+ sneakers,a man holding a pair of purple shoes
30
+ sneakers,close-up photography of person wears brown-and-white Nike Air Max
31
+ sneakers,person holding white and red heart print box
32
+ sneakers,pair of Carhartt x Nike Air Force 1 shoes
33
+ sneakers,Legs and sneakers of teenage boys and girls standing on the sidewalk
34
+ sneakers,man sitting on the ledge of a building wearing Air Jordan 1 low-top shoes
35
+ sneakers,blue white and red neon light
36
+ sneakers,person wearing black pants and blue and white nike sneakers
37
+ heels,a person standing on top of a wooden stool
38
+ heels,woman in pink patent leather stilettos
39
+ heels,woman in black leather heeled shoes
40
+ heels,a pair of shoes sitting on top of a couch
41
+ heels,red and white plastic frame on white table
42
+ heels,pair of women's brown pointed-toe pumps on board
43
+ heels,woman wearing brown leather heeled sandals walking on staircase
44
+ heels,a pair of woman's legs wearing black gloves and red high heels
45
+ heels,a pair of purple high heeled shoes on a woman's leg
46
+ heels,person in black pants and red shoes
47
+ heels,womens white and silver peep toe pumps
48
+ heels,a woman wearing red high heels standing on a wooden floor
49
+ heels,black leather boot on white surface
50
+ heels,gray leather peep toe sandals
51
+ heels,brown leather boots on white table
52
+ heels,a woman's legs wearing red high heels
53
+ heels,black leather peep toe heeled shoes
54
+ heels,women's seven assorted-color footwear on surface
55
+ heels,womens brown leather peep toe heeled shoes
56
+ watches,a man is holding onto a railing in a room
57
+ watches,blue and silver analog watch at 10 00
58
+ watches,gold and white analog watch
59
+ watches,round silver-colored analog watch with black strap
60
+ watches,Close up of man putting watch on his hand.
61
+ watches,brown and white analog watch at 10 10
62
+ watches,round silver-colored watch on rack during sunset
63
+ watches,silver and black chronograph watch
64
+ watches,a person holding a cup of tea in their hand
65
+ watches,round gray analog watch with brown band
66
+ watches,watch at 10:34
67
+ watches,silver and white round analog watch
68
+ watches,Man in black suit wear new watches. Luxury style
69
+ watches,person wearing silver link bracelet round analog watch
70
+ watches,round silver-colored Nomos watch at 10:10
71
+ watches,silver link bracelet round chronograph watch
72
+ watches,a woman sitting at a table with a cell phone in her hand
73
+ watches,black and silver round analog watch
74
+ watches,watch at 8:45
75
+ watches,blue and gold analog watch
76
+ pants,three pairs of jeans are lined up on a white surface
77
+ pants,person in pink pants and white shoes
78
+ pants,blue denim jeans on white textile
79
+ pants,person wears blue jeans
80
+ pants,a pair of black leather boots sitting on top of a pile of blue jeans
81
+ pants,man wearing brown fitted jeans and sneakers standing on road at daytime
82
+ pants,three assorted-color denim bottoms
83
+ pants,blue denim jeans on brown clothes hanger
84
+ pants,a man walking across a bridge carrying a suitcase
85
+ pants,man in white t-shirt and black pants standing on white floor
86
+ pants,woman in orange pants
87
+ pants,selective focus photography of hanged denim jeans
88
+ pants,a person sitting on a bench with their legs crossed
89
+ pants,woman in blue denim jeans and white sneakers
90
+ pants,blue denim jeans on black surface
91
+ pants,a pair of hands holding a pair of jeans
92
+ pants,closeup photo of person hiding his right hand in his pocket
93
+ pants,woman in white tank top and blue denim jeans standing on beach during daytime
94
+ pants,woman in gray tank top and gray pants
95
+ clothing,a piece of cloth sitting on top of a table
96
+ clothing,"gray cardigan, blue jeans, and pair of brown chunky heeled shoes"
97
+ clothing,green clothes hanger
98
+ clothing,closeup of hanged shirts on rack
99
+ clothing,a woman holding a pair of jeans
100
+ clothing,women's white long sleeve shirt
101
+ clothing,pair of white low-top sneakers
102
+ clothing,hanged jeans lot
103
+ clothing,a close up of a pair of blue jeans
104
+ clothing,gray dress shirt hang on brown wooden rack in front of window with white curtain
105
+ clothing,white and red nike air force 1 high
106
+ clothing,woman standing selecting clothes
107
+ clothing,a stack of folded clothes on a table next to a lamp
108
+ clothing,woman wearing black blouse and hat
109
+ clothing,woman in red long sleeve dress
110
+ clothing,assorted color folded shirts on wooden panel
111
+ clothing,a pile of folded clothes sitting on top of a brown leather chair
112
+ clothing,selective focus photography of hanged three gray tee shirts
113
+ clothing,white and blue cat-printed crew-neck T-shirt
114
+ clothing,assorted-color shirt lot hang on rack
115
+ dress,a woman sitting on a bed in a room
116
+ dress,woman in red sleeveless dress standing on gray concrete floor during daytime
117
+ dress,green sleeveless dress hanged on white wall
118
+ dress,woman in seashore
119
+ dress,a woman standing on a beach holding a banana leaf
120
+ dress,woman walking on seaside while holding woven bag
121
+ dress,woman in yellow and white floral dress
122
+ dress,woman wearing black turtleneck long-sleeved dress
123
+ dress,a woman kneeling on a white background posing for a picture
124
+ dress,woman walking down stair under clear blue sky during daytime
125
+ dress,black leather spaghetti strap dress
126
+ dress,woman standing in front of white wall
127
+ dress,woman in beige floral sleeveless dress
128
+ dress,woman wearing pink dress while standing near black metal rail at daytime
129
+ dress,a rack of clothes hanging on a wall
130
+ dress,woman in brown long sleeve dress standing beside gray wall
131
+ dress,woman in green long sleeve dress standing on brown field during daytime
132
+ dress,selective focus photo of smiling woman wearing black dress standing on concrete pavement
133
+ shirt,a close up of an umbrella with a metal handle
134
+ shirt,white crew neck t-shirt
135
+ shirt,man in white dress shirt wearing black sunglasses
136
+ shirt,gray button up long sleeve shirt
137
+ shirt,"A young businessman standing in corridor outside office, looking at camera."
138
+ shirt,black crew neck t-shirt
139
+ shirt,blue and white checkered dress shirt
140
+ shirt,man in black and white plaid button up shirt wearing black sunglasses
141
+ shirt,white button up shirt on clothes hanger
142
+ shirt,white crew neck long sleeve shirt
143
+ shirt,A fathers day greeting card concept. Flat lay. Copy space.
144
+ shirt,man in white dress shirt with black and white polka dots necktie
145
+ shirt,woman wearing orange crew-neck sweatshirt standing while putting right hand on her head
146
+ shirt,a person holding a white shirt on a hanger
147
+ shirt,man in blue dress shirt wearing black sunglasses
148
+ shirt,man in white crew neck t-shirt standing on green grass field during daytime
149
+ t-shirt,a young man wearing a black shirt and a chain around his neck
150
+ t-shirt,man wearing white crew-neck t-shirts
151
+ t-shirt,a woman sitting on the floor with her legs crossed
152
+ t-shirt,man in white crew neck t-shirt
153
+ t-shirt,smiling woman in black and white print t-shirt
154
+ t-shirt,woman in black crew neck t-shirt and blue denim jeans
155
+ t-shirt,photo of blue crew-neck tops
156
+ t-shirt,a woman in a white shirt and a pink hat
157
+ t-shirt,woman in black crew neck t-shirt standing near painting
158
+ t-shirt,men's white crew-neck t-shirt
159
+ t-shirt,man wearing white crew-neck t-shirt
160
+ t-shirt,man in pink crew neck t-shirt wearing black sunglasses
161
+ t-shirt,man in black crew neck t-shirt standing during daytime
Textual Dataset Generation/Dataset/textual_dataset.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
Textual Dataset Generation/main.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pandas as pd
3
+
4
# Edit tables. Each entry is (caption_fragment, edit_instruction):
#   * caption_fragment is the text inserted into / appended to a caption,
#   * edit_instruction is the human-readable edit request.
# A "*" inside an instruction is a placeholder that the perform_* functions
# replace with the product word found in the caption.

color_changes = [
    ("black", "Change * color to black"),
    ("white", "Change * color to white"),
    ("red", "Change * color to red"),
    ("blue", "Change * color to blue"),
    ("orange", "Change * color to orange"),
    ("purple", "Change * color to purple"),
]

scene_changes = [
    ("by the beach", "put * by the beach"),
    ("in the desert", "put * in the desert"),
    # The placeholder belongs in the instruction, not in the caption fragment;
    # otherwise a literal "*" ends up in the generated output caption.
    ("in a festival", "put * in a festival"),
    ("on a table", "put * on a table"),
    ("in a forest", "put * in a forest"),
    ("on a roof", "put * on a roof"),
    ("in the air", "make * in the air"),
    ("in hands", "put * in someone`s hands"),
    ("covered with water", "put * in water"),
]

season_changes = [
    ("in snow", "add snow effects"),
    ("in spring", "add spring effects"),
    ("in summer", "add summer effects"),
    ("in autumn", "add autumn effects"),
    ("during daytime", "make the scene in daytime"),
    ("during nighttime", "make the period in nighttime"),
]

background_changes = [
    ("in front of a mountain", "add mountain in the background"),
    ("in front of the pyramids", "add the pyramids in the background"),
]

# Product words searched for (as substrings) in captions. Entries must be
# unique: a duplicate would produce duplicate dataset rows and would be
# counted twice in the statistics total.
PRODUCTS = [
    "t-shirt",
    "shirt",
    "dress",
    "pants",
    "shoe",
    "watch",
    "vase",
    "sneaker",
    "headphone",
    "bottle",
    "perfume",
    "cup",
    "camera",
    "phone",
    "mobile",
    "bag",
    "earpod",
    "earbud",
    "heel",
]
61
+
62
+
63
def find_words(lst, input):
    """Return the entries of ``lst`` that occur in ``input``.

    Matching is a plain substring search (``str.find``), so only the first
    occurrence of each entry is inspected. When the character immediately
    after that occurrence is ``"s"``, the plural form (entry + ``"s"``) is
    reported instead of the entry itself.

    Args:
        lst: Iterable of candidate words.
        input: Text to search in.

    Returns:
        List of matched words (possibly pluralised), in the order of ``lst``.
    """
    found_words = []
    for w in lst:
        indx = input.find(w)
        if indx == -1:
            continue

        # Position of the character right after the match. Looking one
        # position further would miss "shoes" and mis-detect "shoe store".
        end = indx + len(w)
        if end < len(input) and input[end] == "s":
            w += "s"

        found_words.append(w)

    return found_words
74
+
75
+
76
def perform_color_changes(input, mixed_color=False):
    """Build one recolouring example per (product word, colour) pair.

    Steps:
      1. Drop a trailing plural "s" from the caption, unless the caption ends
         with a product name that itself ends in "s" (e.g. "dress", "pants");
         trimming those would destroy the product word and yield no rows.
      2. Look up the product words present in the caption.
      3. Remove the known colours from the caption (or, with ``mixed_color``,
         turn each into a "<colour>-" prefix) so the inserted colour does not
         sit next to an existing one.
      4. For every product word and every colour, prefix the word with the
         colour.

    Args:
        input: Caption text.
        mixed_color: When true, an existing colour is kept as a "<colour>-"
            prefix instead of being removed.

    Returns:
        List of dicts with keys "caption" (the caption after step 1),
        "edit" (the instruction with "*" replaced by the product word) and
        "output" (the recoloured caption).
    """
    # Product names that already end in "s" must survive the plural trimming.
    s_ending_products = tuple(p for p in PRODUCTS if p.endswith("s"))
    if input.endswith("s") and not input.endswith(s_ending_products):
        input = input[:-1]

    original_input = input
    found_words = find_words(PRODUCTS, input)

    # remove colors to avoid repeated consecutive colors
    for change, _ in color_changes:
        # Keep the leading space in mixed mode so neighbouring words are not
        # glued together ("a red shoe" -> "a red-shoe", not "ared-shoe").
        replacement = f" {change}-" if mixed_color else " "
        input = input.replace(f" {change} ", replacement)
    # Collapse any run of whitespace into a single space.
    input = " ".join(input.split())

    result = []
    for word in found_words:
        for change, change_text in color_changes:
            result.append(
                {
                    "caption": original_input,
                    "edit": change_text.replace("*", word),
                    "output": input.replace(word, f"{change} {word}"),
                }
            )

    return result
101
+
102
+
103
def perform_scene_changes(input):
    """Build one scene-change example per (product word, scene) pair.

    Every scene fragment already present in the caption is removed first, then
    each scene fragment is appended in turn to the remaining caption.

    Args:
        input: Caption text.

    Returns:
        List of dicts with keys "caption" (the untouched input), "edit" (the
        instruction with "*" replaced by the product word) and "output" (the
        caption with the scene fragment appended, whitespace-normalised).
    """
    original_input = input
    for change, _ in scene_changes:
        input = input.replace(change, "")

    result = []
    found_words = find_words(PRODUCTS, input)
    for word in found_words:
        for change, change_text in scene_changes:
            # split/join collapses the double or trailing spaces that removing
            # a fragment above can leave behind.
            output = " ".join(f"{input} {change}".split())
            result.append(
                {
                    "caption": original_input,
                    "edit": change_text.replace("*", word),
                    "output": output,
                }
            )

    return result
126
+
127
+
128
def perform_affects_changes(input):
    """Build one season/time-of-day example per entry of ``season_changes``.

    Season fragments already present in the caption (e.g. "during daytime")
    are removed first so the appended fragment does not contradict or repeat
    them. Rows are produced once per caption, and only when the caption
    mentions at least one product word.

    Args:
        input: Caption text.

    Returns:
        List of dicts with keys "caption" (the untouched input), "edit" and
        "output"; empty when no product word is found.
    """
    original_input = input
    # Strip the season fragments themselves (not the background fragments)
    # before appending a new one.
    for change, _ in season_changes:
        input = input.replace(change, "")

    if not find_words(PRODUCTS, input):
        return []

    return [
        {
            "caption": original_input,
            "edit": change_text,
            # split/join collapses whitespace left behind by the removal above.
            "output": " ".join(f"{input} {change}".split()),
        }
        for change, change_text in season_changes
    ]
151
+
152
+
153
def perform_background_changes(input):
    """Build one background example per entry of ``background_changes``.

    Background fragments already present in the caption are removed first,
    then each background fragment is appended in turn. Rows are produced once
    per caption, and only when the caption mentions at least one product word.

    Args:
        input: Caption text.

    Returns:
        List of dicts with keys "caption" (the untouched input), "edit" and
        "output"; empty when no product word is found.
    """
    original_input = input
    for change, _ in background_changes:
        input = input.replace(change, "")

    if not find_words(PRODUCTS, input):
        return []

    return [
        {
            "caption": original_input,
            "edit": change_text,
            # split/join collapses whitespace left behind by the removal above.
            "output": " ".join(f"{input} {change}".split()),
        }
        for change, change_text in background_changes
    ]
176
+
177
+
178
def get_rows(input):  # return: list of dictionary
    """Generate every edit example for a single caption.

    The caption is lower-cased, hyphens become spaces and the brand token
    "nike " is dropped; the cleaned caption is then fed to the colour, scene,
    season and background generators, whose rows are concatenated in that
    order.

    Args:
        input: Raw caption text.

    Returns:
        List of row dictionaries produced by the four generators.
    """
    # NOTE(review): hyphens are replaced before product lookup, so a
    # hyphenated product name such as "t-shirt" presumably can only match via
    # its "shirt" part - confirm this is intended.
    caption = input.lower().replace("-", " ")
    # remove brands
    caption = caption.replace("nike ", "")

    rows = []
    rows.extend(perform_color_changes(caption, False))
    rows.extend(perform_scene_changes(caption))
    rows.extend(perform_affects_changes(caption))
    rows.extend(perform_background_changes(caption))
    return rows
201
+
202
+
203
def statistics(strs):
    """Count, per product, how many strings mention it.

    A string counts for a product when the lower-cased product name is a
    substring of the lower-cased string.

    Args:
        strs: Strings to scan.

    Returns:
        Tuple ``(stat, total)``: ``stat`` maps each product to its count,
        ordered from most to least frequent; ``total`` is the sum of the
        counts accumulated while walking ``PRODUCTS``.
    """
    total = 0
    counts = {}
    for product in PRODUCTS:
        needle = product.lower()
        hits = sum(1 for s in strs if needle in s.lower())
        counts[product] = hits
        total += hits

    # Stable sort: products with equal counts keep their PRODUCTS order.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked), total
214
+
215
+
216
def load_dataset(path):
    """Read the CSV file at ``path`` and return it as a pandas DataFrame."""
    return pd.read_csv(path)
220
+
221
+
222
def save_dataset(data, file_path):
    """Write ``data`` to ``file_path`` in JSON Lines form.

    Each element is serialised with ``json.dumps`` on its own line; lines are
    separated by a newline and the last line has no trailing newline.

    Args:
        data: Sequence of JSON-serialisable records.
        file_path: Destination path; an existing file is overwritten.
    """
    with open(file_path, "w") as file:
        file.write("\n".join(json.dumps(record) for record in data))
227
+
228
+
229
def insert_row(df, new_data: list):
    """Return a new DataFrame consisting of ``df`` followed by ``new_data``.

    Args:
        df: Existing DataFrame; it is not modified.
        new_data: Row data accepted by the ``pd.DataFrame`` constructor (for
            example a list of rows), interpreted with the columns of ``df``.

    Returns:
        A DataFrame with the new rows appended and the index renumbered
        from 0.
    """
    # Create a DataFrame with the new row data
    new_rows = pd.DataFrame(new_data, columns=df.columns)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # way to append rows.
    return pd.concat([df, new_rows], ignore_index=True)
235
+
236
+
237
def main():
    """Expand every input caption into edit examples and save them.

    Reads "Dataset/input_dataset.csv", generates rows for each value of its
    "caption" column, prints per-product statistics of the generated captions
    and writes all rows to "Dataset/textual_dataset.jsonl".
    """
    input_dataset = load_dataset("Dataset/input_dataset.csv")

    output_rows = []
    for caption in input_dataset["caption"]:
        generated = get_rows(caption)
        if generated:
            output_rows.extend(generated)

    captions = [row["caption"] for row in output_rows]
    stat, total = statistics(captions)
    # One row per product, single column holding the count.
    stat_table = pd.DataFrame([stat]).T

    print(rf"TOTAL : {total}")
    print(stat_table)

    save_dataset(output_rows, "Dataset/textual_dataset.jsonl")


if __name__ == "__main__":
    main()
Textual Dataset Generation/web scrap.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import time
5
+
6
+
7
def scrap(url: str, timeout: float = 10.0):
    """Fetch ``url`` and collect the alt texts of its matching images.

    The page is parsed with BeautifulSoup; for every ``<div class="WxXog">``
    the first ``<img>`` inside it is inspected and its ``alt`` attribute is
    kept when present and shorter than 100 characters.

    Args:
        url: Page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        List of alt-text strings, or ``None`` when the request fails (the
        error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.content, "html.parser")

    result = []
    for div in soup.find_all("div", class_="WxXog"):
        img_tag = div.find("img")
        if not img_tag:
            continue
        alt_text = img_tag.get("alt")
        if alt_text is not None and len(alt_text) < 100:
            result.append(alt_text)

    return result
28
+
29
+
30
def main():
    """Scrape image captions for each category and save them as CSV.

    For every category the Unsplash search page is scraped; each caption not
    seen before (across all categories) is recorded together with the category
    it was first found under. The result is written to "input_dataset.csv"
    with the columns "category" and "caption".
    """
    categories = [
        "shoes",
        "sneakers",
        "heels",
        "watches",
        "pants",
        "clothing",
        "dress",
        "shirt",
        "t-shirt",
    ]
    base_url = "https://unsplash.com/s/photos/"

    seen_inputs = set()
    data = []
    for category in categories:
        url = f"{base_url}{category}"
        print(f"Scraping {url}...")

        captions = scrap(url)
        if captions is None:
            # Request failed; treat as "no captions" but still pause below.
            captions = []

        for caption in captions:
            if caption in seen_inputs:
                continue
            seen_inputs.add(caption)
            data.append({"category": category, "caption": caption})

        time.sleep(1)  # Delay to avoid rate limiting

    # Save to CSV
    # NOTE(review): the file is written to the working directory, not to a
    # "Dataset/" folder - confirm where the consumer expects it.
    pd.DataFrame(data).to_csv("input_dataset.csv", index=False)


if __name__ == "__main__":
    main()