Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +164 -0
- README.md +27 -13
- app.py +584 -0
- data/embeddings/audio_index.npz +3 -0
- data/embeddings/image_index.npz +3 -0
- data/freesound/audio/fs_001_nature_Autumn_leaves_falling_on_forest_floor_(close,_loopable).wav +3 -0
- data/freesound/audio/fs_002_nature_woodpeckers_Haanwijk_1206_PM_250306_1056.wav +3 -0
- data/freesound/audio/fs_003_nature_Bird_-_Tawny_owl,_female.wav +3 -0
- data/freesound/audio/fs_004_nature_Bird_Chirps.wav +3 -0
- data/freesound/audio/fs_005_nature_Birds_Chirping_Busy.wav.wav +3 -0
- data/freesound/audio/fs_006_nature_bird_in_oostduinen_17.wav.wav +3 -0
- data/freesound/audio/fs_007_nature_wind_bushes_coast_903_PM_240311_0680.wav +3 -0
- data/freesound/audio/fs_008_nature_estate_farm_noise_1153AM3_220509_0343.wav.wav +3 -0
- data/freesound/audio/fs_009_nature_Howler_monkey_howling_in_the_evening_jungle.wav +3 -0
- data/freesound/audio/fs_010_nature_forest_heavy_rain_loop.wav.wav +3 -0
- data/freesound/audio/fs_011_nature_CardinalChirpingParabolicMP3VersionMay62013.mp3.wav +3 -0
- data/freesound/audio/fs_012_nature_Superb_Lyrebird_II.wav +3 -0
- data/freesound/audio/fs_013_nature_Night_Wind_Chimes.wav +3 -0
- data/freesound/audio/fs_014_nature_Southern_Summer_Evening_Ambience_with_Crickets_4.wav +3 -0
- data/freesound/audio/fs_015_nature_Li_River._Crickets_&_Frogs.wav +3 -0
- data/freesound/audio/fs_016_nature_Thunder_sound_effect_2.wav +3 -0
- data/freesound/audio/fs_017_nature_Rain_medium_-_falling_on_soft_surface.wav +3 -0
- data/freesound/audio/fs_018_nature_Thunder_6.wav +3 -0
- data/freesound/audio/fs_019_nature_Water_Flowing_into_Underground_Sinkhole.wav +3 -0
- data/freesound/audio/fs_020_nature_Water_Flowing_Through_Close_Rapids_6.wav +3 -0
- data/freesound/audio/fs_021_nature_Water_Flowing_Through_Close_Rapids_5.wav +3 -0
- data/freesound/audio/fs_022_urban_CITY_AMBIENCE_for_CLEAN_LOOP.wav.wav +3 -0
- data/freesound/audio/fs_023_urban_Kitchen_Ambience_During_Daytime_.wav.wav +3 -0
- data/freesound/audio/fs_024_urban_police_in_Paris.wav.wav +3 -0
- data/freesound/audio/fs_025_urban_BerlinGameScene.com_Crowd_Cheer_4.wav +3 -0
- data/freesound/audio/fs_026_urban_Да_ладно.wav +3 -0
- data/freesound/audio/fs_027_urban_Restaurant_5.mp3.wav +3 -0
- data/freesound/audio/fs_028_urban_Industrial_grinder.wav +3 -0
- data/freesound/audio/fs_029_urban_Continuous_mechanical_whir_.wav +3 -0
- data/freesound/audio/fs_030_urban_Electric_saw.wav +3 -0
- data/freesound/audio/fs_031_urban_Renault_Master_F3500_dCi135_Foley_Horn_Outside_Mono.wav.wav +3 -0
- data/freesound/audio/fs_032_urban_Street_atmosphere_and_sounds_in_Rome.wav +3 -0
- data/freesound/audio/fs_033_urban_Distant_horn_beeps_09.flac.wav +3 -0
- data/freesound/audio/fs_034_urban_Underground_Train_Station_Ambience.wav +3 -0
- data/freesound/audio/fs_035_urban_subwaystartsequence.wav.wav +3 -0
- data/freesound/audio/fs_036_urban_Train,_Subway,_Pass_By,_Under_Bridge_Perspective.wav +3 -0
- data/freesound/audio/fs_037_urban_SFX_Donkey_Cart.MP3.wav +3 -0
- data/freesound/audio/fs_038_urban_people_talking_at_street_market.wav +3 -0
- data/freesound/audio/fs_039_urban_Japan_Tokyo_Shinjuku_Street_Promoter_Yelling_City.wav.wav +3 -0
- data/freesound/audio/fs_040_water_waves_close_coast_small_004.wav.wav +3 -0
- data/freesound/audio/fs_041_water_wave_sand_beach_012.wav.wav +3 -0
- data/freesound/audio/fs_042_water_waves_over_mall_shells_01_150515_00.wav.wav +3 -0
- data/freesound/audio/fs_043_water_Rain_on_a_Plastic_Roof.wav +3 -0
- data/freesound/audio/fs_044_water_Ambiance_Waterfall_Big_Skogafoss_Far_Loop_Stereo_02.wav.wav +3 -0
- data/freesound/audio/fs_045_water_Water_Stream_1.wav +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,167 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
data/freesound/audio/fs_001_nature_Autumn_leaves_falling_on_forest_floor_(close,_loopable).wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
data/freesound/audio/fs_002_nature_woodpeckers_Haanwijk_1206_PM_250306_1056.wav filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
data/freesound/audio/fs_003_nature_Bird_-_Tawny_owl,_female.wav filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
data/freesound/audio/fs_004_nature_Bird_Chirps.wav filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
data/freesound/audio/fs_005_nature_Birds_Chirping_Busy.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
data/freesound/audio/fs_006_nature_bird_in_oostduinen_17.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
data/freesound/audio/fs_007_nature_wind_bushes_coast_903_PM_240311_0680.wav filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
data/freesound/audio/fs_008_nature_estate_farm_noise_1153AM3_220509_0343.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
data/freesound/audio/fs_009_nature_Howler_monkey_howling_in_the_evening_jungle.wav filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
data/freesound/audio/fs_010_nature_forest_heavy_rain_loop.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
data/freesound/audio/fs_011_nature_CardinalChirpingParabolicMP3VersionMay62013.mp3.wav filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
data/freesound/audio/fs_012_nature_Superb_Lyrebird_II.wav filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
data/freesound/audio/fs_013_nature_Night_Wind_Chimes.wav filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
data/freesound/audio/fs_014_nature_Southern_Summer_Evening_Ambience_with_Crickets_4.wav filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
data/freesound/audio/fs_015_nature_Li_River._Crickets_&_Frogs.wav filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
data/freesound/audio/fs_016_nature_Thunder_sound_effect_2.wav filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
data/freesound/audio/fs_017_nature_Rain_medium_-_falling_on_soft_surface.wav filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
data/freesound/audio/fs_018_nature_Thunder_6.wav filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
data/freesound/audio/fs_019_nature_Water_Flowing_into_Underground_Sinkhole.wav filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
data/freesound/audio/fs_020_nature_Water_Flowing_Through_Close_Rapids_6.wav filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
data/freesound/audio/fs_021_nature_Water_Flowing_Through_Close_Rapids_5.wav filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
data/freesound/audio/fs_022_urban_CITY_AMBIENCE_for_CLEAN_LOOP.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
data/freesound/audio/fs_023_urban_Kitchen_Ambience_During_Daytime_.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
data/freesound/audio/fs_024_urban_police_in_Paris.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
data/freesound/audio/fs_025_urban_BerlinGameScene.com_Crowd_Cheer_4.wav filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
data/freesound/audio/fs_026_urban_Да_ладно.wav filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
data/freesound/audio/fs_027_urban_Restaurant_5.mp3.wav filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
data/freesound/audio/fs_028_urban_Industrial_grinder.wav filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
data/freesound/audio/fs_029_urban_Continuous_mechanical_whir_.wav filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
data/freesound/audio/fs_030_urban_Electric_saw.wav filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
data/freesound/audio/fs_031_urban_Renault_Master_F3500_dCi135_Foley_Horn_Outside_Mono.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
data/freesound/audio/fs_032_urban_Street_atmosphere_and_sounds_in_Rome.wav filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
data/freesound/audio/fs_033_urban_Distant_horn_beeps_09.flac.wav filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
data/freesound/audio/fs_034_urban_Underground_Train_Station_Ambience.wav filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
data/freesound/audio/fs_035_urban_subwaystartsequence.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
data/freesound/audio/fs_036_urban_Train,_Subway,_Pass_By,_Under_Bridge_Perspective.wav filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
data/freesound/audio/fs_037_urban_SFX_Donkey_Cart.MP3.wav filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
data/freesound/audio/fs_038_urban_people_talking_at_street_market.wav filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
data/freesound/audio/fs_039_urban_Japan_Tokyo_Shinjuku_Street_Promoter_Yelling_City.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
data/freesound/audio/fs_040_water_waves_close_coast_small_004.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
data/freesound/audio/fs_041_water_wave_sand_beach_012.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
data/freesound/audio/fs_042_water_waves_over_mall_shells_01_150515_00.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
data/freesound/audio/fs_043_water_Rain_on_a_Plastic_Roof.wav filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
data/freesound/audio/fs_044_water_Ambiance_Waterfall_Big_Skogafoss_Far_Loop_Stereo_02.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
data/freesound/audio/fs_045_water_Water_Stream_1.wav filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
data/freesound/audio/fs_046_water_Rushing_River_Loop.wav filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
data/freesound/audio/fs_047_water_Water_Game_Theme_Loop_2.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
data/freesound/audio/fs_048_water_waves_sand_beach_048.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
data/freesound/audio/fs_049_water_waves_sand_beach_013.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
data/freesound/audio/fs_050_water_Rain_Hitting_Puddle.wav filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
data/freesound/audio/fs_051_water_EXT_RainInPuddle.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
data/freesound/audio/fs_052_water_Heavy_Rain,_Outside.wav filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
data/freesound/audio/fs_053_water_09_Stream,_Dirty_Sewer,_Dark,_Trickling,_Burbling,_Flowing,_Underwa.wav filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
data/freesound/audio/fs_054_water_15_ca_pads.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
data/freesound/audio/fs_055_water_12_Stream,_Muddy_Current,_Dark,_Trickling,_Drips,_Flowing,_Underwat.wav filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
data/freesound/audio/fs_056_water_r18_babbling_brook.wav filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
data/freesound/audio/fs_057_water_Running_water_flowing.wav filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
data/freesound/audio/fs_058_water_Creek_02.wav filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
data/freesound/audio/fs_059_water_2019-06-14_Nyord_Hafen_2_mono_niere.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
data/freesound/audio/fs_060_water_Ambiance_marina_1154_AM_220801_0468.wav filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
data/freesound/audio/fs_061_nature_Thunder_08.wav filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
data/freesound/audio/fs_062_nature_Close_lightning_strike.wav filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
data/freesound/audio/fs_063_nature_Rain_and_Thunder_3.wav filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
data/freesound/audio/fs_064_nature_heavy_rain_outside.wav filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
data/freesound/audio/fs_065_nature_Heavy_Rain_Thunder_UK_Cambridge_Short.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
data/freesound/audio/fs_066_nature_Heavy_Rain.wav filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
data/freesound/audio/fs_067_nature_room-tone_wind_rain_11_200216_0114.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
data/freesound/audio/fs_068_nature_wind_gap_indoors_013_170305_1100.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
data/freesound/audio/fs_069_nature_20190809.howling131.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
data/freesound/audio/fs_070_nature_Tires_on_Gravel_Road_1.wav filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
data/freesound/audio/fs_071_nature_novasoundhail260.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
data/freesound/audio/fs_072_nature_Raekuuro_ikkunaa_vasten___Hail_hitting_against_a_window.wav filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
data/freesound/audio/fs_073_nature_dogs_barking_mono_4824.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
data/freesound/audio/fs_074_nature_dog_barking.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
data/freesound/audio/fs_075_nature_Barking_Dog.wav filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
data/freesound/audio/fs_076_nature_Cat_purring.wav filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
data/freesound/audio/fs_077_nature_cat_eating.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
data/freesound/audio/fs_078_nature_Cat_Purring.wav filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
data/freesound/audio/fs_079_nature_estate_spring_NL_EU_1235PM_220509_0434.wav filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
data/freesound/audio/fs_080_nature_farmfield_817_AM_NL_EU_220515_0345.wav filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
data/freesound/audio/fs_081_nature_edge_forest_735AM_210221_0257.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
data/freesound/audio/fs_082_nature_frogs_lake_night_spot_3.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
data/freesound/audio/fs_083_nature_night-frogs2.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
data/freesound/audio/fs_084_nature_200703101950PacificChorusFrogs1.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
data/freesound/audio/fs_085_nature_bees_and_birds_170601_1190.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
data/freesound/audio/fs_086_nature_Cicada_Insects_8_25_13_10_06_PM_1.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
data/freesound/audio/fs_087_nature_Cicadas.wav filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
data/freesound/audio/fs_088_urban_Typing_2.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
data/freesound/audio/fs_089_urban_Busy_Office_No_People_Loop.wav filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
data/freesound/audio/fs_090_urban_Keyboard_typing.WAV.wav filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
data/freesound/audio/fs_091_urban_pan_fry2.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
data/freesound/audio/fs_092_urban_meat_grill.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
data/freesound/audio/fs_093_urban_boilingtomatosauce.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
data/freesound/audio/fs_094_urban_Fire_-_Swooshes_-_Burst_Evolving_32.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
data/freesound/audio/fs_095_urban_Fireplace.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
data/freesound/audio/fs_096_urban_Fire_-_Chimney___Stove.wav filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
data/freesound/audio/fs_097_urban_Clock_sound.mp3.wav filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
data/freesound/audio/fs_098_urban_Clock_ticking.wav.wav filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
data/freesound/audio/fs_099_urban_Clock_Ticking.wav filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
data/processed/audio/city_people_traffic.wav filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
data/processed/audio/forest_birds_wind_01.wav filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
data/processed/audio/ocean_waves_wind_01.wav filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
data/processed/audio/ocean_waves_wind_02.wav filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
data/processed/audio/rain_city_traffic_01.wav filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
data/processed/images/beach_waves_01.png filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
data/processed/images/beach_waves_02.png filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
data/processed/images/beach_waves_03.png filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
data/processed/images/city01.png filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
data/processed/images/city_02.png filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
data/processed/images/city_neo_02.png filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
data/processed/images/city_neon_01.png filter=lfs diff=lfs merge=lfs -text
|
| 147 |
+
data/processed/images/forest_morning_fog_02.png filter=lfs diff=lfs merge=lfs -text
|
| 148 |
+
data/processed/images/forest_mountain01.png filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
data/wikimedia/images/beach_waves_01.png filter=lfs diff=lfs merge=lfs -text
|
| 150 |
+
data/wikimedia/images/city01.png filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
data/wikimedia/images/city_02.png filter=lfs diff=lfs merge=lfs -text
|
| 152 |
+
data/wikimedia/images/city_neon_01.png filter=lfs diff=lfs merge=lfs -text
|
| 153 |
+
data/wikimedia/images/forest_morning_fog_02.png filter=lfs diff=lfs merge=lfs -text
|
| 154 |
+
data/wikimedia/images/wm_001_nature_01-พระที่นั่งคูหาคฤหาสน์.jpg filter=lfs diff=lfs merge=lfs -text
|
| 155 |
+
data/wikimedia/images/wm_002_nature_1_pano_cuiping_yangshuo_2016.jpg filter=lfs diff=lfs merge=lfs -text
|
| 156 |
+
data/wikimedia/images/wm_004_nature_2013_Cogden_Bridge.jpg filter=lfs diff=lfs merge=lfs -text
|
| 157 |
+
data/wikimedia/images/wm_005_nature_2013_Rainbow_over_Washfold.jpg filter=lfs diff=lfs merge=lfs -text
|
| 158 |
+
data/wikimedia/images/wm_006_nature_2014_Track_on_Fremington_Edge.jpg filter=lfs diff=lfs merge=lfs -text
|
| 159 |
+
data/wikimedia/images/wm_007_nature_2014_Yorkshire_Dales_country_road_Swaledale_Askrigg.jpg filter=lfs diff=lfs merge=lfs -text
|
| 160 |
+
data/wikimedia/images/wm_008_nature_2015_Ribblehead_Viaduct_1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 161 |
+
data/wikimedia/images/wm_009_nature_2015_Swaledale_from_Kisdon_Hill.jpg filter=lfs diff=lfs merge=lfs -text
|
| 162 |
+
data/wikimedia/images/wm_010_nature_2018_-_Château_fort_de_Lourdes.jpg filter=lfs diff=lfs merge=lfs -text
|
| 163 |
+
data/wikimedia/images/wm_011_nature_01_Calanche_Piana.jpg filter=lfs diff=lfs merge=lfs -text
|
| 164 |
+
data/wikimedia/images/wm_012_nature_01_Gorges_du_Tarn_Roc_des_Hourtous.jpg filter=lfs diff=lfs merge=lfs -text
|
| 165 |
+
data/wikimedia/images/wm_013_nature_1_lake_louise_pano_2019.jpg filter=lfs diff=lfs merge=lfs -text
|
| 166 |
+
data/wikimedia/images/wm_014_nature_1_tianzishan_wulingyuan_zhangjiajie_2012.jpg filter=lfs diff=lfs merge=lfs -text
|
| 167 |
+
data/wikimedia/images/wm_015_nature_1_zhangjiajie_huangshizhai_wulingyuan_panorama_2012.jpg filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
data/wikimedia/images/wm_016_nature_150906-001_Wilpena_Pound_from_Moralana_Scenic_Drive_Pano_50pc.jpg filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
data/wikimedia/images/wm_017_nature_20090719_Crkva_Gospa_od_Zdravlja_Kotor_Bay_Montenegro.jpg filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
data/wikimedia/images/wm_019_nature_21-224-5054_NNP_Synevyr_RB_18.jpg filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
data/wikimedia/images/wm_020_nature_80_-_Machu_Picchu_-_Juin_2009_-_edit.jpg filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
data/wikimedia/images/wm_021_nature_2014_Park_w_Kłodzku.jpg filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
data/wikimedia/images/wm_022_nature_Andiast-Breil-Brigels._24-09-2025._(d.j.b.)_08.jpg filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
data/wikimedia/images/wm_023_nature_Ansberg_Blickrichtung_Süden_120324.jpg filter=lfs diff=lfs merge=lfs -text
|
| 175 |
+
data/wikimedia/images/wm_024_nature_Baumsilhouetten_am_Uetliberg_mit_Novembernebel_und_Sonnenstrahlen.jpg filter=lfs diff=lfs merge=lfs -text
|
| 176 |
+
data/wikimedia/images/wm_025_nature_Beech_and_ferns_in_Gullmarsskogen.jpg filter=lfs diff=lfs merge=lfs -text
|
| 177 |
+
data/wikimedia/images/wm_026_urban_128_Balconies_of_1390_Market_Street,_San_Francisco.jpg filter=lfs diff=lfs merge=lfs -text
|
| 178 |
+
data/wikimedia/images/wm_027_urban_14-02-02-straszburg-RalfR-113.jpg filter=lfs diff=lfs merge=lfs -text
|
| 179 |
+
data/wikimedia/images/wm_028_urban_140626_Tierser_Alpl_Rosszähne.jpg filter=lfs diff=lfs merge=lfs -text
|
| 180 |
+
data/wikimedia/images/wm_029_urban_Wroclaw_-_Hala_Stulecia_03.jpg filter=lfs diff=lfs merge=lfs -text
|
| 181 |
+
data/wikimedia/images/wm_030_urban_2014_Bożków,_pałac_01.jpg filter=lfs diff=lfs merge=lfs -text
|
| 182 |
+
data/wikimedia/images/wm_031_urban_2015_Browar_w_Radkowie.jpg filter=lfs diff=lfs merge=lfs -text
|
| 183 |
+
data/wikimedia/images/wm_032_urban_2015_Wieża_mieszkalna_w_Żelaźnie_01.jpg filter=lfs diff=lfs merge=lfs -text
|
| 184 |
+
data/wikimedia/images/wm_033_urban_2016_Pałac_w_Łomnicy_2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 185 |
+
data/wikimedia/images/wm_034_urban_2016_Pałac_w_Żelaźnie_1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 186 |
+
data/wikimedia/images/wm_035_urban_2016_Phnom_Penh,_Pałac_Królewski,_Preah_Tineang_Phhochani_(14).jpg filter=lfs diff=lfs merge=lfs -text
|
| 187 |
+
data/wikimedia/images/wm_036_urban_2016_Phnom_Penh,_Pałac_Królewski,_Srebrna_Pagoda_(02).jpg filter=lfs diff=lfs merge=lfs -text
|
| 188 |
+
data/wikimedia/images/wm_038_urban_15-10-28-Pont_Bac_de_Roda_Barcelona-RalfR-WMA_3105.jpg filter=lfs diff=lfs merge=lfs -text
|
| 189 |
+
data/wikimedia/images/wm_040_urban_2014_Kłodzko,_most_gotycki.jpg filter=lfs diff=lfs merge=lfs -text
|
| 190 |
+
data/wikimedia/images/wm_041_urban_2017_-_Київ_-_Світанок_над_Дніпром.jpg filter=lfs diff=lfs merge=lfs -text
|
| 191 |
+
data/wikimedia/images/wm_042_urban_2024_Most_Żelazny_w_Kłodzku_(8),_powódź.jpg filter=lfs diff=lfs merge=lfs -text
|
| 192 |
+
data/wikimedia/images/wm_043_urban_2024_Wiadukt_kolejowy_w_Lewinie_Kłodzkim_(11).jpg filter=lfs diff=lfs merge=lfs -text
|
| 193 |
+
data/wikimedia/images/wm_044_urban_2Fi02473_Pont_national_US_Army_retouchée.jpg filter=lfs diff=lfs merge=lfs -text
|
| 194 |
+
data/wikimedia/images/wm_045_urban_A_pier_at_a_campsite_during_sunset,_Sidney_Spit_(part_of_Gulf_Islands_National_Park_Reserve),_Sidney_Island,_British_Columbia,_Canada_20.jpg filter=lfs diff=lfs merge=lfs -text
|
| 195 |
+
data/wikimedia/images/wm_046_urban_A5_Aarebruecke.jpg filter=lfs diff=lfs merge=lfs -text
|
| 196 |
+
data/wikimedia/images/wm_047_urban_Anjarle_Bridge_and_Cows-fix.jpg filter=lfs diff=lfs merge=lfs -text
|
| 197 |
+
data/wikimedia/images/wm_048_water_Acapulco_fishermen.jpg filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
data/wikimedia/images/wm_049_water_Accès_plage,_Sainte-Marie,_Ré_island,_august_2015.jpg filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
data/wikimedia/images/wm_050_water_Amphitheatre_Bay_after_a_storm,_Akamas_Peninsula,_Cyprus.jpg filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,19 +1,33 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
- streamlit
|
| 10 |
pinned: false
|
| 11 |
-
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
-
#
|
| 15 |
|
| 16 |
-
|
|
|
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Multimodal Coherence AI
|
| 3 |
+
emoji: "\U0001f3a8"
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: pink
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: "1.41.0"
|
| 8 |
+
app_file: app.py
|
|
|
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
short_description: Coherent text + image + audio with MSCI
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Multimodal Coherence AI
|
| 15 |
|
| 16 |
+
Generate semantically coherent **text + image + audio** bundles and evaluate
|
| 17 |
+
cross-modal alignment using the **Multimodal Semantic Coherence Index (MSCI)**.
|
| 18 |
|
| 19 |
+
## How it works
|
| 20 |
+
|
| 21 |
+
1. **Text** — generated via the Hugging Face Inference API
|
| 22 |
+
2. **Image** — retrieved from a curated index using CLIP (ViT-B/32) embeddings
|
| 23 |
+
3. **Audio** — retrieved from a curated index using CLAP (HTSAT-unfused) embeddings
|
| 24 |
+
4. **MSCI** — computed as `0.45 * cos_sim(text, image) + 0.45 * cos_sim(text, audio)`
|
| 25 |
+
|
| 26 |
+
## Research
|
| 27 |
+
|
| 28 |
+
This demo accompanies a study evaluating multimodal semantic coherence across
|
| 29 |
+
three research questions:
|
| 30 |
+
|
| 31 |
+
- **RQ1**: Is MSCI sensitive to controlled semantic perturbations? (Supported, d > 2.0)
|
| 32 |
+
- **RQ2**: Does structured planning improve cross-modal alignment? (Not supported)
|
| 33 |
+
- **RQ3**: Does MSCI correlate with human coherence judgments? (Supported, rho = 0.379)
|
app.py
ADDED
|
@@ -0,0 +1,584 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Multimodal Coherence AI — Hugging Face Spaces Demo
|
| 3 |
+
|
| 4 |
+
Live demonstration of multimodal generation + coherence evaluation.
|
| 5 |
+
Enter a scene description and the system produces coherent text, image,
|
| 6 |
+
and audio with real-time MSCI scoring.
|
| 7 |
+
|
| 8 |
+
Pipeline: HF Inference API (text) + CLIP retrieval (image) + CLAP retrieval (audio)
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
import os
|
| 15 |
+
import sys
|
| 16 |
+
import time
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Optional
|
| 19 |
+
|
| 20 |
+
import streamlit as st
|
| 21 |
+
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
# Paths
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
PROJECT_ROOT = Path(__file__).resolve().parent
|
| 26 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 27 |
+
|
| 28 |
+
logger = logging.getLogger(__name__)
|
| 29 |
+
|
| 30 |
+
IMAGE_SIM_THRESHOLD = 0.20
|
| 31 |
+
AUDIO_SIM_THRESHOLD = 0.10
|
| 32 |
+
|
| 33 |
+
# ---------------------------------------------------------------------------
|
| 34 |
+
# Custom CSS
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
CUSTOM_CSS = """
|
| 37 |
+
<style>
|
| 38 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=JetBrains+Mono:wght@500;700&display=swap');
|
| 39 |
+
|
| 40 |
+
.block-container { padding-top: 1.2rem !important; max-width: 1200px; }
|
| 41 |
+
html, body, [class*="css"] { font-family: 'Inter', -apple-system, sans-serif; }
|
| 42 |
+
|
| 43 |
+
.hero-wrap { text-align: center; padding: 1.5rem 0 1rem; }
|
| 44 |
+
.hero-title {
|
| 45 |
+
font-size: 2.6rem; font-weight: 800; letter-spacing: -0.03em;
|
| 46 |
+
background: linear-gradient(135deg, #818cf8 0%, #c084fc 50%, #f472b6 100%);
|
| 47 |
+
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
|
| 48 |
+
margin-bottom: 0.35rem;
|
| 49 |
+
}
|
| 50 |
+
.hero-sub {
|
| 51 |
+
font-size: 1rem; color: #94a3b8; max-width: 600px;
|
| 52 |
+
margin: 0 auto; line-height: 1.6;
|
| 53 |
+
}
|
| 54 |
+
.hero-sub b { color: #c4b5fd; }
|
| 55 |
+
|
| 56 |
+
.stTextArea textarea {
|
| 57 |
+
border-radius: 14px !important;
|
| 58 |
+
border: 1.5px solid rgba(129,140,248,0.25) !important;
|
| 59 |
+
font-size: 0.95rem !important; padding: 0.9rem 1rem !important;
|
| 60 |
+
transition: border-color 0.2s;
|
| 61 |
+
}
|
| 62 |
+
.stTextArea textarea:focus {
|
| 63 |
+
border-color: rgba(129,140,248,0.6) !important;
|
| 64 |
+
box-shadow: 0 0 0 3px rgba(129,140,248,0.1) !important;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.chip-row { display: flex; gap: 0.4rem; flex-wrap: wrap; align-items: center; padding-top: 0.3rem; }
|
| 68 |
+
.chip {
|
| 69 |
+
display: inline-flex; align-items: center; gap: 0.3rem;
|
| 70 |
+
padding: 0.22rem 0.7rem; border-radius: 20px;
|
| 71 |
+
font-size: 0.7rem; font-weight: 600; letter-spacing: 0.03em;
|
| 72 |
+
}
|
| 73 |
+
.chip-purple { background: rgba(129,140,248,0.14); color: #a5b4fc; }
|
| 74 |
+
.chip-green { background: rgba(52,211,153,0.14); color: #6ee7b7; }
|
| 75 |
+
.chip-dot { width: 6px; height: 6px; border-radius: 50%; }
|
| 76 |
+
.chip-dot-purple { background: #818cf8; }
|
| 77 |
+
.chip-dot-green { background: #34d399; }
|
| 78 |
+
|
| 79 |
+
.scores-grid {
|
| 80 |
+
display: grid; grid-template-columns: repeat(4, 1fr);
|
| 81 |
+
gap: 0.75rem; margin: 0.5rem 0 0.3rem;
|
| 82 |
+
}
|
| 83 |
+
@media (max-width: 768px) { .scores-grid { grid-template-columns: repeat(2, 1fr); } }
|
| 84 |
+
.sc {
|
| 85 |
+
border-radius: 16px; padding: 1.1rem 0.8rem; text-align: center;
|
| 86 |
+
border: 1px solid rgba(255,255,255,0.06);
|
| 87 |
+
background: rgba(255,255,255,0.02);
|
| 88 |
+
backdrop-filter: blur(10px);
|
| 89 |
+
position: relative; overflow: hidden;
|
| 90 |
+
}
|
| 91 |
+
.sc::before {
|
| 92 |
+
content: ''; position: absolute; top: 0; left: 0; right: 0; height: 3px;
|
| 93 |
+
border-radius: 16px 16px 0 0;
|
| 94 |
+
}
|
| 95 |
+
.sc-high::before { background: linear-gradient(90deg, #10b981, #34d399); }
|
| 96 |
+
.sc-mid::before { background: linear-gradient(90deg, #f59e0b, #fbbf24); }
|
| 97 |
+
.sc-low::before { background: linear-gradient(90deg, #ef4444, #fb7185); }
|
| 98 |
+
.sc-class::before { background: linear-gradient(90deg, #818cf8, #c084fc); }
|
| 99 |
+
.sc-lbl {
|
| 100 |
+
font-size: 0.65rem; text-transform: uppercase; letter-spacing: 0.1em;
|
| 101 |
+
color: #64748b; margin-bottom: 0.4rem; font-weight: 600;
|
| 102 |
+
}
|
| 103 |
+
.sc-val {
|
| 104 |
+
font-size: 1.9rem; font-weight: 700; line-height: 1.1;
|
| 105 |
+
font-family: 'JetBrains Mono', monospace;
|
| 106 |
+
}
|
| 107 |
+
.sc-high .sc-val { color: #34d399; }
|
| 108 |
+
.sc-mid .sc-val { color: #fbbf24; }
|
| 109 |
+
.sc-low .sc-val { color: #fb7185; }
|
| 110 |
+
.sc-class .sc-val { font-size: 1.15rem; font-family: 'Inter', sans-serif; color: #c4b5fd; }
|
| 111 |
+
.sc-badge {
|
| 112 |
+
display: inline-block; margin-top: 0.35rem; padding: 0.15rem 0.55rem;
|
| 113 |
+
border-radius: 20px; font-size: 0.6rem; font-weight: 700;
|
| 114 |
+
text-transform: uppercase; letter-spacing: 0.07em;
|
| 115 |
+
}
|
| 116 |
+
.sc-high .sc-badge { background: rgba(52,211,153,0.12); color: #34d399; }
|
| 117 |
+
.sc-mid .sc-badge { background: rgba(251,191,36,0.12); color: #fbbf24; }
|
| 118 |
+
.sc-low .sc-badge { background: rgba(251,113,133,0.12); color: #fb7185; }
|
| 119 |
+
.sc-class .sc-badge { background: rgba(196,181,253,0.12); color: #c4b5fd; }
|
| 120 |
+
|
| 121 |
+
.sec-label {
|
| 122 |
+
font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.12em;
|
| 123 |
+
font-weight: 700; margin-bottom: 0.6rem; padding-bottom: 0.35rem;
|
| 124 |
+
border-bottom: 2px solid rgba(129,140,248,0.15); color: #818cf8;
|
| 125 |
+
}
|
| 126 |
+
.text-card {
|
| 127 |
+
border-radius: 14px; padding: 1.1rem 1.2rem;
|
| 128 |
+
background: rgba(255,255,255,0.02); border: 1px solid rgba(255,255,255,0.06);
|
| 129 |
+
font-size: 0.9rem; line-height: 1.75; color: #cbd5e1;
|
| 130 |
+
}
|
| 131 |
+
.timing {
|
| 132 |
+
display: flex; gap: 0.5rem; flex-wrap: wrap; align-items: center;
|
| 133 |
+
padding: 0.4rem 0.8rem; border-radius: 10px;
|
| 134 |
+
background: rgba(255,255,255,0.02); border: 1px solid rgba(255,255,255,0.04);
|
| 135 |
+
font-size: 0.72rem; color: #64748b; margin: 0.4rem 0;
|
| 136 |
+
}
|
| 137 |
+
.timing span { white-space: nowrap; }
|
| 138 |
+
.timing .t-total { color: #a5b4fc; font-weight: 700; }
|
| 139 |
+
.timing .t-sep { color: rgba(255,255,255,0.08); }
|
| 140 |
+
|
| 141 |
+
.warn-banner {
|
| 142 |
+
border-radius: 12px; padding: 0.7rem 1rem; margin-bottom: 0.6rem;
|
| 143 |
+
border-left: 3px solid #fbbf24; font-size: 0.82rem; color: #fcd34d;
|
| 144 |
+
background: rgba(251,191,36,0.05);
|
| 145 |
+
}
|
| 146 |
+
.warn-banner b { color: #fde68a; }
|
| 147 |
+
|
| 148 |
+
.sb { margin: 0.35rem 0; }
|
| 149 |
+
.sb-top { display: flex; justify-content: space-between; font-size: 0.68rem; color: #64748b; margin-bottom: 0.15rem; }
|
| 150 |
+
.sb-top .sb-v { font-family: 'JetBrains Mono', monospace; font-weight: 600; }
|
| 151 |
+
.sb-track { height: 5px; border-radius: 3px; background: rgba(255,255,255,0.05); overflow: hidden; }
|
| 152 |
+
.sb-fill { height: 100%; border-radius: 3px; }
|
| 153 |
+
.sbf-g { background: linear-gradient(90deg, #10b981, #34d399); }
|
| 154 |
+
.sbf-y { background: linear-gradient(90deg, #f59e0b, #fbbf24); }
|
| 155 |
+
.sbf-r { background: linear-gradient(90deg, #ef4444, #fb7185); }
|
| 156 |
+
|
| 157 |
+
.welcome { text-align: center; padding: 4rem 2rem; color: #475569; }
|
| 158 |
+
.welcome-icons { font-size: 3.5rem; margin-bottom: 0.8rem; letter-spacing: 0.3rem; }
|
| 159 |
+
.welcome-text { font-size: 1.05rem; color: #64748b; }
|
| 160 |
+
.welcome-hint { font-size: 0.82rem; color: #475569; margin-top: 0.3rem; }
|
| 161 |
+
|
| 162 |
+
section[data-testid="stSidebar"] > div:first-child { padding-top: 1.2rem; }
|
| 163 |
+
.sidebar-info {
|
| 164 |
+
font-size: 0.72rem; color: #64748b; line-height: 1.6;
|
| 165 |
+
padding: 0.8rem; border-radius: 10px;
|
| 166 |
+
background: rgba(255,255,255,0.02); border: 1px solid rgba(255,255,255,0.04);
|
| 167 |
+
}
|
| 168 |
+
.sidebar-info b { color: #94a3b8; }
|
| 169 |
+
</style>
|
| 170 |
+
"""
|
| 171 |
+
|
| 172 |
+
# ---------------------------------------------------------------------------
|
| 173 |
+
# Example prompts
|
| 174 |
+
# ---------------------------------------------------------------------------
|
| 175 |
+
# Sidebar example prompts, grouped by the domain heading they are listed under.
EXAMPLE_PROMPTS = {
    "Nature": [
        "A peaceful forest at dawn with birdsong and morning mist",
        "A field of golden wheat under a warm summer sunset",
        "A dense jungle with exotic birds calling from the canopy",
    ],
    "Urban": [
        "A bustling city street at night with neon lights and traffic",
        "A quiet alley in an old town with distant footsteps echoing",
        "A cafe terrace on a busy boulevard with clinking glasses",
    ],
    "Water": [
        "Ocean waves crashing on a sandy beach at sunset",
        "Rain falling on a pond with ripples spreading across the surface",
        "A mountain stream flowing over rocks through a pine forest",
    ],
    "Mixed": [
        "A lighthouse on a cliff during a thunderstorm at night",
        "A bonfire on a beach with waves and guitar music at night",
        "A train passing through countryside with distant church bells",
    ],
}

# Emoji shown next to a domain name; keys are lower-case domain identifiers.
DOMAIN_ICONS = {
    "nature": "\U0001f33f",
    "urban": "\U0001f3d9\ufe0f",
    "water": "\U0001f30a",
    "mixed": "\U0001f310",
    "other": "\U0001f4cd",
}
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
# ---------------------------------------------------------------------------
|
| 201 |
+
# Cached model loading
|
| 202 |
+
# ---------------------------------------------------------------------------
|
| 203 |
+
|
| 204 |
+
@st.cache_resource
def load_coherence_engine():
    """Create the CoherenceEngine (target_dim=512) used for scoring.

    Decorated with ``st.cache_resource`` so the engine object is built once
    and reused instead of being rebuilt on every call.
    """
    # Imported lazily, inside the function, exactly like the other loaders.
    from src.coherence.coherence_engine import CoherenceEngine as _Engine

    engine = _Engine(target_dim=512)
    return engine
|
| 208 |
+
|
| 209 |
+
@st.cache_resource
def load_image_retriever():
    """Create the image retriever backed by ``data/embeddings/image_index.npz``.

    Uses a minimum similarity of 0.20. Decorated with ``st.cache_resource``
    so the retriever is constructed once and reused.
    """
    from src.generators.image.generator_improved import (
        ImprovedImageRetrievalGenerator as _ImageRetriever,
    )

    retriever = _ImageRetriever(
        index_path="data/embeddings/image_index.npz",
        min_similarity=0.20,
    )
    return retriever
|
| 213 |
+
|
| 214 |
+
@st.cache_resource
def load_audio_retriever():
    """Create the audio retriever backed by ``data/embeddings/audio_index.npz``.

    Uses a minimum similarity of 0.10. Decorated with ``st.cache_resource``
    so the retriever is constructed once and reused.
    """
    from src.generators.audio.retrieval import (
        AudioRetrievalGenerator as _AudioRetriever,
    )

    retriever = _AudioRetriever(
        index_path="data/embeddings/audio_index.npz",
        min_similarity=0.10,
    )
    return retriever
|
| 218 |
+
|
| 219 |
+
@st.cache_resource
def get_inference_client():
    """Return a Hugging Face ``InferenceClient`` built from the ``HF_TOKEN`` env var.

    The token is ``None`` when ``HF_TOKEN`` is not set; the client is still
    constructed in that case. Decorated with ``st.cache_resource`` so the
    client is created once and reused.
    """
    from huggingface_hub import InferenceClient

    return InferenceClient(token=os.getenv("HF_TOKEN"))
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
# ---------------------------------------------------------------------------
|
| 227 |
+
# Generation / retrieval functions
|
| 228 |
+
# ---------------------------------------------------------------------------
|
| 229 |
+
|
| 230 |
+
def gen_text_hf(prompt: str) -> dict:
    """Generate a short scene description through the HF Inference API.

    Args:
        prompt: The user's scene description.

    Returns:
        A dict with keys ``text``, ``image_prompt``, ``audio_prompt`` and
        ``plan``. ``image_prompt`` and ``audio_prompt`` are always the raw
        *prompt* and ``plan`` is always ``None``. ``text`` is the generated
        description, or *prompt* itself when the API call fails or returns
        an empty string (best-effort fallback; the failure is only logged).
    """
    instructions = (
        "You are a concise descriptive writer. "
        "Write a literal description of the scene in 3 to 5 natural sentences. "
        "No bullet points, no numbered lists, no meta commentary. "
        "Focus on concrete visual details AND the likely audio ambience."
    )
    # Start from the fallback bundle; only the text is replaced on success.
    bundle = {"text": prompt, "image_prompt": prompt, "audio_prompt": prompt, "plan": None}
    try:
        reply = get_inference_client().chat_completion(
            messages=[
                {"role": "system", "content": instructions},
                {"role": "user", "content": f"Describe this scene: {prompt}"},
            ],
            max_tokens=250,
        )
        generated = reply.choices[0].message.content.strip()
        if not generated:
            raise ValueError("Empty response")
    except Exception as e:
        logger.warning("HF Inference API failed: %s — using prompt as text", e)
    else:
        bundle["text"] = generated
    return bundle
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def retrieve_image(prompt: str) -> dict:
    """Look up an image for *prompt* and flatten the result into a plain dict.

    Returns:
        A dict with keys ``path``, ``similarity``, ``domain``, ``failed``,
        ``top_5`` and ``backend`` (always ``"retrieval"``), copied from the
        retriever result's ``image_path``, ``similarity``, ``domain``,
        ``retrieval_failed`` and ``top_5`` attributes.
    """
    hit = load_image_retriever().retrieve(prompt)
    return dict(
        path=hit.image_path,
        similarity=hit.similarity,
        domain=hit.domain,
        failed=hit.retrieval_failed,
        top_5=hit.top_5,
        backend="retrieval",
    )
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def retrieve_audio(prompt: str) -> dict:
    """Look up an audio clip for *prompt* and flatten the result into a plain dict.

    Returns:
        A dict with keys ``path``, ``similarity``, ``failed``, ``top_5`` and
        ``backend`` (always ``"retrieval"``), copied from the retriever
        result's ``audio_path``, ``similarity``, ``retrieval_failed`` and
        ``top_5`` attributes.
    """
    hit = load_audio_retriever().retrieve(prompt)
    return dict(
        path=hit.audio_path,
        similarity=hit.similarity,
        failed=hit.retrieval_failed,
        top_5=hit.top_5,
        backend="retrieval",
    )
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def eval_coherence(text: str, image_path: str, audio_path: str) -> dict:
    """Score a text/image/audio bundle with the cached coherence engine.

    Thin wrapper that forwards the three arguments by keyword to
    ``CoherenceEngine.evaluate`` and returns its result unchanged.
    NOTE(review): the pipeline may pass ``None`` for a path when a retrieval
    step failed — confirm the engine accepts that.
    """
    engine = load_coherence_engine()
    report = engine.evaluate(text=text, image_path=image_path, audio_path=audio_path)
    return report
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# ---------------------------------------------------------------------------
|
| 277 |
+
# HTML helpers
|
| 278 |
+
# ---------------------------------------------------------------------------
|
| 279 |
+
|
| 280 |
+
def _sc_cls(v: Optional[float]) -> str:
|
| 281 |
+
if v is None: return ""
|
| 282 |
+
if v >= 0.45: return "sc-high"
|
| 283 |
+
if v >= 0.30: return "sc-mid"
|
| 284 |
+
return "sc-low"
|
| 285 |
+
|
| 286 |
+
def _sc_badge(v: Optional[float]) -> str:
|
| 287 |
+
if v is None: return ""
|
| 288 |
+
if v >= 0.45: return "High"
|
| 289 |
+
if v >= 0.30: return "Moderate"
|
| 290 |
+
return "Low"
|
| 291 |
+
|
| 292 |
+
def score_card_html(label: str, value: Optional[float], is_class: bool = False) -> str:
    """Build the HTML for one score card.

    Args:
        label: Caption shown at the top of the card.
        value: Score to display, or ``None`` when unavailable.
        is_class: When true, render the "classification" card: the main
            text is "<badge> Coherence" ("N/A Coherence" without a score)
            and the badge shows "MSCI <value>" with three decimals.

    Returns:
        An HTML ``<div class="sc ...">`` fragment.
    """
    rating = _sc_badge(value)

    if is_class:
        card_class = "sc-class"
        shown = f"{rating or 'N/A'} Coherence"
        pill = "" if value is None else f'<div class="sc-badge">MSCI {value:.3f}</div>'
    else:
        card_class = _sc_cls(value)
        # An em dash stands in for a missing score.
        shown = "\u2014" if value is None else f"{value:.4f}"
        pill = f'<div class="sc-badge">{rating}</div>' if rating else ""

    head = f'<div class="sc {card_class}"><div class="sc-lbl">{label}</div>'
    body = f'<div class="sc-val">{shown}</div>{pill}</div>'
    return head + body
|
| 305 |
+
|
| 306 |
+
def sim_bar_html(name: str, val: float, mx: float = 0.6) -> str:
    """Build the HTML for one labelled similarity bar.

    Args:
        name: Candidate label (typically a file name). It is HTML-escaped
            because the fragment is rendered as raw HTML and file names may
            contain ``&``, ``<`` or ``>``.
        val: Similarity value shown with four decimals.
        mx: Similarity that corresponds to a completely filled bar.

    Returns:
        An HTML ``<div class="sb">`` fragment. The fill width is
        ``val / mx`` as a percentage, clamped to the range 0-100 (a negative
        or NaN similarity, or a non-positive *mx*, gives an empty bar). The
        fill colour class is ``sbf-g`` for val >= 0.35, ``sbf-y`` for
        val >= 0.20 and ``sbf-r`` otherwise.
    """
    import html  # local import keeps this block self-contained

    # Guard the division and clamp: CSS ignores negative widths, and
    # max(0.0, nan) evaluates to 0.0, so NaN also yields an empty bar.
    pct = max(0.0, min(val / mx * 100, 100)) if mx > 0 else 0.0
    cls = "sbf-g" if val >= 0.35 else ("sbf-y" if val >= 0.20 else "sbf-r")
    safe_name = html.escape(str(name))
    return (f'<div class="sb"><div class="sb-top"><span>{safe_name}</span>'
            f'<span class="sb-v">{val:.4f}</span></div>'
            f'<div class="sb-track"><div class="sb-fill {cls}" style="width:{pct}%"></div></div></div>')
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
# ---------------------------------------------------------------------------
|
| 315 |
+
# Main
|
| 316 |
+
# ---------------------------------------------------------------------------
|
| 317 |
+
|
| 318 |
+
def main():
    """Render the Streamlit page.

    Draws the hero banner, the sidebar with example prompts, the prompt box
    and the "Generate Bundle" button. When the button is pressed with a
    non-blank prompt the pipeline runs and its result is stored in
    ``st.session_state["last_result"]``; any stored result is then shown.
    With no click and no stored result, only a welcome panel is shown.
    """
    st.set_page_config(
        layout="wide",
        initial_sidebar_state="expanded",
        page_title="Multimodal Coherence AI",
        page_icon="\U0001f3a8",
    )
    st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

    # Static HTML fragments, assembled up front so the layout code below
    # reads as a sequence of widgets.
    hero_html = (
        '<div class="hero-wrap">'
        '<div class="hero-title">Multimodal Coherence AI</div>'
        '<div class="hero-sub">Generate semantically coherent <b>text + image + audio</b> bundles '
        'and evaluate cross-modal alignment with the <b>MSCI</b> metric.</div>'
        '</div>'
    )
    sidebar_html = (
        '<div class="sidebar-info">'
        '<b>Text</b> HF Inference API<br>'
        '<b>Image</b> CLIP retrieval (57 images)<br>'
        '<b>Audio</b> CLAP retrieval (104 clips)<br><br>'
        '<b>Metric</b> MSCI = 0.45 × s<sub>t,i</sub> + 0.45 × s<sub>t,a</sub><br><br>'
        '<b>Models</b><br>'
        'CLIP ViT-B/32 (text-image)<br>'
        'CLAP HTSAT-unfused (text-audio)'
        '</div>'
    )
    chips_html = (
        '<div class="chip-row">'
        '<span class="chip chip-purple"><span class="chip-dot chip-dot-purple"></span>Retrieval</span>'
        '<span class="chip chip-green"><span class="chip-dot chip-dot-green"></span>CLIP + CLAP</span>'
        '</div>'
    )
    welcome_html = (
        '<div class="welcome">'
        '<div class="welcome-icons">\U0001f3a8 \U0001f5bc\ufe0f \U0001f50a</div>'
        '<div class="welcome-text">Enter a scene description and click <b>Generate Bundle</b></div>'
        '<div class="welcome-hint">or pick an example from the sidebar</div>'
        '</div>'
    )

    # Hero
    st.markdown(hero_html, unsafe_allow_html=True)

    # Sidebar: one expander per domain, one button per example prompt.
    with st.sidebar:
        st.markdown("#### Examples")
        for domain_name, examples in EXAMPLE_PROMPTS.items():
            domain_icon = DOMAIN_ICONS.get(domain_name.lower(), "\U0001f4cd")
            with st.expander(f"{domain_icon} {domain_name}"):
                for example in examples:
                    clicked = st.button(example, key=f"ex_{hash(example)}", use_container_width=True)
                    if clicked:
                        # Picked up below as the text area's initial value.
                        st.session_state["prompt_input"] = example

        st.divider()
        st.markdown(sidebar_html, unsafe_allow_html=True)

    # Prompt input
    prompt = st.text_area(
        "Scene",
        value=st.session_state.get("prompt_input", ""),
        height=80,
        placeholder="Describe a scene... e.g., 'A peaceful forest at dawn with birdsong and morning mist'",
        label_visibility="collapsed",
    )
    scene = prompt.strip()

    # Button + chips
    button_col, chip_col = st.columns([1, 3])
    with button_col:
        go = st.button("Generate Bundle", type="primary", use_container_width=True, disabled=not scene)
    with chip_col:
        st.markdown(chips_html, unsafe_allow_html=True)

    # Welcome state: nothing requested and nothing to show yet.
    if not (go or "last_result" in st.session_state):
        st.markdown(welcome_html, unsafe_allow_html=True)
        return

    if go and scene:
        st.session_state["last_result"] = run_pipeline(scene)

    if "last_result" in st.session_state:
        show_results(st.session_state["last_result"])
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
# ---------------------------------------------------------------------------
|
| 394 |
+
# Pipeline
|
| 395 |
+
# ---------------------------------------------------------------------------
|
| 396 |
+
|
| 397 |
+
def run_pipeline(prompt: str) -> dict:
    """Run text generation, image/audio retrieval and coherence scoring.

    Each stage runs inside its own ``st.status`` box, which is updated with
    the outcome and elapsed time. A failing stage never aborts the pipeline.

    Args:
        prompt: The (already stripped) scene description.

    Returns:
        A dict with keys ``text`` (dict from ``gen_text_hf`` or a
        prompt-only fallback), ``image`` / ``audio`` (retrieval dicts, or
        ``None`` on failure), ``coherence`` (evaluation dict, or ``None`` on
        failure), per-stage timings ``t_text``, ``t_img``, ``t_aud``,
        ``t_eval``, the overall ``t_total`` (seconds) and ``prompt``.
    """
    R: dict = {}
    t_all = time.time()

    # 1) Text
    with st.status("Generating text...", expanded=True) as status:
        t0 = time.time()
        try:
            R["text"] = gen_text_hf(prompt)
            R["t_text"] = time.time() - t0
            status.update(label=f"Text ready ({R['t_text']:.1f}s)", state="complete")
        except Exception as e:
            status.update(label=f"Text failed: {e}", state="error")
            R["text"] = {"text": prompt, "image_prompt": prompt, "audio_prompt": prompt}
            R["t_text"] = time.time() - t0

    # 2) Image retrieval / 3) Audio retrieval
    R["image"], R["t_img"] = _retrieval_stage(
        "Image", retrieve_image, R["text"].get("image_prompt", prompt))
    R["audio"], R["t_aud"] = _retrieval_stage(
        "Audio", retrieve_audio, R["text"].get("audio_prompt", prompt))

    # 4) Coherence evaluation
    with st.status("Evaluating coherence...", expanded=True) as status:
        t0 = time.time()
        try:
            image_path = (R.get("image") or {}).get("path")
            audio_path = (R.get("audio") or {}).get("path")
            R["coherence"] = eval_coherence(R["text"]["text"], image_path, audio_path)
        except Exception as e:
            status.update(label=f"Eval failed: {e}", state="error")
            R["coherence"] = None
            R["t_eval"] = time.time() - t0
        else:
            R["t_eval"] = time.time() - t0
            # The label is built outside the try so that a missing MSCI
            # cannot cause a successful evaluation to be thrown away.
            scores = (R["coherence"] or {}).get("scores") or {}
            status.update(
                label=f"MSCI = {_fmt_metric(scores.get('msci'), '.4f')} ({R['t_eval']:.1f}s)",
                state="complete",
            )

    R["t_total"] = time.time() - t_all
    R["prompt"] = prompt
    return R


def _fmt_metric(value, spec: str) -> str:
    """Format *value* with *spec*; return ``"n/a"`` if it is missing or not numeric."""
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return "n/a"


def _retrieval_stage(kind: str, retrieve, query: str):
    """Run one retrieval stage in an ``st.status`` box.

    Returns ``(result, seconds)`` where *result* is the dict returned by
    *retrieve*, or ``None`` when *retrieve* raised.
    """
    with st.status(f"Retrieving {kind.lower()}...", expanded=True) as status:
        t0 = time.time()
        try:
            result = retrieve(query)
        except Exception as e:
            status.update(label=f"{kind} failed: {e}", state="error")
            return None, time.time() - t0

        elapsed = time.time() - t0
        below = result.get("failed", False)
        label = f"{kind} retrieved (sim={_fmt_metric(result.get('similarity'), '.3f')}, {elapsed:.1f}s)"
        if below:
            label += " \u2014 below threshold"
        # A below-threshold match is kept but flagged with the error state.
        status.update(label=label, state="error" if below else "complete")
        return result, elapsed
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
# ---------------------------------------------------------------------------
|
| 469 |
+
# Results display
|
| 470 |
+
# ---------------------------------------------------------------------------
|
| 471 |
+
|
| 472 |
+
def show_results(R: dict):
    """Render a pipeline result dict (as produced by ``run_pipeline``).

    Shows the four score cards, the timing strip, the text / image / audio
    columns, and two expanders with the top-5 retrieval candidates and the
    raw coherence report. Missing or failed parts are rendered as
    placeholders instead of raising.

    Args:
        R: Result dict; the keys read are ``coherence``, ``text``,
            ``image``, ``audio`` and the ``t_*`` timings.
    """
    import html  # local import keeps this block self-contained

    coh = R.get("coherence")
    sc = (coh.get("scores") or {}) if coh else {}
    msci = sc.get("msci")
    st_i = sc.get("st_i")
    st_a = sc.get("st_a")

    # Score cards
    st.markdown('<div class="sec-label">Coherence Scores</div>', unsafe_allow_html=True)
    cards = (
        score_card_html("MSCI (Overall)", msci)
        + score_card_html("Text \u2192 Image", st_i)
        + score_card_html("Text \u2192 Audio", st_a)
        + score_card_html("Classification", msci, is_class=True)
    )
    st.markdown(f'<div class="scores-grid">{cards}</div>', unsafe_allow_html=True)

    # Timing strip
    tt = R.get("t_total", 0)
    sep = '<span class="t-sep">|</span>'
    st.markdown(
        f'<div class="timing">'
        f'<span class="t-total">Total {tt:.1f}s</span>{sep}'
        f'<span>Text {R.get("t_text", 0):.1f}s</span>{sep}'
        f'<span>Image {R.get("t_img", 0):.1f}s</span>{sep}'
        f'<span>Audio {R.get("t_aud", 0):.1f}s</span>{sep}'
        f'<span>Eval {R.get("t_eval", 0):.1f}s</span>'
        f'</div>', unsafe_allow_html=True)

    st.markdown("---")

    # Three columns: text | image | audio
    ct, ci, ca = st.columns([1.15, 1, 0.85])

    with ct:
        st.markdown('<div class="sec-label">Generated Text</div>', unsafe_allow_html=True)
        # The text comes from the model or straight from the user, and is
        # rendered as raw HTML, so it must be escaped first.
        txt = html.escape((R.get("text") or {}).get("text", "") or "")
        st.markdown(f'<div class="text-card">{txt}</div>', unsafe_allow_html=True)

    with ci:
        st.markdown('<div class="sec-label">Image</div>', unsafe_allow_html=True)
        ii = R.get("image")
        if ii and ii.get("path"):
            ip = Path(ii["path"])
            sim_txt = _sim_text(ii.get("similarity"))

            if ii.get("failed", False):
                st.markdown(
                    f'<div class="warn-banner"><b>Below threshold</b> '
                    f'(sim={sim_txt} &lt; {IMAGE_SIM_THRESHOLD}) '
                    f'\u2014 best match from index.</div>',
                    unsafe_allow_html=True)

            if ip.exists():
                st.image(str(ip), use_container_width=True)
                dom = ii.get("domain", "other")
                ic = DOMAIN_ICONS.get(dom, "\U0001f4cd")
                st.caption(f"{ic} {dom} \u00b7 sim **{sim_txt}** \u00b7 {ip.name}")
            else:
                # Say so explicitly instead of leaving the column empty.
                st.warning(f"Image file not found: {ip.name}")
        else:
            st.info("No image.")

    with ca:
        st.markdown('<div class="sec-label">Audio</div>', unsafe_allow_html=True)
        ai = R.get("audio")
        if ai and ai.get("path"):
            ap = Path(ai["path"])
            sim_txt = _sim_text(ai.get("similarity"))

            if ai.get("failed", False):
                st.markdown(
                    f'<div class="warn-banner"><b>Below threshold</b> '
                    f'(sim={sim_txt} &lt; {AUDIO_SIM_THRESHOLD}).</div>',
                    unsafe_allow_html=True)

            if ap.exists():
                st.audio(str(ap))
                st.caption(f"sim **{sim_txt}** \u00b7 {ap.name}")
            else:
                # Say so explicitly instead of leaving the column empty.
                st.warning(f"Audio file not found: {ap.name}")
        else:
            st.info("No audio.")

    st.markdown("---")

    # Expandable details
    with st.expander("Retrieval Details"):
        r1, r2 = st.columns(2)
        with r1:
            ii = R.get("image")
            if ii and ii.get("top_5"):
                st.markdown("**Image \u2014 Top 5 candidates**")
                bars = "".join(sim_bar_html(n, s) for n, s in ii["top_5"])
                st.markdown(bars, unsafe_allow_html=True)
            else:
                st.write("No image data.")
        with r2:
            ai = R.get("audio")
            if ai and ai.get("top_5"):
                st.markdown("**Audio \u2014 Top 5 candidates**")
                bars = "".join(sim_bar_html(n, s) for n, s in ai["top_5"])
                st.markdown(bars, unsafe_allow_html=True)
            else:
                st.write("No audio data.")

    with st.expander("Full Coherence Report"):
        if coh:
            st.json(coh)
        else:
            st.write("No data.")


def _sim_text(sim) -> str:
    """Render a similarity with three decimals, or ``"n/a"`` if missing/non-numeric."""
    try:
        return f"{sim:.3f}"
    except (TypeError, ValueError):
        return "n/a"
|
| 581 |
+
|
| 582 |
+
|
| 583 |
+
# Script entry point: build the page only when the file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
data/embeddings/audio_index.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d3f42fa5191e4f226284ea44ded2088e2d536e866c5ceca72f3a97b92084b02
|
| 3 |
+
size 201463
|
data/embeddings/image_index.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d1f400311b4c7a1f23767d1e4e589bc1734db734e261c3f88235bfcdede72c1
|
| 3 |
+
size 111366
|
data/freesound/audio/fs_001_nature_Autumn_leaves_falling_on_forest_floor_(close,_loopable).wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a19139048193129622a4a260ac867ce54bf6faa3cbea57b2156864a5b05e0e3
|
| 3 |
+
size 904034
|
data/freesound/audio/fs_002_nature_woodpeckers_Haanwijk_1206_PM_250306_1056.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e94c8382bde74a377c8ee9eda21c6e30efd0ea88085dd6e291b188c252b3749a
|
| 3 |
+
size 1965922
|
data/freesound/audio/fs_003_nature_Bird_-_Tawny_owl,_female.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a74ec22281db411cbaca31c06e218e4521121ab2235d2ed8ed0898e2aba9d6a
|
| 3 |
+
size 1239808
|
data/freesound/audio/fs_004_nature_Bird_Chirps.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67e4db8b8f5019aa421247c28dca4d07fd6ca49825486dc0e43f9199db5532fc
|
| 3 |
+
size 1992088
|
data/freesound/audio/fs_005_nature_Birds_Chirping_Busy.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cb9914242fbb4cfc6b1f15f5684b4d6c2a065dd07c8988db28dc57dab00e87e
|
| 3 |
+
size 2196300
|
data/freesound/audio/fs_006_nature_bird_in_oostduinen_17.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab66fccbe0a5c5bbb8909fcc89875598b8e1267cb4886e8f1ebcdbe12145e6c6
|
| 3 |
+
size 2496044
|
data/freesound/audio/fs_007_nature_wind_bushes_coast_903_PM_240311_0680.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b17b1c51903e9e8f6c302eb6a3e5b661314e5bce366f16f977a33ec0cbfc3d0
|
| 3 |
+
size 2497080
|
data/freesound/audio/fs_008_nature_estate_farm_noise_1153AM3_220509_0343.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c27d632f13199874d3a7bcd239faf615cd74bfccd307cc30f00230ba9a92880e
|
| 3 |
+
size 1290268
|
data/freesound/audio/fs_009_nature_Howler_monkey_howling_in_the_evening_jungle.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f7d541ad4f512b08a7890e8b6c54802a79058360476e38a5b6569f35465edfd
|
| 3 |
+
size 2823188
|
data/freesound/audio/fs_010_nature_forest_heavy_rain_loop.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10d2681d774b21b0b6b5d74ffa46546afd6d927e3670e4f82bb9592ec28fa8ed
|
| 3 |
+
size 2016046
|
data/freesound/audio/fs_011_nature_CardinalChirpingParabolicMP3VersionMay62013.mp3.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21bac3a5ac767cd7f3b03108756dbf8a8c63605b86dda71db30a209a479fdc42
|
| 3 |
+
size 2066826
|
data/freesound/audio/fs_012_nature_Superb_Lyrebird_II.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa8f7970bda68d45a2dc8307f2f2c0feb789573bcdbcb1699db59eb18d736fdd
|
| 3 |
+
size 2731094
|
data/freesound/audio/fs_013_nature_Night_Wind_Chimes.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcf6ed33ddd7283df586f8c39b03bd5a4e27fc2323be6bf72e796c80b4a59be7
|
| 3 |
+
size 1556490
|
data/freesound/audio/fs_014_nature_Southern_Summer_Evening_Ambience_with_Crickets_4.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98035c3df9658cc73fac5214e29ff73dfe4a56ff93f89dd8d7b6fa41e74799b4
|
| 3 |
+
size 2810960
|
data/freesound/audio/fs_015_nature_Li_River._Crickets_&_Frogs.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:225df4253c469d91504e23fa4e365d09893d5e9fcb5cb9dec7a03ecc7ee1f7be
|
| 3 |
+
size 2758824
|
data/freesound/audio/fs_016_nature_Thunder_sound_effect_2.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7eae669115404f55dac50814b543aa96665a91ffee618eabe999698170f93fa6
|
| 3 |
+
size 1750612
|
data/freesound/audio/fs_017_nature_Rain_medium_-_falling_on_soft_surface.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6cb55480eaefd15af572af917f0689d9b236b9317c4c5fb52d84ab800722a3dc
|
| 3 |
+
size 2595922
|
data/freesound/audio/fs_018_nature_Thunder_6.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20f51911074f08f93697d5bbcd7dbb38f6e2930316b074d9c3835c4f49046a8b
|
| 3 |
+
size 1317046
|
data/freesound/audio/fs_019_nature_Water_Flowing_into_Underground_Sinkhole.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04ca65aa8f703a36700c89ae090dd778c8a66a8a33fe66cd3e0abcbf150f046d
|
| 3 |
+
size 2192044
|
data/freesound/audio/fs_020_nature_Water_Flowing_Through_Close_Rapids_6.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f46d5db77e80d22b8d547e71367d95ee36b2ffa526321d6eb49507ad60f8952
|
| 3 |
+
size 2214176
|
data/freesound/audio/fs_021_nature_Water_Flowing_Through_Close_Rapids_5.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d010c690c23be21d4d46b130f4442b8b5f1da88c9f202e569dea739b7cd499f1
|
| 3 |
+
size 2616404
|
data/freesound/audio/fs_022_urban_CITY_AMBIENCE_for_CLEAN_LOOP.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f42d5388f879b79e682ac9807c935312353d014b205437bddee53c0dba559b1b
|
| 3 |
+
size 2146684
|
data/freesound/audio/fs_023_urban_Kitchen_Ambience_During_Daytime_.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5758edd039b63d52cb9e1b25f03c5eefd9de9ec777aee18e785d459d70cd7d7
|
| 3 |
+
size 2774062
|
data/freesound/audio/fs_024_urban_police_in_Paris.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13b6de6a41e3b27bb04a38122750cb73827795b2e9b4dbc064f7059c60ba9ad3
|
| 3 |
+
size 2859688
|
data/freesound/audio/fs_025_urban_BerlinGameScene.com_Crowd_Cheer_4.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca553ee2ac8517e773e9b3c6c544763a934e8059388c4a1ff64469f6c6ac282f
|
| 3 |
+
size 682308
|
data/freesound/audio/fs_026_urban_Да_ладно.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e915163fa6094eb25107cc4255e716c39b954f7816b8af31d4379f1a84433570
|
| 3 |
+
size 1029580
|
data/freesound/audio/fs_027_urban_Restaurant_5.mp3.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46956eee584e391c06b083a55a7e301e7d90b7c21810b35f5c33b6dcfd0c6ecf
|
| 3 |
+
size 2727330
|
data/freesound/audio/fs_028_urban_Industrial_grinder.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32aa2939ede7f9b5b573976e2bddd69cafb989b84912f41c7c92e68312b81078
|
| 3 |
+
size 2343072
|
data/freesound/audio/fs_029_urban_Continuous_mechanical_whir_.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34cb93921b80702b4a895f978640864d16b80d105c68cdd92e5832020368fde
|
| 3 |
+
size 2569814
|
data/freesound/audio/fs_030_urban_Electric_saw.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9eebafed83e3904b2607bbe55013740f966f806ab607346a4a783f4bb4fee159
|
| 3 |
+
size 2581360
|
data/freesound/audio/fs_031_urban_Renault_Master_F3500_dCi135_Foley_Horn_Outside_Mono.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:627b86f6d4d094686ef69849ba01156e0dd59ce3524134d518eaab6010d35650
|
| 3 |
+
size 742252
|
data/freesound/audio/fs_032_urban_Street_atmosphere_and_sounds_in_Rome.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50615b73fe56ddf1a2a182b3800dcbd6e43c731c51d47d62794e6e6043eaa775
|
| 3 |
+
size 2296034
|
data/freesound/audio/fs_033_urban_Distant_horn_beeps_09.flac.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d6be04f78e509ecb0311405f0e47d22ca1c6de01a9c9af7b1001796851d7069
|
| 3 |
+
size 1588044
|
data/freesound/audio/fs_034_urban_Underground_Train_Station_Ambience.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dad7da1daf478f4b52fb81f69642fec07e92e015073966fe8aceeb1a8124cc51
|
| 3 |
+
size 2078834
|
data/freesound/audio/fs_035_urban_subwaystartsequence.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20b0d9dcaaa7d811f77aebf37b15cbbb932c2b91cfa7d05e7ff5d7fba293c746
|
| 3 |
+
size 1399148
|
data/freesound/audio/fs_036_urban_Train,_Subway,_Pass_By,_Under_Bridge_Perspective.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb5a75bb293b4e43675f3ac73ba6d43dabe507f531452776b5e34aeda5f90c8f
|
| 3 |
+
size 1990804
|
data/freesound/audio/fs_037_urban_SFX_Donkey_Cart.MP3.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76547ae043e0ac913767dd5c4726cfc68067048f7c8ac2059aad917257c61887
|
| 3 |
+
size 854038
|
data/freesound/audio/fs_038_urban_people_talking_at_street_market.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6b4025514d31795ceca082446fd2dc85159e8b3dbc1c2606dcc73163b2f8351
|
| 3 |
+
size 1176878
|
data/freesound/audio/fs_039_urban_Japan_Tokyo_Shinjuku_Street_Promoter_Yelling_City.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38c4b41f9b1b0a894f55fce69058fdfa830d3e1c90c735fdeb47d409a0bbe257
|
| 3 |
+
size 2448622
|
data/freesound/audio/fs_040_water_waves_close_coast_small_004.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dd695890eb84143ffa372aae09270c6bd4c69c80572ecdf31b69e438ae0d192
|
| 3 |
+
size 1213436
|
data/freesound/audio/fs_041_water_wave_sand_beach_012.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c585c1a0d81502962a3e964b6bd472534d520a952a4ca9d33798bc2c88683b2a
|
| 3 |
+
size 572044
|
data/freesound/audio/fs_042_water_waves_over_mall_shells_01_150515_00.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41327238d54282c6fe7c7e6187d688f3019737985d4931dab8ec964db51ae37d
|
| 3 |
+
size 1374572
|
data/freesound/audio/fs_043_water_Rain_on_a_Plastic_Roof.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ec74947189f6e2e2b66c3f6f99556dd05264291c8a1cc67eb2fe9a66e130027
|
| 3 |
+
size 2136578
|
data/freesound/audio/fs_044_water_Ambiance_Waterfall_Big_Skogafoss_Far_Loop_Stereo_02.wav.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:048b7779e05420063cb4c2aefc0c4a480e0fec9cca3227d79a95f2b35692f9ac
|
| 3 |
+
size 2880044
|
data/freesound/audio/fs_045_water_Water_Stream_1.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe4d4a8c6655ef169541954da8db29a889da1c12135e28638389f8c4a59fe879
|
| 3 |
+
size 2496044
|