thanks to ai-forever ❤
Browse files- .gitattributes +16 -0
- README.md +133 -0
- __assets__/pipeline.jpg +0 -0
- __assets__/results/A car moving on the road from the sea to the mountains.gif +3 -0
- __assets__/results/A red car drifting, 4k video.gif +3 -0
- __assets__/results/Craft a heartwarming narrative showcasing the bond between a human and their loyal pet companion..gif +3 -0
- __assets__/results/Erupting volcano_ raw power, molten lava, and the forces of the Earth.gif +3 -0
- __assets__/results/Evoke the sense of wonder in a time-lapse journey through changing seasons..gif +3 -0
- __assets__/results/Explore the fascinating world of underwater creatures in a visually stunning sequence.gif +3 -0
- __assets__/results/Majestic humpback whale breaching_ power, grace, and ocean spectacle.gif +3 -0
- __assets__/results/Majestic waterfalls in a lush rainforest_ power, mist, and biodiversity.gif +3 -0
- __assets__/results/Polar ice caps_ the pristine wilderness of the Arctic and Antarctic.gif +3 -0
- __assets__/results/Rolling waves on a sandy beach_ relaxation, rhythm, and coastal beauty.gif +3 -0
- __assets__/results/Sloth in slow motion_ deliberate movements, relaxation, and arboreal life.gif +3 -0
- __assets__/results/Time-lapse of a flower blooming_ growth, beauty, and the passage of time..gif +3 -0
- __assets__/results/Wildlife migration_ herds on the move, crossing landscapes in harmony.gif +3 -0
- __assets__/results/chemistry laboratory, chemical explosion, 4k.gif +3 -0
- __assets__/results/luminescent jellyfish swims underwater, neon, 4k.gif +3 -0
- __assets__/results/white ghost flies through a night clearing, 4k.gif +3 -0
- __assets__/title.JPG +0 -0
- weights/kandinsky_video.pt +3 -0
- weights/kandinsky_video_interpolation.pt +3 -0
- weights/movq.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
__assets__/results/A[[:space:]]car[[:space:]]moving[[:space:]]on[[:space:]]the[[:space:]]road[[:space:]]from[[:space:]]the[[:space:]]sea[[:space:]]to[[:space:]]the[[:space:]]mountains.gif filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
__assets__/results/A[[:space:]]red[[:space:]]car[[:space:]]drifting,[[:space:]]4k[[:space:]]video.gif filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
__assets__/results/Craft[[:space:]]a[[:space:]]heartwarming[[:space:]]narrative[[:space:]]showcasing[[:space:]]the[[:space:]]bond[[:space:]]between[[:space:]]a[[:space:]]human[[:space:]]and[[:space:]]their[[:space:]]loyal[[:space:]]pet[[:space:]]companion..gif filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
__assets__/results/Erupting[[:space:]]volcano_[[:space:]]raw[[:space:]]power,[[:space:]]molten[[:space:]]lava,[[:space:]]and[[:space:]]the[[:space:]]forces[[:space:]]of[[:space:]]the[[:space:]]Earth.gif filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
__assets__/results/Evoke[[:space:]]the[[:space:]]sense[[:space:]]of[[:space:]]wonder[[:space:]]in[[:space:]]a[[:space:]]time-lapse[[:space:]]journey[[:space:]]through[[:space:]]changing[[:space:]]seasons..gif filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
__assets__/results/Explore[[:space:]]the[[:space:]]fascinating[[:space:]]world[[:space:]]of[[:space:]]underwater[[:space:]]creatures[[:space:]]in[[:space:]]a[[:space:]]visually[[:space:]]stunning[[:space:]]sequence.gif filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
__assets__/results/Majestic[[:space:]]humpback[[:space:]]whale[[:space:]]breaching_[[:space:]]power,[[:space:]]grace,[[:space:]]and[[:space:]]ocean[[:space:]]spectacle.gif filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
__assets__/results/Majestic[[:space:]]waterfalls[[:space:]]in[[:space:]]a[[:space:]]lush[[:space:]]rainforest_[[:space:]]power,[[:space:]]mist,[[:space:]]and[[:space:]]biodiversity.gif filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
__assets__/results/Polar[[:space:]]ice[[:space:]]caps_[[:space:]]the[[:space:]]pristine[[:space:]]wilderness[[:space:]]of[[:space:]]the[[:space:]]Arctic[[:space:]]and[[:space:]]Antarctic.gif filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
__assets__/results/Rolling[[:space:]]waves[[:space:]]on[[:space:]]a[[:space:]]sandy[[:space:]]beach_[[:space:]]relaxation,[[:space:]]rhythm,[[:space:]]and[[:space:]]coastal[[:space:]]beauty.gif filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
__assets__/results/Sloth[[:space:]]in[[:space:]]slow[[:space:]]motion_[[:space:]]deliberate[[:space:]]movements,[[:space:]]relaxation,[[:space:]]and[[:space:]]arboreal[[:space:]]life.gif filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
__assets__/results/Time-lapse[[:space:]]of[[:space:]]a[[:space:]]flower[[:space:]]blooming_[[:space:]]growth,[[:space:]]beauty,[[:space:]]and[[:space:]]the[[:space:]]passage[[:space:]]of[[:space:]]time..gif filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
__assets__/results/Wildlife[[:space:]]migration_[[:space:]]herds[[:space:]]on[[:space:]]the[[:space:]]move,[[:space:]]crossing[[:space:]]landscapes[[:space:]]in[[:space:]]harmony.gif filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
__assets__/results/chemistry[[:space:]]laboratory,[[:space:]]chemical[[:space:]]explosion,[[:space:]]4k.gif filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
__assets__/results/luminescent[[:space:]]jellyfish[[:space:]]swims[[:space:]]underwater,[[:space:]]neon,[[:space:]]4k.gif filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
__assets__/results/white[[:space:]]ghost[[:space:]]flies[[:space:]]through[[:space:]]a[[:space:]]night[[:space:]]clearing,[[:space:]]4k.gif filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
---
|
| 4 |
+
# Kandinsky Video — a new text-to-video generation model
|
| 5 |
+
## SoTA quality among open-source solutions
|
| 6 |
+
|
| 7 |
+
This repository is the official implementation of the Kandinsky Video model.
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
Paper | [Project](https://ai-forever.github.io/kandinsky-video/) | [Telegram-bot](https://t.me/video_kandinsky_bot) | Habr post
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
<p align="center">
|
| 14 |
+
<img src="__assets__/title.JPG" width="800px"/>
|
| 15 |
+
<br>
|
| 16 |
+
<em>Kandinsky Video is a text-to-video generation model, which is based on the FusionFrames architecture, consisting of two main stages: keyframe generation and interpolation. Our approach for temporal conditioning allows us to generate videos with high-quality appearance, smoothness and dynamics.</em>
|
| 17 |
+
</p>
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
## Pipeline
|
| 22 |
+
|
| 23 |
+
<p align="center">
|
| 24 |
+
<img src="__assets__/pipeline.jpg" width="800px"/>
|
| 25 |
+
<br>
|
| 26 |
+
<em>The encoded text prompt enters the U-Net keyframe generation model with temporal layers or blocks, and then the sampled latent keyframes are sent to the latent interpolation model in such a way as to predict three interpolation frames between two keyframes. A temporal MoVQ-GAN decoder is used to get the final video result.</em>
|
| 27 |
+
</p>
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
**Architecture details**
|
| 31 |
+
|
| 32 |
+
+ Text encoder (Flan-UL2) - 8.6B
|
| 33 |
+
+ Latent Diffusion U-Net3D - 4.0B
|
| 34 |
+
+ MoVQ encoder/decoder - 256M
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
## How to use
|
| 38 |
+
|
| 39 |
+
Check our Jupyter notebooks with examples in the `./examples` folder.
|
| 40 |
+
### 1. text2video
|
| 41 |
+
|
| 42 |
+
```python
|
| 43 |
+
from video_kandinsky3 import get_T2V_pipeline
|
| 44 |
+
|
| 45 |
+
t2v_pipe = get_T2V_pipeline('cuda', fp16=True)
|
| 46 |
+
|
| 47 |
+
fps = 'medium' # ['low', 'medium', 'high']
|
| 48 |
+
video = t2v_pipe(
|
| 49 |
+
'a red car is drifting on the mountain road, close view, fast movement',
|
| 50 |
+
width=640, height=384, fps=fps
|
| 51 |
+
)
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
## Results
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
<table class="center">
|
| 59 |
+
<tr>
|
| 60 |
+
<td><img src="__assets__/results/A car moving on the road from the sea to the mountains.gif" raw=true></td>
|
| 61 |
+
<td><img src="__assets__/results/A red car drifting, 4k video.gif"></td>
|
| 62 |
+
<td><img src="__assets__/results/chemistry laboratory, chemical explosion, 4k.gif"></td>
|
| 63 |
+
<td><img src="__assets__/results/Erupting volcano_ raw power, molten lava, and the forces of the Earth.gif"></td>
|
| 64 |
+
</tr>
|
| 65 |
+
<tr>
|
| 66 |
+
<td width=25% align="center">"A car moving on the road from the sea to the mountains"</td>
|
| 67 |
+
<td width=25% align="center">"A red car drifting, 4k video"</td>
|
| 68 |
+
<td width=25% align="center">"Chemistry laboratory, chemical explosion, 4k"</td>
|
| 69 |
+
<td width=25% align="center">"Erupting volcano: raw power, molten lava, and the forces of the Earth"</td>
|
| 70 |
+
</tr>
|
| 71 |
+
|
| 72 |
+
<tr>
|
| 73 |
+
<td><img src="__assets__/results/luminescent jellyfish swims underwater, neon, 4k.gif" raw=true></td>
|
| 74 |
+
<td><img src="__assets__/results/Majestic waterfalls in a lush rainforest_ power, mist, and biodiversity.gif"></td>
|
| 75 |
+
<td><img src="__assets__/results/white ghost flies through a night clearing, 4k.gif"></td>
|
| 76 |
+
<td><img src="__assets__/results/Wildlife migration_ herds on the move, crossing landscapes in harmony.gif"></td>
|
| 77 |
+
</tr>
|
| 78 |
+
<tr>
|
| 79 |
+
<td width=25% align="center">"Luminescent jellyfish swims underwater, neon, 4k"</td>
|
| 80 |
+
<td width=25% align="center">"Majestic waterfalls in a lush rainforest: power, mist, and biodiversity"</td>
|
| 81 |
+
<td width=25% align="center">"White ghost flies through a night clearing, 4k"</td>
|
| 82 |
+
<td width=25% align="center">"Wildlife migration: herds on the move, crossing landscapes in harmony"</td>
|
| 83 |
+
</tr>
|
| 84 |
+
|
| 85 |
+
<tr>
|
| 86 |
+
<td><img src="__assets__/results/Majestic humpback whale breaching_ power, grace, and ocean spectacle.gif" raw=true></td>
|
| 87 |
+
<td><img src="__assets__/results/Evoke the sense of wonder in a time-lapse journey through changing seasons..gif"></td>
|
| 88 |
+
<td><img src="__assets__/results/Explore the fascinating world of underwater creatures in a visually stunning sequence.gif"></td>
|
| 89 |
+
<td><img src="__assets__/results/Polar ice caps_ the pristine wilderness of the Arctic and Antarctic.gif"></td>
|
| 90 |
+
</tr>
|
| 91 |
+
<tr>
|
| 92 |
+
<td width=25% align="center">"Majestic humpback whale breaching: power, grace, and ocean spectacle"</td>
|
| 93 |
+
<td width=25% align="center">"Evoke the sense of wonder in a time-lapse journey through changing seasons"</td>
|
| 94 |
+
<td width=25% align="center">"Explore the fascinating world of underwater creatures in a visually stunning sequence"</td>
|
| 95 |
+
<td width=25% align="center">"Polar ice caps: the pristine wilderness of the Arctic and Antarctic"</td>
|
| 96 |
+
</tr>
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
<tr>
|
| 100 |
+
<td><img src="__assets__/results/Rolling waves on a sandy beach_ relaxation, rhythm, and coastal beauty.gif" raw=true></td>
|
| 101 |
+
<td><img src="__assets__/results/Sloth in slow motion_ deliberate movements, relaxation, and arboreal life.gif"></td>
|
| 102 |
+
<td><img src="__assets__/results/Time-lapse of a flower blooming_ growth, beauty, and the passage of time..gif"></td>
|
| 103 |
+
<td><img src="__assets__/results/Craft a heartwarming narrative showcasing the bond between a human and their loyal pet companion..gif"></td>
|
| 104 |
+
</tr>
|
| 105 |
+
<tr>
|
| 106 |
+
<td width=25% align="center">"Rolling waves on a sandy beach: relaxation, rhythm, and coastal beauty"</td>
|
| 107 |
+
<td width=25% align="center">"Sloth in slow motion: deliberate movements, relaxation, and arboreal life"</td>
|
| 108 |
+
<td width=25% align="center">"Time-lapse of a flower blooming: growth, beauty, and the passage of time"</td>
|
| 109 |
+
<td width=25% align="center">"Craft a heartwarming narrative showcasing the bond between a human and their loyal pet companion"</td>
|
| 110 |
+
</tr>
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
</table>
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
## Authors
|
| 117 |
+
|
| 118 |
+
+ Vladimir Arkhipkin: [Github](https://github.com/oriBetelgeuse), [Google Scholar](https://scholar.google.com/citations?user=D-Ko0oAAAAAJ&hl=ru)
|
| 119 |
+
+ Zein Shaheen: [Github](https://github.com/zeinsh), [Google Scholar](https://scholar.google.ru/citations?user=bxlgMxMAAAAJ&hl=en)
|
| 120 |
+
+ Viacheslav Vasilev: [Github](https://github.com/vivasilev), [Google Scholar](https://scholar.google.com/citations?user=redAz-kAAAAJ&hl=ru&oi=sra)
|
| 121 |
+
+ Igor Pavlov: [Github](https://github.com/boomb0om)
|
| 122 |
+
+ Elizaveta Dakhova: [Github](https://github.com/LizaDakhova)
|
| 123 |
+
+ Anastasia Lysenko: [Github](https://github.com/LysenkoAnastasia)
|
| 124 |
+
+ Sergey Markov
|
| 125 |
+
+ Denis Dimitrov: [Github](https://github.com/denndimitrov), [Google Scholar](https://scholar.google.com/citations?user=3JSIJpYAAAAJ&hl=ru&oi=ao)
|
| 126 |
+
+ Andrey Kuznetsov: [Github](https://github.com/kuznetsoffandrey), [Google Scholar](https://scholar.google.com/citations?user=q0lIfCEAAAAJ&hl=ru)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
## BibTeX
|
| 130 |
+
If you use our work in your research, please cite our publication:
|
| 131 |
+
```
|
| 132 |
+
TBD
|
| 133 |
+
```
|
__assets__/pipeline.jpg
ADDED
|
__assets__/results/A car moving on the road from the sea to the mountains.gif
ADDED
|
Git LFS Details
|
__assets__/results/A red car drifting, 4k video.gif
ADDED
|
Git LFS Details
|
__assets__/results/Craft a heartwarming narrative showcasing the bond between a human and their loyal pet companion..gif
ADDED
|
Git LFS Details
|
__assets__/results/Erupting volcano_ raw power, molten lava, and the forces of the Earth.gif
ADDED
|
Git LFS Details
|
__assets__/results/Evoke the sense of wonder in a time-lapse journey through changing seasons..gif
ADDED
|
Git LFS Details
|
__assets__/results/Explore the fascinating world of underwater creatures in a visually stunning sequence.gif
ADDED
|
Git LFS Details
|
__assets__/results/Majestic humpback whale breaching_ power, grace, and ocean spectacle.gif
ADDED
|
Git LFS Details
|
__assets__/results/Majestic waterfalls in a lush rainforest_ power, mist, and biodiversity.gif
ADDED
|
Git LFS Details
|
__assets__/results/Polar ice caps_ the pristine wilderness of the Arctic and Antarctic.gif
ADDED
|
Git LFS Details
|
__assets__/results/Rolling waves on a sandy beach_ relaxation, rhythm, and coastal beauty.gif
ADDED
|
Git LFS Details
|
__assets__/results/Sloth in slow motion_ deliberate movements, relaxation, and arboreal life.gif
ADDED
|
Git LFS Details
|
__assets__/results/Time-lapse of a flower blooming_ growth, beauty, and the passage of time..gif
ADDED
|
Git LFS Details
|
__assets__/results/Wildlife migration_ herds on the move, crossing landscapes in harmony.gif
ADDED
|
Git LFS Details
|
__assets__/results/chemistry laboratory, chemical explosion, 4k.gif
ADDED
|
Git LFS Details
|
__assets__/results/luminescent jellyfish swims underwater, neon, 4k.gif
ADDED
|
Git LFS Details
|
__assets__/results/white ghost flies through a night clearing, 4k.gif
ADDED
|
Git LFS Details
|
__assets__/title.JPG
ADDED
|
|
weights/kandinsky_video.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75c8f416c05c67f231cd300961416a73e90f255288bff5dce6b80b0bf43da3a6
|
| 3 |
+
size 16642806469
|
weights/kandinsky_video_interpolation.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc724de4187e29aeaccff8f5274cfbe325f23ecaf9fc7adbbdcc8d66ff8b2c83
|
| 3 |
+
size 15490902029
|
weights/movq.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ad3a981f7cd5cd19ff85382319039b94be4c2c11760bf46079d96bb84ca98ef
|
| 3 |
+
size 1082549221
|