bailjumpa ofirbibi commited on
Commit
cce0eda
·
0 Parent(s):

Duplicate from Lightricks/LTX-2

Browse files

Co-authored-by: Ofir Bibi <ofirbibi@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +37 -0
  2. LICENSE +382 -0
  3. README.md +131 -0
  4. audio_vae/config.json +25 -0
  5. audio_vae/diffusion_pytorch_model.safetensors +3 -0
  6. connectors/config.json +19 -0
  7. connectors/diffusion_pytorch_model.safetensors +3 -0
  8. latent_upsampler/config.json +11 -0
  9. latent_upsampler/diffusion_pytorch_model.safetensors +3 -0
  10. ltx-2-19b-dev-fp4.safetensors +3 -0
  11. ltx-2-19b-dev-fp8.safetensors +3 -0
  12. ltx-2-19b-dev.safetensors +3 -0
  13. ltx-2-19b-distilled-fp8.safetensors +3 -0
  14. ltx-2-19b-distilled-lora-384.safetensors +3 -0
  15. ltx-2-19b-distilled.safetensors +3 -0
  16. ltx-2-running-local.mp4 +3 -0
  17. ltx-2-spatial-upscaler-x2-1.0.safetensors +3 -0
  18. ltx-2-temporal-upscaler-x2-1.0.safetensors +3 -0
  19. model_index.json +36 -0
  20. scheduler/scheduler_config.json +18 -0
  21. text_encoder/config.json +114 -0
  22. text_encoder/diffusion_pytorch_model-00001-of-00012.safetensors +3 -0
  23. text_encoder/diffusion_pytorch_model-00002-of-00012.safetensors +3 -0
  24. text_encoder/diffusion_pytorch_model-00003-of-00012.safetensors +3 -0
  25. text_encoder/diffusion_pytorch_model-00004-of-00012.safetensors +3 -0
  26. text_encoder/diffusion_pytorch_model-00005-of-00012.safetensors +3 -0
  27. text_encoder/diffusion_pytorch_model-00006-of-00012.safetensors +3 -0
  28. text_encoder/diffusion_pytorch_model-00007-of-00012.safetensors +3 -0
  29. text_encoder/diffusion_pytorch_model-00008-of-00012.safetensors +3 -0
  30. text_encoder/diffusion_pytorch_model-00009-of-00012.safetensors +3 -0
  31. text_encoder/diffusion_pytorch_model-00010-of-00012.safetensors +3 -0
  32. text_encoder/diffusion_pytorch_model-00011-of-00012.safetensors +3 -0
  33. text_encoder/diffusion_pytorch_model-00012-of-00012.safetensors +3 -0
  34. text_encoder/diffusion_pytorch_model.safetensors.index.json +0 -0
  35. text_encoder/generation_config.json +11 -0
  36. text_encoder/model-00001-of-00011.safetensors +3 -0
  37. text_encoder/model-00002-of-00011.safetensors +3 -0
  38. text_encoder/model-00003-of-00011.safetensors +3 -0
  39. text_encoder/model-00004-of-00011.safetensors +3 -0
  40. text_encoder/model-00005-of-00011.safetensors +3 -0
  41. text_encoder/model-00006-of-00011.safetensors +3 -0
  42. text_encoder/model-00007-of-00011.safetensors +3 -0
  43. text_encoder/model-00008-of-00011.safetensors +3 -0
  44. text_encoder/model-00009-of-00011.safetensors +3 -0
  45. text_encoder/model-00010-of-00011.safetensors +3 -0
  46. text_encoder/model-00011-of-00011.safetensors +3 -0
  47. text_encoder/model.safetensors.index.json +0 -0
  48. tokenizer/added_tokens.json +3 -0
  49. tokenizer/chat_template.jinja +47 -0
  50. tokenizer/preprocessor_config.json +29 -0
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LTX-2 Community License Agreement
2
+ License date: January 5, 2026
3
+
4
+
5
+ By using or distributing any portion or element of LTX-2, you agree
6
+ to be bound by this Agreement.
7
+
8
+ 1. Definitions.
9
+
10
+ "Agreement" means the terms and conditions for the license, use,
11
+ reproduction, and distribution of LTX-2 and the Complementary
12
+ Materials, as specified in this document.
13
+
14
+ "Control" means the direct or indirect ownership of more than
15
+ fifty percent (50%) of the voting securities or other ownership
16
+ interests, or the power to direct the management and policies of
17
+ such Entity through voting rights, contract, or otherwise.
18
+
19
+ "Data" means a collection of information and/or content extracted
20
+ from the dataset used with LTX-2, including to train, pretrain,
21
+ or otherwise evaluate LTX-2. The Data is not licensed under this
22
+ Agreement.
23
+
24
+ "Derivatives of LTX-2" means all modifications to LTX-2, works
25
+ based on LTX-2, or any other model which is created or initialized
26
+ by transfer of patterns of the weights, parameters, activations or
27
+ output of LTX-2, to the other model, in order to cause the other
28
+ model to perform similarly to LTX-2, including – but not limited
29
+ to - distillation methods entailing the use of intermediate data
30
+ representations or methods based on the generation of synthetic
31
+ data by LTX-2 for training the other model. For clarity, Derivatives
32
+ of LTX-2 include: (i) any fine-tuned or adapted weights, parameters,
33
+ or checkpoints derived from LTX-2; (ii) derivative model architectures
34
+ that incorporate or are based upon LTX-2's architecture; and
35
+ (iii) any modified or extended versions of the Complementary
36
+ Materials. All intellectual property rights in Derivatives of LTX-2
37
+ shall be subject to the terms of this Agreement, and you may not
38
+ claim exclusive ownership rights in any Derivatives of LTX-2 that
39
+ would restrict the rights granted herein.
40
+
41
+ "Entity" means any individual, corporation, partnership, limited
42
+ liability company, or other legal entity. For purposes of this
43
+ Agreement, an Entity shall be deemed to include, on an aggregative
44
+ basis, all subsidiaries, affiliates, and other companies under
45
+ common Control with such Entity. When determining whether an Entity
46
+ meets any threshold under this Agreement (including revenue
47
+ thresholds), all subsidiaries, affiliates, and companies under
48
+ common Control shall be considered collectively.
49
+
50
+ "Harm" includes but is not limited to physical, mental,
51
+ psychological, financial and reputational damage, pain, or loss.
52
+
53
+ "Licensor" or "Lightricks" means the owner that is granting the
54
+ license under this Agreement. For the purposes of this Agreement,
55
+ the Licensor is Lightricks Ltd.
56
+
57
+ "LTX-2" means the large language models, text/image/video/audio/3D
58
+ generation models, and multimodal large language models and their
59
+ software and algorithms, including trained model weights, parameters
60
+ (including optimizer states), machine-learning model code,
61
+ inference-enabling code, training-enabling code, fine-tuning
62
+ enabling code, accompanying source code, scripts, documentation,
63
+ tutorials, examples, and all other elements of the foregoing
64
+ distributed and made publicly available by Lightricks (including,
65
+ for example, at https://github.com/Lightricks/LTX-2) for the LTX-2
66
+ model released on January 5, 2026. This license is applicable to
67
+ all LTX-2 versions released since January 5, 2026, and all future
68
+ releases of LTX-2 under this license.
69
+
70
+ "Output" means the results of operating LTX-2 as embodied in
71
+ informational content resulting therefrom.
72
+
73
+ "you" (or "your") means an individual or legal Entity licensing
74
+ LTX-2 in accordance with this Agreement and/or making use of LTX-2
75
+ for whichever purpose and in any field of use, including usage of
76
+ LTX-2 in an end-use application - e.g. chatbot, translator, image
77
+ generator.
78
+
79
+ 2. Grant of License. Subject to the terms and conditions of this
80
+ Agreement, you are granted a non-exclusive, worldwide,
81
+ non-transferable and royalty-free limited license under Licensor's
82
+ intellectual property or other rights owned by Licensor embodied
83
+ in LTX-2 to use, reproduce, prepare, distribute, publicly display,
84
+ publicly perform, sublicense, copy, create derivative works of,
85
+ and make modifications to LTX-2, for any purpose, subject to the
86
+ restrictions set forth in Attachment A; provided however, that
87
+ Entities with annual revenues of at least $10,000,000 (the
88
+ "Commercial Entities") are required to obtain a paid commercial
89
+ use license in order to use LTX-2 and Derivatives of LTX-2,
90
+ subject to the terms and provisions of a different license (the
91
+ "Commercial Use Agreement"), as will be provided by the Licensor.
92
+ Commercial Entities interested in such a commercial license are
93
+ required to [contact Licensor](https://ltx.io/model/licensing).
94
+ Any commercial use of LTX-2 or Derivatives of LTX-2 by the Commercial
95
+ Entities not in accordance with this Agreement and/or the Commercial
96
+ Use Agreement is strictly prohibited and shall be deemed a material
97
+ breach of this Agreement.
98
+ Such material breach will be subject, in addition to any license
99
+ fees owed to Licensor for the period such Commercial Entity used
100
+ LTX-2 (as will be determined by Licensor), to liquidated damages,
101
+ which will be paid to Licensor immediately upon demand, in an
102
+ amount equal to double the amount that would otherwise have been
103
+ paid by you for the relevant period of time. Such amount reflects
104
+ a reasonable estimation of the losses and administrative costs
105
+ incurred due to such breach. You agree and understand that this
106
+ remedy does not limit the Licensor's right to pursue other remedies
107
+ available at law or equity.
108
+
109
+ 3. Distribution and Redistribution. You may host for third parties
110
+ remote access purposes (e.g. software-as-a-service), reproduce
111
+ and distribute copies of LTX-2 or Derivatives of LTX-2 thereof in
112
+ any medium, with or without modifications, provided that you meet
113
+ the following conditions:
114
+
115
+ (a) Use-based restrictions as referenced in paragraph 4 and all
116
+ provisions of Attachment A MUST be included as an enforceable
117
+ provision by you in any type of legal agreement (e.g. a
118
+ license) governing the use and/or distribution of LTX-2 or
119
+ Derivatives of LTX-2, and you shall give notice to subsequent
120
+ users you distribute to, that LTX-2 or Derivatives of LTX-2
121
+ are subject to paragraph 4 and Attachment A in their entirety,
122
+ including all use restrictions and acceptable use policies;
123
+
124
+ (b) You must provide any third party recipients of LTX-2 or
125
+ Derivatives of LTX-2 a copy of this Agreement, including all
126
+ attachments and use policies. Any Derivative of LTX-2 (as
127
+ defined in Section 1, including but not limited to fine-tuned
128
+ weights, modified training code, models trained on Outputs, or
129
+ any other derivative) must be distributed exclusively under
130
+ the terms of this Agreement with a complete copy of this
131
+ license included;
132
+
133
+ (c) You must cause any modified files to carry prominent notices
134
+ stating that you changed the files;
135
+
136
+ (d) You must retain all copyright, patent, trademark, and
137
+ attribution notices excluding those notices that do not
138
+ pertain to any part of LTX-2, Derivatives of LTX-2.
139
+
140
+ You may add your own copyright statement to your modifications and
141
+ may provide additional or different license terms and conditions -
142
+ respecting paragraph 3(a) - for use, reproduction, or distribution
143
+ of your modifications, or for any such Derivatives of LTX-2 as a
144
+ whole, provided your use, reproduction, and distribution of LTX-2
145
+ otherwise complies with the conditions stated in this Agreement,
146
+ and you provide a complete copy of this Agreement with any such
147
+ use, reproduction and distribution of LTX-2 and any Derivatives
148
+ thereof.
149
+
150
+ 4. Use-based restrictions. The restrictions set forth in Attachment A
151
+ are considered Use-based restrictions. Therefore, you cannot use
152
+ LTX-2 and the Derivatives of LTX-2 in violation of the specified
153
+ restricted uses. You may use LTX-2 subject to this Agreement,
154
+ including only for lawful purposes and in accordance with the
155
+ Agreement. "Use" may include creating any content with, fine-tuning,
156
+ updating, running, training, evaluating and/or re-parametrizing
157
+ LTX-2. You shall require all of your users who use LTX-2 or a
158
+ Derivative of LTX-2 to comply with the terms of this paragraph 4.
159
+
160
+ 5. The Output You Generate. Except as set forth herein, Licensor
161
+ claims no rights in the Output you generate using LTX-2. You are
162
+ accountable for input you insert into LTX-2, the Output you
163
+ generate and its subsequent uses. No use of the Output can
164
+ contravene any provision as stated in the Agreement.
165
+
166
+ 6. Updates and Runtime Restrictions. To the maximum extent permitted
167
+ by law, Licensor reserves the right to restrict (remotely or
168
+ otherwise) usage of LTX-2 in violation of this Agreement, update
169
+ LTX-2 through electronic means, or modify the Output of LTX-2
170
+ based on updates. You shall undertake reasonable efforts to use
171
+ the latest version of LTX-2. Any use of the non-current version
172
+ of LTX-2 is done solely at your risk.
173
+
174
+ 7. Export Controls and Sanctions Compliance. You acknowledge that
175
+ LTX-2, Derivatives of LTX-2 may be subject to export control laws
176
+ and regulations, including but not limited to the U.S. Export
177
+ Administration Regulations and sanctions programs administered by
178
+ the Office of Foreign Assets Control (OFAC). You represent and
179
+ warrant that you and any users of LTX-2 are not (i) located in,
180
+ organized under the laws of, or ordinarily resident in any country
181
+ or territory subject to comprehensive sanctions; (ii) identified
182
+ on any U.S. government restricted party list, including the
183
+ Specially Designated Nationals and Blocked Persons List; or
184
+ (iii) otherwise prohibited from receiving LTX-2 under applicable
185
+ law. You shall not export, re-export, or transfer LTX-2, directly
186
+ or indirectly, in violation of any applicable export control or
187
+ sanctions laws or regulations. You agree to comply with all
188
+ applicable trade control laws and shall indemnify and hold
189
+ Licensor harmless from any claims arising from your failure to
190
+ comply with such laws.
191
+
192
+ 8. Trademarks and related. Nothing in this Agreement permits you to
193
+ make use of Licensor's trademarks, trade names, logos or to
194
+ otherwise suggest endorsement or misrepresent the relationship
195
+ between the parties; and any rights not expressly granted herein
196
+ are reserved by the Licensor.
197
+
198
+ 9. Disclaimer of Warranty. Unless required by applicable law or
199
+ agreed to in writing, Licensor provides LTX-2 on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
201
+ implied, including, without limitation, any warranties or
202
+ conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS
203
+ FOR A PARTICULAR PURPOSE. You are solely responsible for
204
+ determining the appropriateness of using or redistributing LTX-2
205
+ and Derivatives of LTX-2 and assume any risks associated with
206
+ your exercise of permissions under this Agreement.
207
+
208
+ 10. Limitation of Liability. In no event and under no legal theory,
209
+ whether in tort (including negligence), contract, or otherwise,
210
+ unless required by applicable law (such as deliberate and grossly
211
+ negligent acts) or agreed to in writing, shall Licensor be liable
212
+ to you for damages, including any direct, indirect, special,
213
+ incidental, or consequential damages of any character arising as
214
+ a result of this Agreement or out of the use or inability to use
215
+ LTX-2 (including but not limited to damages for loss of goodwill,
216
+ work stoppage, computer failure or malfunction, or any and all
217
+ other commercial damages or losses), even if Licensor has been
218
+ advised of the possibility of such damages.
219
+
220
+ 11. Accepting Warranty or Additional Liability. While redistributing
221
+ LTX-2 and Derivatives of LTX-2, you may, provided you do not
222
+ violate the terms of this Agreement, choose to offer and charge
223
+ a fee for, acceptance of support, warranty, indemnity, or other
224
+ liability obligations. However, in accepting such obligations,
225
+ you may act only on your own behalf and on your sole
226
+ responsibility, not on behalf of Licensor, and only if you agree
227
+ to indemnify, defend, and hold Licensor harmless for any liability
228
+ incurred by, or claims asserted against Licensor, by reason of
229
+ your accepting any such warranty or additional liability.
230
+
231
+ 12. Governing Law. This Agreement and all relations, disputes, claims
232
+ and other matters arising hereunder (including non-contractual
233
+ disputes or claims) will be governed exclusively by, and construed
234
+ exclusively in accordance with, the laws of the State of New York.
235
+ To the extent permitted by law, choice of laws rules and the
236
+ United Nations Convention on Contracts for the International Sale
237
+ of Goods will not apply. For the purposes of adjudicating any
238
+ action or proceeding to enforce the terms of this Agreement, you
239
+ hereby irrevocably consent to the exclusive jurisdiction of, and
240
+ venue in, the federal and state courts located in the County of
241
+ New York within the State of New York. The prevailing party in
242
+ any claim or dispute between the parties under this Agreement
243
+ will be entitled to reimbursement of its reasonable attorneys'
244
+ fees and costs. You hereby waive the right to a trial by jury,
245
+ to participate in a class or representative action (including in
246
+ arbitration), or to combine individual proceedings in court or
247
+ in arbitration without the consent of all parties.
248
+
249
+ 13. Term and Termination. This Agreement is effective upon your
250
+ acceptance and continues until terminated. Licensor may terminate
251
+ this Agreement immediately upon written notice to you if you
252
+ breach any provision of this Agreement, including but not limited
253
+ to violations of the use restrictions in Attachment A or
254
+ unauthorized commercial use. Upon termination: (a) all rights
255
+ granted to you under this Agreement will immediately cease;
256
+ (b) you must immediately cease all use of LTX-2 and Derivatives
257
+ of LTX-2; (c) you must delete or destroy all copies of LTX-2
258
+ and Derivatives of LTX-2 in your possession or control; and
259
+ (d) you must notify any third parties to whom you distributed
260
+ LTX-2 or Derivatives of LTX-2 of the termination. Sections 8-13,
261
+ and Section 15 shall survive termination of this Agreement.
262
+ Termination does not relieve you of any obligations incurred
263
+ prior to termination, including payment obligations under
264
+ Section 2. In addition, if You commence a lawsuit or other
265
+ proceedings (including a cross-claim or counterclaim in a lawsuit)
266
+ against Licensor or any person or entity alleging that LTX-2 or
267
+ any Output, or any portion of any of the foregoing, infringe any
268
+ intellectual property or other right owned or licensable by you,
269
+ then all licenses granted to you under this Agreement shall
270
+ terminate as of the date such lawsuit or other proceeding is filed.
271
+
272
+ 14. Disputes and Arbitration. All disputes arising in connection with
273
+ this Agreement shall be finally settled by arbitration under the
274
+ Rules of Arbitration of the International Chamber of Commerce
275
+ ("ICC Rules"), by one (1) arbitrator appointed in accordance with
276
+ the ICC Rules. The seat of arbitration shall be New York, NY, USA,
277
+ and the proceedings shall be conducted in English. The arbitrator
278
+ shall be empowered to grant any relief that a court could grant.
279
+ Judgment on the arbitration award may be entered by any court
280
+ having jurisdiction thereof. Each party waives its right to a
281
+ trial by jury and to participate in any class or representative
282
+ action.
283
+
284
+ 15. If any provision of this Agreement is held to be
285
+ invalid, illegal
286
+ or unenforceable, the remaining provisions shall be unaffected
287
+ thereby and remain valid as if such provision had not been set
288
+ forth herein.
289
+
290
+ END OF TERMS AND CONDITIONS
291
+
292
+ ATTACHMENT A: Use Restrictions
293
+
294
+ When using the Outputs, LTX-2 and any Derivatives thereof, you
295
+ will comply with the Acceptable Use Policy. In addition, you
296
+ agree not to use the Outputs, LTX-2 or its Derivatives in any
297
+ of the following ways:
298
+
299
+ 1. In any way that violates any applicable national, federal,
300
+ state, local or international law or regulation;
301
+
302
+ 2. For the purpose of exploiting, Harming or attempting to
303
+ exploit or Harm minors in any way;
304
+
305
+ 3. To generate or disseminate false information and/or content
306
+ with the purpose of Harming others;
307
+
308
+ 4. To generate or disseminate personal identifiable information
309
+ that can be used to Harm an individual;
310
+
311
+ 5. To generate or disseminate information and/or content (e.g.
312
+ images, code, posts, articles), and place the information
313
+ and/or content in any context (e.g. bot generating tweets)
314
+ without expressly and intelligibly disclaiming that the
315
+ information and/or content is machine generated;
316
+
317
+ 6. To defame, disparage or otherwise harass others;
318
+
319
+ 7. To impersonate or attempt to impersonate (e.g. deepfakes)
320
+ others without their consent;
321
+
322
+ 8. For fully automated decision making that adversely impacts an
323
+ individual's legal rights or otherwise creates or modifies a
324
+ binding, enforceable obligation;
325
+
326
+ 9. For any use intended to or which has the effect of
327
+ discriminating against or Harming individuals or groups based
328
+ on online or offline social behavior or known or predicted
329
+ personal or personality characteristics;
330
+
331
+ 10. To exploit any of the vulnerabilities of a specific group of
332
+ persons based on their age, social, physical or mental
333
+ characteristics, in order to materially distort the behavior
334
+ of a person pertaining to that group in a manner that causes
335
+ or is likely to cause that person or another person physical
336
+ or psychological Harm;
337
+
338
+ 11. For any use intended to or which has the effect of
339
+ discriminating against individuals or groups based on legally
340
+ protected characteristics or categories;
341
+
342
+ 12. To provide medical advice and medical results interpretation;
343
+
344
+ 13. To generate or disseminate information for the purpose to be
345
+ used for administration of justice, law enforcement,
346
+ immigration or asylum processes, such as predicting an
347
+ individual will commit fraud/crime commitment (e.g. by text
348
+ profiling, drawing causal relationships between assertions
349
+ made in documents, indiscriminate and arbitrarily-targeted use);
350
+
351
+ 14. To generate and/or disseminate malware (including – but not
352
+ limited to – ransomware) or any other content to be used for
353
+ the purpose of harming electronic systems;
354
+
355
+ 15. To engage in, promote, incite, or facilitate discrimination
356
+ or other unlawful or harmful conduct in the provision of
357
+ employment, employment benefits, credit, housing, or other
358
+ essential goods and services;
359
+
360
+ 16. To engage in, promote, incite, or facilitate the harassment,
361
+ abuse, threatening, or bullying of individuals or groups of
362
+ individuals;
363
+
364
+ 17. For military, warfare, nuclear industries or applications,
365
+ weapons development, or any use in connection with activities
366
+ that may cause death, personal injury, or severe physical or
367
+ environmental damage;
368
+
369
+ 18. For commercial use only: To train, improve, or fine-tune any
370
+ other machine learning model, artificial intelligence system,
371
+ or competing model, except for Derivatives of LTX-2 as
372
+ expressly permitted under this Agreement;
373
+
374
+ 19. To circumvent, disable, or interfere with any technical
375
+ limitations, safety features, content filters, or use
376
+ restrictions implemented in LTX-2 by Licensor;
377
+
378
+ 20. To use LTX-2 or Derivatives of LTX-2 in any product, service,
379
+ or application that directly competes with Licensor's
380
+ commercial products or services, or is designed to replace or
381
+ substitute Licensor's offerings in the market, without
382
+ obtaining a separate commercial license from Licensor.
README.md ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ - de
5
+ - es
6
+ - fr
7
+ - ja
8
+ - ko
9
+ - zh
10
+ - it
11
+ - pt
12
+ library_name: diffusers
13
+ license: other
14
+ license_name: ltx-2-community-license-agreement
15
+ license_link: https://github.com/Lightricks/LTX-2/blob/main/LICENSE
16
+ pipeline_tag: image-to-video
17
+ arxiv: 2601.03233
18
+ tags:
19
+ - image-to-video
20
+ - text-to-video
21
+ - video-to-video
22
+ - image-text-to-video
23
+ - audio-to-video
24
+ - text-to-audio
25
+ - video-to-audio
26
+ - audio-to-audio
27
+ - text-to-audio-video
28
+ - image-to-audio-video
29
+ - image-text-to-audio-video
30
+ - ltx-2
31
+ - ltx-video
32
+ - ltxv
33
+ - lightricks
34
+ pinned: true
35
+ demo: https://app.ltx.studio/ltx-2-playground/i2v
36
+ ---
37
+
38
+ # LTX-2 Model Card
39
+
40
+ This model card focuses on the LTX-2 model, as presented in the paper [LTX-2: Efficient Joint Audio-Visual Foundation Model](https://huggingface.co/papers/2601.03233). The codebase is available [here](https://github.com/Lightricks/LTX-2).
41
+
42
+ LTX-2 is a DiT-based audio-video foundation model designed to generate synchronized video and audio within a single model. It brings together the core building blocks of modern video generation, with open weights and a focus on practical, local execution.
43
+
44
+ [![LTX-2 Open Source](https://img.youtube.com/vi/8fWAJXZJbRA/maxresdefault.jpg)](https://www.youtube.com/watch?v=8fWAJXZJbRA)
45
+
46
+ # Model Checkpoints
47
+
48
+ | Name | Notes |
49
+ |--------------------------------|----------------------------------------------------------------------------------------------------------------|
50
+ | ltx-2-19b-dev | The full model, flexible and trainable in bf16 |
51
+ | ltx-2-19b-dev-fp8 | The full model in fp8 quantization |
52
+ | ltx-2-19b-dev-fp4 | The full model in nvfp4 quantization |
53
+ | ltx-2-19b-distilled | The distilled version of the full model, 8 steps, CFG=1 |
54
+ | ltx-2-19b-distilled-lora-384 | A LoRA version of the distilled model applicable to the full model |
55
+ | ltx-2-spatial-upscaler-x2-1.0 | An x2 spatial upscaler for the ltx-2 latents, used in multi stage (multiscale) pipelines for higher resolution |
56
+ | ltx-2-temporal-upscaler-x2-1.0 | An x2 temporal upscaler for the ltx-2 latents, used in multi stage (multiscale) pipelines for higher FPS |
57
+
58
+ ## Model Details
59
+ - **Developed by:** Lightricks
60
+ - **Model type:** Diffusion-based audio-video foundation model
61
+ - **Language(s):** English
62
+
63
+ # Online demo
64
+ LTX-2 is accessible right away via the following links:
65
+ - [LTX-Studio text-to-video](https://app.ltx.studio/ltx-2-playground/t2v)
66
+ - [LTX-Studio image-to-video](https://app.ltx.studio/ltx-2-playground/i2v)
67
+
68
+ # Run locally
69
+
70
+ ## Direct use license
71
+ You can use the models - full, distilled, upscalers and any derivatives of the models - for purposes permitted under the [license](./LICENSE).
72
+
73
+ ## ComfyUI
74
+ We recommend you use the built-in LTXVideo nodes that can be found in the ComfyUI Manager.
75
+ For manual installation information, please refer to our [documentation site](https://docs.ltx.video/open-source-model/integration-tools/comfy-ui).
76
+
77
+ ## PyTorch codebase
78
+
79
+ The [LTX-2 codebase](https://github.com/Lightricks/LTX-2) is a monorepo with several packages, from model definition in 'ltx-core' to pipelines in 'ltx-pipelines' and training capabilities in 'ltx-trainer'.
80
+ The codebase was tested with Python >=3.12, CUDA version >12.7, and supports PyTorch ~= 2.7.
81
+
82
+ ### Installation
83
+
84
+ ```bash
85
+ git clone https://github.com/Lightricks/LTX-2.git
86
+ cd LTX-2
87
+
88
+ # From the repository root
89
+ uv sync
90
+ source .venv/bin/activate
91
+ ```
92
+
93
+ ### Inference
94
+
95
+ To use our model, please follow the instructions in our [ltx-pipelines](https://github.com/Lightricks/LTX-2/blob/main/packages/ltx-pipelines/README.md) package.
96
+
97
+ ## Diffusers 🧨
98
+
99
+ LTX-2 is supported in the [Diffusers Python library](https://huggingface.co/docs/diffusers/main/en/index) for image-to-video generation.
100
+
101
+ ## General tips:
102
+ * Width & height settings must be divisible by 32. Frame count must be a multiple of 8 plus 1 (i.e. of the form 8n + 1, such as 97 or 121).
103
+ * If the resolution is not divisible by 32, or the number of frames is not a multiple of 8 plus 1 (8n + 1), the input should be padded with -1 and then cropped to the desired resolution and number of frames.
104
+ * For tips on writing effective prompts, please visit our [Prompting guide](https://ltx.video/blog/how-to-prompt-for-ltx-2)
105
+
106
+ ### Limitations
107
+ - This model is not intended or able to provide factual information.
108
+ - As a statistical model this checkpoint might amplify existing societal biases.
109
+ - The model may fail to generate videos that match the prompts perfectly.
110
+ - Prompt following is heavily influenced by the prompting-style.
111
+ - The model may generate content that is inappropriate or offensive.
112
+ - When generating audio without speech, the audio may be of lower quality.
113
+
114
+ # Train the model
115
+
116
+ The base (dev) model is fully trainable.
117
+
118
+ It's extremely easy to reproduce the LoRAs and IC-LoRAs we publish with the model by following the instructions on the [LTX-2 Trainer Readme](https://github.com/Lightricks/LTX-2/blob/main/packages/ltx-trainer/README.md).
119
+
120
+ Training for motion, style or likeness (sound+appearance) can take less than an hour in many settings.
121
+
122
+ ## Citation
123
+
124
+ ```bibtex
125
+ @article{hacohen2025ltx2,
126
+ title={LTX-2: Efficient Joint Audio-Visual Foundation Model},
127
+ author={HaCohen, Yoav and Brazowski, Benny and Chiprut, Nisan and Bitterman, Yaki and Kvochko, Andrew and Berkowitz, Avishai and Shalem, Daniel and Lifschitz, Daphna and Moshe, Dudu and Porat, Eitan and Richardson, Eitan and Guy Shiran and Itay Chachy and Jonathan Chetboun and Michael Finkelson and Michael Kupchick and Nir Zabari and Nitzan Guetta and Noa Kotler and Ofir Bibi and Ori Gordon and Poriya Panet and Roi Benita and Shahar Armon and Victor Kulikov and Yaron Inger and Yonatan Shiftan and Zeev Melumian and Zeev Farbman},
128
+ journal={arXiv preprint arXiv:2601.03233},
129
+ year={2026}
130
+ }
131
+ ```
audio_vae/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKLLTX2Audio",
3
+ "_diffusers_version": "0.37.0.dev0",
4
+ "attn_resolutions": null,
5
+ "base_channels": 128,
6
+ "causality_axis": "height",
7
+ "ch_mult": [
8
+ 1,
9
+ 2,
10
+ 4
11
+ ],
12
+ "double_z": true,
13
+ "dropout": 0.0,
14
+ "in_channels": 2,
15
+ "is_causal": true,
16
+ "latent_channels": 8,
17
+ "mel_bins": 64,
18
+ "mel_hop_length": 160,
19
+ "mid_block_add_attention": false,
20
+ "norm_type": "pixel",
21
+ "num_res_blocks": 2,
22
+ "output_channels": 2,
23
+ "resolution": 256,
24
+ "sample_rate": 16000
25
+ }
audio_vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b36ce4066065ce0aa5ff4d7cf96a3e3bc0859c6fefcf864663e3fe686c5c181c
3
+ size 106507972
connectors/config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "LTX2TextConnectors",
3
+ "_diffusers_version": "0.37.0.dev0",
4
+ "audio_connector_attention_head_dim": 128,
5
+ "audio_connector_num_attention_heads": 30,
6
+ "audio_connector_num_layers": 2,
7
+ "audio_connector_num_learnable_registers": 128,
8
+ "caption_channels": 3840,
9
+ "causal_temporal_positioning": false,
10
+ "connector_rope_base_seq_len": 4096,
11
+ "rope_double_precision": true,
12
+ "rope_theta": 10000.0,
13
+ "rope_type": "split",
14
+ "text_proj_in_factor": 49,
15
+ "video_connector_attention_head_dim": 128,
16
+ "video_connector_num_attention_heads": 30,
17
+ "video_connector_num_layers": 2,
18
+ "video_connector_num_learnable_registers": 128
19
+ }
connectors/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7c0ad36c2d0706fb229193d5c698f0ef50c9b33678140b4ee84723a047b4032
3
+ size 2862957976
latent_upsampler/config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "LTX2LatentUpsamplerModel",
3
+ "_diffusers_version": "0.37.0.dev0",
4
+ "dims": 3,
5
+ "in_channels": 128,
6
+ "mid_channels": 1024,
7
+ "num_blocks_per_stage": 4,
8
+ "rational_spatial_scale": 2.0,
9
+ "spatial_upsample": true,
10
+ "temporal_upsample": false
11
+ }
latent_upsampler/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c56276acbffb30f97824b4c2fd6770e8157d65e5be7a93e2307393c1ebbb1f12
3
+ size 995743482
ltx-2-19b-dev-fp4.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08a28245d8412962a8f6a1437c7db4b07bd5c2acdb2b84f96793a9b7c8550751
3
+ size 19988416674
ltx-2-19b-dev-fp8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a67e709b6d1adc061cb19921887a5c15754178199e45801a04310e9b522760d
3
+ size 27078716018
ltx-2-19b-dev.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a51e70aad660e55648d6f0b8af15c8acaaffc06e2a4ae7c7cb01ede701981a8
3
+ size 43285058242
ltx-2-19b-distilled-fp8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ae14327130c6ffdc87705b02c8e7654aa5c6d9a7f28a52d0acc1c30cb0d2932
3
+ size 27078716346
ltx-2-19b-distilled-lora-384.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2718f89582003cbb5b616635f18c091641917a3f3e5a2f2ad0fb3d5fdd153534
3
+ size 7674558424
ltx-2-19b-distilled.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4006d689061cde0967b9d96eaf44253ff08f5de0c78e5fa1331a763cd03ee28
3
+ size 43285058186
ltx-2-running-local.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c0bde52079dd8cc7bdad1d1594d0db80a530d08d854c4f9fa423bef262da965
3
+ size 15736325
ltx-2-spatial-upscaler-x2-1.0.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3160fabf8edf0bc4dd8de40353a180813b111ce586b655ad54af9a7b8c6736de
3
+ size 995765578
ltx-2-temporal-upscaler-x2-1.0.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a35c2eb92f6ed39369fcb83045daa070bc7c2a97fc7267abd6291203fd05b88
3
+ size 261965800
model_index.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "LTX2Pipeline",
3
+ "_diffusers_version": "0.37.0.dev0",
4
+ "audio_vae": [
5
+ "diffusers",
6
+ "AutoencoderKLLTX2Audio"
7
+ ],
8
+ "connectors": [
9
+ "ltx2",
10
+ "LTX2TextConnectors"
11
+ ],
12
+ "scheduler": [
13
+ "diffusers",
14
+ "FlowMatchEulerDiscreteScheduler"
15
+ ],
16
+ "text_encoder": [
17
+ "transformers",
18
+ "Gemma3ForConditionalGeneration"
19
+ ],
20
+ "tokenizer": [
21
+ "transformers",
22
+ "GemmaTokenizerFast"
23
+ ],
24
+ "transformer": [
25
+ "diffusers",
26
+ "LTX2VideoTransformer3DModel"
27
+ ],
28
+ "vae": [
29
+ "diffusers",
30
+ "AutoencoderKLLTX2Video"
31
+ ],
32
+ "vocoder": [
33
+ "ltx2",
34
+ "LTX2Vocoder"
35
+ ]
36
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "FlowMatchEulerDiscreteScheduler",
3
+ "_diffusers_version": "0.37.0.dev0",
4
+ "base_image_seq_len": 1024,
5
+ "base_shift": 0.95,
6
+ "invert_sigmas": false,
7
+ "max_image_seq_len": 4096,
8
+ "max_shift": 2.05,
9
+ "num_train_timesteps": 1000,
10
+ "shift": 1.0,
11
+ "shift_terminal": 0.1,
12
+ "stochastic_sampling": false,
13
+ "time_shift_type": "exponential",
14
+ "use_beta_sigmas": false,
15
+ "use_dynamic_shifting": true,
16
+ "use_exponential_sigmas": false,
17
+ "use_karras_sigmas": false
18
+ }
text_encoder/config.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma3ForConditionalGeneration"
4
+ ],
5
+ "boi_token_index": 255999,
6
+ "dtype": "float32",
7
+ "eoi_token_index": 256000,
8
+ "eos_token_id": [
9
+ 1,
10
+ 106
11
+ ],
12
+ "image_token_index": 262144,
13
+ "initializer_range": 0.02,
14
+ "mm_tokens_per_image": 256,
15
+ "model_type": "gemma3",
16
+ "text_config": {
17
+ "_sliding_window_pattern": 6,
18
+ "attention_bias": false,
19
+ "attention_dropout": 0.0,
20
+ "attn_logit_softcapping": null,
21
+ "cache_implementation": "hybrid",
22
+ "dtype": "float32",
23
+ "final_logit_softcapping": null,
24
+ "head_dim": 256,
25
+ "hidden_activation": "gelu_pytorch_tanh",
26
+ "hidden_size": 3840,
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 15360,
29
+ "layer_types": [
30
+ "sliding_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "full_attention",
36
+ "sliding_attention",
37
+ "sliding_attention",
38
+ "sliding_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "full_attention",
42
+ "sliding_attention",
43
+ "sliding_attention",
44
+ "sliding_attention",
45
+ "sliding_attention",
46
+ "sliding_attention",
47
+ "full_attention",
48
+ "sliding_attention",
49
+ "sliding_attention",
50
+ "sliding_attention",
51
+ "sliding_attention",
52
+ "sliding_attention",
53
+ "full_attention",
54
+ "sliding_attention",
55
+ "sliding_attention",
56
+ "sliding_attention",
57
+ "sliding_attention",
58
+ "sliding_attention",
59
+ "full_attention",
60
+ "sliding_attention",
61
+ "sliding_attention",
62
+ "sliding_attention",
63
+ "sliding_attention",
64
+ "sliding_attention",
65
+ "full_attention",
66
+ "sliding_attention",
67
+ "sliding_attention",
68
+ "sliding_attention",
69
+ "sliding_attention",
70
+ "sliding_attention",
71
+ "full_attention",
72
+ "sliding_attention",
73
+ "sliding_attention",
74
+ "sliding_attention",
75
+ "sliding_attention",
76
+ "sliding_attention",
77
+ "full_attention"
78
+ ],
79
+ "max_position_embeddings": 131072,
80
+ "model_type": "gemma3_text",
81
+ "num_attention_heads": 16,
82
+ "num_hidden_layers": 48,
83
+ "num_key_value_heads": 8,
84
+ "query_pre_attn_scalar": 256,
85
+ "rms_norm_eps": 1e-06,
86
+ "rope_local_base_freq": 10000,
87
+ "rope_scaling": {
88
+ "factor": 8.0,
89
+ "rope_type": "linear"
90
+ },
91
+ "rope_theta": 1000000,
92
+ "sliding_window": 1024,
93
+ "sliding_window_pattern": 6,
94
+ "use_bidirectional_attention": false,
95
+ "use_cache": true,
96
+ "vocab_size": 262208
97
+ },
98
+ "transformers_version": "4.57.3",
99
+ "vision_config": {
100
+ "attention_dropout": 0.0,
101
+ "dtype": "float32",
102
+ "hidden_act": "gelu_pytorch_tanh",
103
+ "hidden_size": 1152,
104
+ "image_size": 896,
105
+ "intermediate_size": 4304,
106
+ "layer_norm_eps": 1e-06,
107
+ "model_type": "siglip_vision_model",
108
+ "num_attention_heads": 16,
109
+ "num_channels": 3,
110
+ "num_hidden_layers": 27,
111
+ "patch_size": 14,
112
+ "vision_use_head": false
113
+ }
114
+ }
text_encoder/diffusion_pytorch_model-00001-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06ffef2cbc9908f6db15a735a12c412c106ff7f112b3d4da72bc98c00bc2c034
3
+ size 1685231024
text_encoder/diffusion_pytorch_model-00002-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:308270af3b7caa5d2cd0076dff5a2dd9f0020d6628fe2d2ee04fa597cb066fbb
3
+ size 4987027560
text_encoder/diffusion_pytorch_model-00003-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523d1b6d3ba4b9ede7a5e6f7df7599bdb12eeab23099694293ab2bbbfa62cc6f
3
+ size 4844750680
text_encoder/diffusion_pytorch_model-00004-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf426cf00fe66fa5fd48d2acae77082f7f423c71c55d9c7a8da26232e852b7a0
3
+ size 4954910584
text_encoder/diffusion_pytorch_model-00005-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01c8cec1fc6d7024b8fcf4517b79ca0df34279e4d6767423a2229772c1a9d5e3
3
+ size 4907665448
text_encoder/diffusion_pytorch_model-00006-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a5ec996bdd602cfebba1fa7f06f6942643032b353b13a0fd1a8c00382efb24
3
+ size 4954910640
text_encoder/diffusion_pytorch_model-00007-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5594441c5b83d7404a16ebf5ec51e0947b9639b62561e2442170c0b6e0069502
3
+ size 4907665448
text_encoder/diffusion_pytorch_model-00008-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b333d3bb47641e91e6fa2cff9580b25463a5d76a1b1a272b77d3d6c0fe78a556
3
+ size 4954910640
text_encoder/diffusion_pytorch_model-00009-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34db39ec863ee8c357f4247455bca8eabba9f3ccb9f838daf795db04b1919250
3
+ size 4907665448
text_encoder/diffusion_pytorch_model-00010-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e72953188ecbdf2a526371b46f66bfd27c58d5ad622bf5c4147aeab7ddb83cb
3
+ size 4954910640
text_encoder/diffusion_pytorch_model-00011-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29993bd9711eba9336246990ffa2cb6ae584816cad0249b6a0bc3729d95bb869
3
+ size 4962817760
text_encoder/diffusion_pytorch_model-00012-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19a8f0f23c87c36285a10632fabfb2c091f211244d124b9c63074debba6e6b21
3
+ size 589949224
text_encoder/diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
text_encoder/generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cache_implementation": "hybrid",
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106
7
+ ],
8
+ "top_k": 64,
9
+ "top_p": 0.95,
10
+ "transformers_version": "4.57.3"
11
+ }
text_encoder/model-00001-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbc6e8132e4998652d4c8c219f0ca8da10a143606c806c8728a7513d49bbcdb3
3
+ size 1685223128
text_encoder/model-00002-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95e7ab472b88860e53c71ef078fe8ba4c85e3727b8f0a0bcc130c1f4a2b9ab9
3
+ size 4987027384
text_encoder/model-00003-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3731e7c18280d0e9dcbcfadaecaeb8b81fa7f7f15e702ea3d1b4f8eb1b5919f7
3
+ size 4844749824
text_encoder/model-00004-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d1ce8b472f2cc6d70c7885388f50fb3a5f233cf1d4784f6a4be1732547a74c
3
+ size 4954909736
text_encoder/model-00005-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb478659a67b2e34a920f237387a1d7a8208325aa2fc285198a97603f15af1e6
3
+ size 4907664584
text_encoder/model-00006-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a190581d871980f9309f40d5c2b4db99d76737d7ed743d531bac95443f6b7145
3
+ size 4954909792
text_encoder/model-00007-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c347de789ff34be642e4fe7ab8e142ca5e3d833d70cb9ec5127a86af0e2ecfed
3
+ size 4907664584
text_encoder/model-00008-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ec7525b89b083d774f4abbdd1f2d7ee190f0475658875cf700530a2faa84a4f
3
+ size 4954909792
text_encoder/model-00009-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0117ecf1d83691bb875c66a9f2b47e450a87fdfe90915bc7b9e9946c37a650
3
+ size 4907664584
text_encoder/model-00010-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f665a743589231adb812c61c9d8e295c9a68c31d3e87976ae032f1de4e46b6
3
+ size 4954909792
text_encoder/model-00011-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999bf4706d4f616e558eec486667fc2b66f8d0f9c106e1d1f802458fb349b0db
3
+ size 2689808472
text_encoder/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
tokenizer/chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
tokenizer/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_pan_and_scan": null,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "pan_and_scan_max_num_crops": null,
20
+ "pan_and_scan_min_crop_size": null,
21
+ "pan_and_scan_min_ratio_to_activate": null,
22
+ "processor_class": "Gemma3Processor",
23
+ "resample": 2,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 896,
27
+ "width": 896
28
+ }
29
+ }