ofirbibi committed on Feb 14

Commit

cce0eda

0 Parent(s):

Duplicate from Lightricks/LTX-2

Browse files

Co-authored-by: Ofir Bibi <ofirbibi@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +37 -0
LICENSE +382 -0
README.md +131 -0
audio_vae/config.json +25 -0
audio_vae/diffusion_pytorch_model.safetensors +3 -0
connectors/config.json +19 -0
connectors/diffusion_pytorch_model.safetensors +3 -0
latent_upsampler/config.json +11 -0
latent_upsampler/diffusion_pytorch_model.safetensors +3 -0
ltx-2-19b-dev-fp4.safetensors +3 -0
ltx-2-19b-dev-fp8.safetensors +3 -0
ltx-2-19b-dev.safetensors +3 -0
ltx-2-19b-distilled-fp8.safetensors +3 -0
ltx-2-19b-distilled-lora-384.safetensors +3 -0
ltx-2-19b-distilled.safetensors +3 -0
ltx-2-running-local.mp4 +3 -0
ltx-2-spatial-upscaler-x2-1.0.safetensors +3 -0
ltx-2-temporal-upscaler-x2-1.0.safetensors +3 -0
model_index.json +36 -0
scheduler/scheduler_config.json +18 -0
text_encoder/config.json +114 -0
text_encoder/diffusion_pytorch_model-00001-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00002-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00003-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00004-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00005-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00006-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00007-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00008-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00009-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00010-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00011-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model-00012-of-00012.safetensors +3 -0
text_encoder/diffusion_pytorch_model.safetensors.index.json +0 -0
text_encoder/generation_config.json +11 -0
text_encoder/model-00001-of-00011.safetensors +3 -0
text_encoder/model-00002-of-00011.safetensors +3 -0
text_encoder/model-00003-of-00011.safetensors +3 -0
text_encoder/model-00004-of-00011.safetensors +3 -0
text_encoder/model-00005-of-00011.safetensors +3 -0
text_encoder/model-00006-of-00011.safetensors +3 -0
text_encoder/model-00007-of-00011.safetensors +3 -0
text_encoder/model-00008-of-00011.safetensors +3 -0
text_encoder/model-00009-of-00011.safetensors +3 -0
text_encoder/model-00010-of-00011.safetensors +3 -0
text_encoder/model-00011-of-00011.safetensors +3 -0
text_encoder/model.safetensors.index.json +0 -0
tokenizer/added_tokens.json +3 -0
tokenizer/chat_template.jinja +47 -0
tokenizer/preprocessor_config.json +29 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,37 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text

LICENSE ADDED Viewed

	@@ -0,0 +1,382 @@

+                         LTX-2 Community License Agreement
+                           License date: January 5, 2026
+By using or distributing any portion or element of LTX-2, you agree
+to be bound by this Agreement.
+   1. Definitions.
+      "Agreement" means the terms and conditions for the license, use,
+      reproduction, and distribution of LTX-2 and the Complementary
+      Materials, as specified in this document.
+      "Control" means the direct or indirect ownership of more than
+      fifty percent (50%) of the voting securities or other ownership
+      interests, or the power to direct the management and policies of
+      such Entity through voting rights, contract, or otherwise.
+      "Data" means a collection of information and/or content extracted
+      from the dataset used with LTX-2, including to train, pretrain,
+      or otherwise evaluate LTX-2. The Data is not licensed under this
+      Agreement.
+      "Derivatives of LTX-2" means all modifications to LTX-2, works
+      based on LTX-2, or any other model which is created or initialized
+      by transfer of patterns of the weights, parameters, activations or
+      output of LTX-2, to the other model, in order to cause the other
+      model to perform similarly to LTX-2, including – but not limited
+      to – distillation methods entailing the use of intermediate data
+      representations or methods based on the generation of synthetic
+      data by LTX-2 for training the other model. For clarity, Derivatives
+      of LTX-2 include: (i) any fine-tuned or adapted weights, parameters,
+      or checkpoints derived from LTX-2; (ii) derivative model architectures
+      that incorporate or are based upon LTX-2's architecture; and
+      (iii) any modified or extended versions of the Complementary
+      Materials. All intellectual property rights in Derivatives of LTX-2
+      shall be subject to the terms of this Agreement, and you may not
+      claim exclusive ownership rights in any Derivatives of LTX-2 that
+      would restrict the rights granted herein.
+      "Entity" means any individual, corporation, partnership, limited
+      liability company, or other legal entity. For purposes of this
+      Agreement, an Entity shall be deemed to include, on an aggregative
+      basis, all subsidiaries, affiliates, and other companies under
+      common Control with such Entity. When determining whether an Entity
+      meets any threshold under this Agreement (including revenue
+      thresholds), all subsidiaries, affiliates, and companies under
+      common Control shall be considered collectively.
+      "Harm" includes but is not limited to physical, mental,
+      psychological, financial and reputational damage, pain, or loss.
+      "Licensor" or "Lightricks" means the owner that is granting the
+      license under this Agreement. For the purposes of this Agreement,
+      the Licensor is Lightricks Ltd.
+      "LTX-2" means the large language models, text/image/video/audio/3D
+      generation models, and multimodal large language models and their
+      software and algorithms, including trained model weights, parameters
+      (including optimizer states), machine-learning model code,
+      inference-enabling code, training-enabling code, fine-tuning
+      enabling code, accompanying source code, scripts, documentation,
+      tutorials, examples, and all other elements of the foregoing
+      distributed and made publicly available by Lightricks (including,
+      for example, at https://github.com/Lightricks/LTX-2) for the LTX-2
+      model released on January 5, 2026. This license is applicable to
+      all LTX-2 versions released since January 5, 2026, and all future
+      releases of LTX-2 under this license.
+      "Output" means the results of operating LTX-2 as embodied in
+      informational content resulting therefrom.
+      "you" (or "your") means an individual or legal Entity licensing
+      LTX-2 in accordance with this Agreement and/or making use of LTX-2
+      for whichever purpose and in any field of use, including usage of
+      LTX-2 in an end-use application - e.g. chatbot, translator, image
+      generator.
+   2. Grant of License. Subject to the terms and conditions of this
+      Agreement, you are granted a non-exclusive, worldwide,
+      non-transferable and royalty-free limited license under Licensor's
+      intellectual property or other rights owned by Licensor embodied
+      in LTX-2 to use, reproduce, prepare, distribute, publicly display,
+      publicly perform, sublicense, copy, create derivative works of,
+      and make modifications to LTX-2, for any purpose, subject to the
+      restrictions set forth in Attachment A; provided however, that
+      Entities with annual revenues of at least $10,000,000 (the
+      "Commercial Entities") are required to obtain a paid commercial
+      use license in order to use LTX-2 and Derivatives of LTX-2,
+      subject to the terms and provisions of a different license (the
+      "Commercial Use Agreement"), as will be provided by the Licensor.
+      Commercial Entities interested in such a commercial license are
+      required to [contact Licensor](https://ltx.io/model/licensing).
+      Any commercial use of LTX-2 or Derivatives of LTX-2 by the Commercial
+      Entities not in accordance with this Agreement and/or the Commercial
+      Use Agreement is strictly prohibited and shall be deemed a material
+      breach of this Agreement.
+      Such material breach will be subject, in addition to any license
+      fees owed to Licensor for the period such Commercial Entity used
+      LTX-2 (as will be determined by Licensor), to liquidated damages,
+      which will be paid to Licensor immediately upon demand, in an
+      amount equal to double the amount that would otherwise have been
+      paid by you for the relevant period of time. Such amount reflects
+      a reasonable estimation of the losses and administrative costs
+      incurred due to such breach. You agree and understand that this
+      remedy does not limit the Licensor's right to pursue other remedies
+      available at law or equity.
+   3. Distribution and Redistribution. You may host for third parties
+      remote access purposes (e.g. software-as-a-service), reproduce
+      and distribute copies of LTX-2 or Derivatives of LTX-2 thereof in
+      any medium, with or without modifications, provided that you meet
+      the following conditions:
+      (a) Use-based restrictions as referenced in paragraph 4 and all
+          provisions of Attachment A MUST be included as an enforceable
+          provision by you in any type of legal agreement (e.g. a
+          license) governing the use and/or distribution of LTX-2 or
+          Derivatives of LTX-2, and you shall give notice to subsequent
+          users you distribute to, that LTX-2 or Derivatives of LTX-2
+          are subject to paragraph 4 and Attachment A in their entirety,
+          including all use restrictions and acceptable use policies;
+      (b) You must provide any third party recipients of LTX-2 or
+          Derivatives of LTX-2 a copy of this Agreement, including all
+          attachments and use policies. Any Derivative of LTX-2 (as
+          defined in Section 1, including but not limited to fine-tuned
+          weights, modified training code, models trained on Outputs, or
+          any other derivative) must be distributed exclusively under
+          the terms of this Agreement with a complete copy of this
+          license included;
+      (c) You must cause any modified files to carry prominent notices
+          stating that you changed the files;
+      (d) You must retain all copyright, patent, trademark, and
+          attribution notices excluding those notices that do not
+          pertain to any part of LTX-2 or Derivatives of LTX-2.
+      You may add your own copyright statement to your modifications and
+      may provide additional or different license terms and conditions -
+      respecting paragraph 3(a) - for use, reproduction, or distribution
+      of your modifications, or for any such Derivatives of LTX-2 as a
+      whole, provided your use, reproduction, and distribution of LTX-2
+      otherwise complies with the conditions stated in this Agreement,
+      and you provide a complete copy of this Agreement with any such
+      use, reproduction and distribution of LTX-2 and any Derivatives
+      thereof.
+   4. Use-based restrictions. The restrictions set forth in Attachment A
+      are considered Use-based restrictions. Therefore, you cannot use
+      LTX-2 and the Derivatives of LTX-2 in violation of the specified
+      restricted uses. You may use LTX-2 subject to this Agreement,
+      including only for lawful purposes and in accordance with the
+      Agreement. "Use" may include creating any content with, fine-tuning,
+      updating, running, training, evaluating and/or re-parametrizing
+      LTX-2. You shall require all of your users who use LTX-2 or a
+      Derivative of LTX-2 to comply with the terms of this paragraph 4.
+   5. The Output You Generate. Except as set forth herein, Licensor
+      claims no rights in the Output you generate using LTX-2. You are
+      accountable for input you insert into LTX-2, the Output you
+      generate and its subsequent uses. No use of the Output can
+      contravene any provision as stated in the Agreement.
+   6. Updates and Runtime Restrictions. To the maximum extent permitted
+      by law, Licensor reserves the right to restrict (remotely or
+      otherwise) usage of LTX-2 in violation of this Agreement, update
+      LTX-2 through electronic means, or modify the Output of LTX-2
+      based on updates. You shall undertake reasonable efforts to use
+      the latest version of LTX-2. Any use of the non-current version
+      of LTX-2 is done solely at your risk.
+   7. Export Controls and Sanctions Compliance. You acknowledge that
+      LTX-2 and Derivatives of LTX-2 may be subject to export control laws
+      and regulations, including but not limited to the U.S. Export
+      Administration Regulations and sanctions programs administered by
+      the Office of Foreign Assets Control (OFAC). You represent and
+      warrant that you and any users of LTX-2 are not (i) located in,
+      organized under the laws of, or ordinarily resident in any country
+      or territory subject to comprehensive sanctions; (ii) identified
+      on any U.S. government restricted party list, including the
+      Specially Designated Nationals and Blocked Persons List; or
+      (iii) otherwise prohibited from receiving LTX-2 under applicable
+      law. You shall not export, re-export, or transfer LTX-2, directly
+      or indirectly, in violation of any applicable export control or
+      sanctions laws or regulations. You agree to comply with all
+      applicable trade control laws and shall indemnify and hold
+      Licensor harmless from any claims arising from your failure to
+      comply with such laws.
+   8. Trademarks and related. Nothing in this Agreement permits you to
+      make use of Licensor's trademarks, trade names, logos or to
+      otherwise suggest endorsement or misrepresent the relationship
+      between the parties; and any rights not expressly granted herein
+      are reserved by the Licensor.
+   9. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides LTX-2 on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or
+      conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS
+      FOR A PARTICULAR PURPOSE. You are solely responsible for
+      determining the appropriateness of using or redistributing LTX-2
+      and Derivatives of LTX-2 and assume any risks associated with
+      your exercise of permissions under this Agreement.
+   10. Limitation of Liability. In no event and under no legal theory,
+       whether in tort (including negligence), contract, or otherwise,
+       unless required by applicable law (such as deliberate and grossly
+       negligent acts) or agreed to in writing, shall Licensor be liable
+       to you for damages, including any direct, indirect, special,
+       incidental, or consequential damages of any character arising as
+       a result of this Agreement or out of the use or inability to use
+       LTX-2 (including but not limited to damages for loss of goodwill,
+       work stoppage, computer failure or malfunction, or any and all
+       other commercial damages or losses), even if Licensor has been
+       advised of the possibility of such damages.
+   11. Accepting Warranty or Additional Liability. While redistributing
+       LTX-2 and Derivatives of LTX-2, you may, provided you do not
+       violate the terms of this Agreement, choose to offer and charge
+       a fee for, acceptance of support, warranty, indemnity, or other
+       liability obligations. However, in accepting such obligations,
+       you may act only on your own behalf and on your sole
+       responsibility, not on behalf of Licensor, and only if you agree
+       to indemnify, defend, and hold Licensor harmless for any liability
+       incurred by, or claims asserted against Licensor, by reason of
+       your accepting any such warranty or additional liability.
+   12. Governing Law. This Agreement and all relations, disputes, claims
+       and other matters arising hereunder (including non-contractual
+       disputes or claims) will be governed exclusively by, and construed
+       exclusively in accordance with, the laws of the State of New York.
+       To the extent permitted by law, choice of laws rules and the
+       United Nations Convention on Contracts for the International Sale
+       of Goods will not apply. For the purposes of adjudicating any
+       action or proceeding to enforce the terms of this Agreement, you
+       hereby irrevocably consent to the exclusive jurisdiction of, and
+       venue in, the federal and state courts located in the County of
+       New York within the State of New York. The prevailing party in
+       any claim or dispute between the parties under this Agreement
+       will be entitled to reimbursement of its reasonable attorneys'
+       fees and costs. You hereby waive the right to a trial by jury,
+       to participate in a class or representative action (including in
+       arbitration), or to combine individual proceedings in court or
+       in arbitration without the consent of all parties.
+   13. Term and Termination. This Agreement is effective upon your
+       acceptance and continues until terminated. Licensor may terminate
+       this Agreement immediately upon written notice to you if you
+       breach any provision of this Agreement, including but not limited
+       to violations of the use restrictions in Attachment A or
+       unauthorized commercial use. Upon termination: (a) all rights
+       granted to you under this Agreement will immediately cease;
+       (b) you must immediately cease all use of LTX-2 and Derivatives
+       of LTX-2; (c) you must delete or destroy all copies of LTX-2
+       and Derivatives of LTX-2 in your possession or control; and
+       (d) you must notify any third parties to whom you distributed
+       LTX-2 or Derivatives of LTX-2 of the termination. Sections 8-13,
+       and Section 15 shall survive termination of this Agreement.
+       Termination does not relieve you of any obligations incurred
+       prior to termination, including payment obligations under
+       Section 2. In addition, if You commence a lawsuit or other
+       proceedings (including a cross-claim or counterclaim in a lawsuit)
+       against Licensor or any person or entity alleging that LTX-2 or
+       any Output, or any portion of any of the foregoing, infringe any
+       intellectual property or other right owned or licensable by you,
+       then all licenses granted to you under this Agreement shall
+       terminate as of the date such lawsuit or other proceeding is filed.
+   14. Disputes and Arbitration. All disputes arising in connection with
+       this Agreement shall be finally settled by arbitration under the
+       Rules of Arbitration of the International Chamber of Commerce
+       ("ICC Rules"), by one (1) arbitrator appointed in accordance with
+       the ICC Rules. The seat of arbitration shall be New York, NY, USA,
+       and the proceedings shall be conducted in English. The arbitrator
+       shall be empowered to grant any relief that a court could grant.
+       Judgment on the arbitration award may be entered by any court
+       having jurisdiction thereof. Each party waives its right to a
+       trial by jury and to participate in any class or representative
+       action.
+   15. If any provision of this Agreement is held to be invalid, illegal
+       or unenforceable, the remaining provisions shall be unaffected
+       thereby and remain valid as if such provision had not been set
+       forth herein.
+   END OF TERMS AND CONDITIONS
+   ATTACHMENT A: Use Restrictions
+      When using the Outputs, LTX-2 and any Derivatives thereof, you
+      will comply with the Acceptable Use Policy. In addition, you
+      agree not to use the Outputs, LTX-2 or its Derivatives in any
+      of the following ways:
+      1. In any way that violates any applicable national, federal,
+         state, local or international law or regulation;
+      2. For the purpose of exploiting, Harming or attempting to
+         exploit or Harm minors in any way;
+      3. To generate or disseminate false information and/or content
+         with the purpose of Harming others;
+      4. To generate or disseminate personal identifiable information
+         that can be used to Harm an individual;
+      5. To generate or disseminate information and/or content (e.g.
+         images, code, posts, articles), and place the information
+         and/or content in any context (e.g. bot generating tweets)
+         without expressly and intelligibly disclaiming that the
+         information and/or content is machine generated;
+      6. To defame, disparage or otherwise harass others;
+      7. To impersonate or attempt to impersonate (e.g. deepfakes)
+         others without their consent;
+      8. For fully automated decision making that adversely impacts an
+         individual's legal rights or otherwise creates or modifies a
+         binding, enforceable obligation;
+      9. For any use intended to or which has the effect of
+         discriminating against or Harming individuals or groups based
+         on online or offline social behavior or known or predicted
+         personal or personality characteristics;
+      10. To exploit any of the vulnerabilities of a specific group of
+          persons based on their age, social, physical or mental
+          characteristics, in order to materially distort the behavior
+          of a person pertaining to that group in a manner that causes
+          or is likely to cause that person or another person physical
+          or psychological Harm;
+      11. For any use intended to or which has the effect of
+          discriminating against individuals or groups based on legally
+          protected characteristics or categories;
+      12. To provide medical advice and medical results interpretation;
+      13. To generate or disseminate information for the purpose to be
+          used for administration of justice, law enforcement,
+          immigration or asylum processes, such as predicting an
+          individual will commit fraud/crime commitment (e.g. by text
+          profiling, drawing causal relationships between assertions
+          made in documents, indiscriminate and arbitrarily-targeted use);
+      14. To generate and/or disseminate malware (including – but not
+          limited to – ransomware) or any other content to be used for
+          the purpose of harming electronic systems;
+      15. To engage in, promote, incite, or facilitate discrimination
+          or other unlawful or harmful conduct in the provision of
+          employment, employment benefits, credit, housing, or other
+          essential goods and services;
+      16. To engage in, promote, incite, or facilitate the harassment,
+          abuse, threatening, or bullying of individuals or groups of
+          individuals;
+      17. For military, warfare, nuclear industries or applications,
+          weapons development, or any use in connection with activities
+          that may cause death, personal injury, or severe physical or
+          environmental damage;
+      18. For commercial use only: To train, improve, or fine-tune any
+          other machine learning model, artificial intelligence system,
+          or competing model, except for Derivatives of LTX-2 as
+          expressly permitted under this Agreement;
+      19. To circumvent, disable, or interfere with any technical
+          limitations, safety features, content filters, or use
+          restrictions implemented in LTX-2 by Licensor;
+      20. To use LTX-2 or Derivatives of LTX-2 in any product, service,
+          or application that directly competes with Licensor's
+          commercial products or services, or is designed to replace or
+          substitute Licensor's offerings in the market, without
+          obtaining a separate commercial license from Licensor.

README.md ADDED Viewed

	@@ -0,0 +1,131 @@

+---
+language:
+- en
+- de
+- es
+- fr
+- ja
+- ko
+- zh
+- it
+- pt
+library_name: diffusers
+license: other
+license_name: ltx-2-community-license-agreement
+license_link: https://github.com/Lightricks/LTX-2/blob/main/LICENSE
+pipeline_tag: image-to-video
+arxiv: 2601.03233
+tags:
+- image-to-video
+- text-to-video
+- video-to-video
+- image-text-to-video
+- audio-to-video
+- text-to-audio
+- video-to-audio
+- audio-to-audio
+- text-to-audio-video
+- image-to-audio-video
+- image-text-to-audio-video
+- ltx-2
+- ltx-video
+- ltxv
+- lightricks
+pinned: true
+demo: https://app.ltx.studio/ltx-2-playground/i2v
+---
+# LTX-2 Model Card
+This model card focuses on the LTX-2 model, as presented in the paper [LTX-2: Efficient Joint Audio-Visual Foundation Model](https://huggingface.co/papers/2601.03233). The codebase is available [here](https://github.com/Lightricks/LTX-2).
+LTX-2 is a DiT-based audio-video foundation model designed to generate synchronized video and audio within a single model. It brings together the core building blocks of modern video generation, with open weights and a focus on practical, local execution.
+[![LTX-2 Open Source](https://img.youtube.com/vi/8fWAJXZJbRA/maxresdefault.jpg)](https://www.youtube.com/watch?v=8fWAJXZJbRA)
+# Model Checkpoints
+| Name                           | Notes                                                                                                          |
+|--------------------------------|----------------------------------------------------------------------------------------------------------------|
+| ltx-2-19b-dev                  | The full model, flexible and trainable in bf16                                                                 |
+| ltx-2-19b-dev-fp8              | The full model in fp8 quantization                                                                             |
+| ltx-2-19b-dev-fp4              | The full model in nvfp4 quantization                                                                           |
+| ltx-2-19b-distilled            | The distilled version of the full model, 8 steps, CFG=1                                                        |
+| ltx-2-19b-distilled-lora-384   | A LoRA version of the distilled model applicable to the full model                                             |
+| ltx-2-spatial-upscaler-x2-1.0  | An x2 spatial upscaler for the ltx-2 latents, used in multi stage (multiscale) pipelines for higher resolution |
+| ltx-2-temporal-upscaler-x2-1.0 | An x2 temporal upscaler for the ltx-2 latents, used in multi stage (multiscale) pipelines for higher FPS       |
+## Model Details
+- **Developed by:** Lightricks
+- **Model type:** Diffusion-based audio-video foundation model
+- **Language(s):** English
+# Online demo
+LTX-2 is accessible right away via the following links:
+- [LTX-Studio text-to-video](https://app.ltx.studio/ltx-2-playground/t2v)
+- [LTX-Studio image-to-video](https://app.ltx.studio/ltx-2-playground/i2v)
+# Run locally
+## Direct use license
+You can use the models - full, distilled, upscalers and any derivatives of the models - for the purposes permitted under the [license](./LICENSE).
+## ComfyUI
+We recommend you use the built-in LTXVideo nodes that can be found in the ComfyUI Manager.
+For manual installation information, please refer to our [documentation site](https://docs.ltx.video/open-source-model/integration-tools/comfy-ui).
+## PyTorch codebase
+The [LTX-2 codebase](https://github.com/Lightricks/LTX-2) is a monorepo with several packages, ranging from the model definition in 'ltx-core' to pipelines in 'ltx-pipelines' and training capabilities in 'ltx-trainer'.
+The codebase was tested with Python >=3.12, CUDA version >12.7, and supports PyTorch ~= 2.7.
+### Installation
+```bash
+git clone https://github.com/Lightricks/LTX-2.git
+cd LTX-2
+# From the repository root
+uv sync
+source .venv/bin/activate
+```
+### Inference
+To use our model, please follow the instructions in our [ltx-pipelines](https://github.com/Lightricks/LTX-2/blob/main/packages/ltx-pipelines/README.md) package.
+## Diffusers 🧨
+LTX-2 is supported in the [Diffusers Python library](https://huggingface.co/docs/diffusers/main/en/index) for image-to-video generation.
+## General tips:
+* Width & height settings must be divisible by 32. Frame count must be a multiple of 8 plus 1 (i.e. of the form 8k + 1, such as 9, 17, or 121).
+* In case the resolution is not divisible by 32, or the number of frames is not a multiple of 8 plus 1, the input should be padded with -1 and then cropped to the desired resolution and number of frames.
+* For tips on writing effective prompts, please visit our [Prompting guide](https://ltx.video/blog/how-to-prompt-for-ltx-2)
+### Limitations
+- This model is not intended or able to provide factual information.
+- As a statistical model this checkpoint might amplify existing societal biases.
+- The model may fail to generate videos that match the prompts perfectly.
+- Prompt following is heavily influenced by the prompting-style.
+- The model may generate content that is inappropriate or offensive.
+- When generating audio without speech, the audio may be of lower quality.
+# Train the model
+The base (dev) model is fully trainable.
+It's extremely easy to reproduce the LoRAs and IC-LoRAs we publish with the model by following the instructions on the [LTX-2 Trainer Readme](https://github.com/Lightricks/LTX-2/blob/main/packages/ltx-trainer/README.md).
+Training for motion, style or likeness (sound+appearance) can take less than an hour in many settings.
+## Citation
+```bibtex
+@article{hacohen2025ltx2,
+  title={LTX-2: Efficient Joint Audio-Visual Foundation Model},
+  author={HaCohen, Yoav and Brazowski, Benny and Chiprut, Nisan and Bitterman, Yaki and Kvochko, Andrew and Berkowitz, Avishai and Shalem, Daniel and Lifschitz, Daphna and Moshe, Dudu and Porat, Eitan and Richardson, Eitan and Guy Shiran and Itay Chachy and Jonathan Chetboun and Michael Finkelson and Michael Kupchick and Nir Zabari and Nitzan Guetta and Noa Kotler and Ofir Bibi and Ori Gordon and Poriya Panet and Roi Benita and Shahar Armon and Victor Kulikov and Yaron Inger and Yonatan Shiftan and Zeev Melumian and Zeev Farbman},
+  journal={arXiv preprint arXiv:2601.03233},
+  year={2026}
+}
+```

audio_vae/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_class_name": "AutoencoderKLLTX2Audio",
+  "_diffusers_version": "0.37.0.dev0",
+  "attn_resolutions": null,
+  "base_channels": 128,
+  "causality_axis": "height",
+  "ch_mult": [
+    1,
+    2,
+    4
+  ],
+  "double_z": true,
+  "dropout": 0.0,
+  "in_channels": 2,
+  "is_causal": true,
+  "latent_channels": 8,
+  "mel_bins": 64,
+  "mel_hop_length": 160,
+  "mid_block_add_attention": false,
+  "norm_type": "pixel",
+  "num_res_blocks": 2,
+  "output_channels": 2,
+  "resolution": 256,
+  "sample_rate": 16000
+}

audio_vae/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b36ce4066065ce0aa5ff4d7cf96a3e3bc0859c6fefcf864663e3fe686c5c181c
+size 106507972

connectors/config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "_class_name": "LTX2TextConnectors",
+  "_diffusers_version": "0.37.0.dev0",
+  "audio_connector_attention_head_dim": 128,
+  "audio_connector_num_attention_heads": 30,
+  "audio_connector_num_layers": 2,
+  "audio_connector_num_learnable_registers": 128,
+  "caption_channels": 3840,
+  "causal_temporal_positioning": false,
+  "connector_rope_base_seq_len": 4096,
+  "rope_double_precision": true,
+  "rope_theta": 10000.0,
+  "rope_type": "split",
+  "text_proj_in_factor": 49,
+  "video_connector_attention_head_dim": 128,
+  "video_connector_num_attention_heads": 30,
+  "video_connector_num_layers": 2,
+  "video_connector_num_learnable_registers": 128
+}

connectors/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7c0ad36c2d0706fb229193d5c698f0ef50c9b33678140b4ee84723a047b4032
+size 2862957976

latent_upsampler/config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "_class_name": "LTX2LatentUpsamplerModel",
+  "_diffusers_version": "0.37.0.dev0",
+  "dims": 3,
+  "in_channels": 128,
+  "mid_channels": 1024,
+  "num_blocks_per_stage": 4,
+  "rational_spatial_scale": 2.0,
+  "spatial_upsample": true,
+  "temporal_upsample": false
+}

latent_upsampler/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c56276acbffb30f97824b4c2fd6770e8157d65e5be7a93e2307393c1ebbb1f12
+size 995743482

ltx-2-19b-dev-fp4.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08a28245d8412962a8f6a1437c7db4b07bd5c2acdb2b84f96793a9b7c8550751
+size 19988416674

ltx-2-19b-dev-fp8.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a67e709b6d1adc061cb19921887a5c15754178199e45801a04310e9b522760d
+size 27078716018

ltx-2-19b-dev.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a51e70aad660e55648d6f0b8af15c8acaaffc06e2a4ae7c7cb01ede701981a8
+size 43285058242

ltx-2-19b-distilled-fp8.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ae14327130c6ffdc87705b02c8e7654aa5c6d9a7f28a52d0acc1c30cb0d2932
+size 27078716346

ltx-2-19b-distilled-lora-384.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2718f89582003cbb5b616635f18c091641917a3f3e5a2f2ad0fb3d5fdd153534
+size 7674558424

ltx-2-19b-distilled.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4006d689061cde0967b9d96eaf44253ff08f5de0c78e5fa1331a763cd03ee28
+size 43285058186

ltx-2-running-local.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c0bde52079dd8cc7bdad1d1594d0db80a530d08d854c4f9fa423bef262da965
+size 15736325

ltx-2-spatial-upscaler-x2-1.0.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3160fabf8edf0bc4dd8de40353a180813b111ce586b655ad54af9a7b8c6736de
+size 995765578

ltx-2-temporal-upscaler-x2-1.0.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a35c2eb92f6ed39369fcb83045daa070bc7c2a97fc7267abd6291203fd05b88
+size 261965800

model_index.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_class_name": "LTX2Pipeline",
+  "_diffusers_version": "0.37.0.dev0",
+  "audio_vae": [
+    "diffusers",
+    "AutoencoderKLLTX2Audio"
+  ],
+  "connectors": [
+    "ltx2",
+    "LTX2TextConnectors"
+  ],
+  "scheduler": [
+    "diffusers",
+    "FlowMatchEulerDiscreteScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "Gemma3ForConditionalGeneration"
+  ],
+  "tokenizer": [
+    "transformers",
+    "GemmaTokenizerFast"
+  ],
+  "transformer": [
+    "diffusers",
+    "LTX2VideoTransformer3DModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKLLTX2Video"
+  ],
+  "vocoder": [
+    "ltx2",
+    "LTX2Vocoder"
+  ]
+}

scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "_class_name": "FlowMatchEulerDiscreteScheduler",
+  "_diffusers_version": "0.37.0.dev0",
+  "base_image_seq_len": 1024,
+  "base_shift": 0.95,
+  "invert_sigmas": false,
+  "max_image_seq_len": 4096,
+  "max_shift": 2.05,
+  "num_train_timesteps": 1000,
+  "shift": 1.0,
+  "shift_terminal": 0.1,
+  "stochastic_sampling": false,
+  "time_shift_type": "exponential",
+  "use_beta_sigmas": false,
+  "use_dynamic_shifting": true,
+  "use_exponential_sigmas": false,
+  "use_karras_sigmas": false
+}

text_encoder/config.json ADDED Viewed

	@@ -0,0 +1,114 @@

+{
+  "architectures": [
+    "Gemma3ForConditionalGeneration"
+  ],
+  "boi_token_index": 255999,
+  "dtype": "float32",
+  "eoi_token_index": 256000,
+  "eos_token_id": [
+    1,
+    106
+  ],
+  "image_token_index": 262144,
+  "initializer_range": 0.02,
+  "mm_tokens_per_image": 256,
+  "model_type": "gemma3",
+  "text_config": {
+    "_sliding_window_pattern": 6,
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "attn_logit_softcapping": null,
+    "cache_implementation": "hybrid",
+    "dtype": "float32",
+    "final_logit_softcapping": null,
+    "head_dim": 256,
+    "hidden_activation": "gelu_pytorch_tanh",
+    "hidden_size": 3840,
+    "initializer_range": 0.02,
+    "intermediate_size": 15360,
+    "layer_types": [
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 131072,
+    "model_type": "gemma3_text",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 48,
+    "num_key_value_heads": 8,
+    "query_pre_attn_scalar": 256,
+    "rms_norm_eps": 1e-06,
+    "rope_local_base_freq": 10000,
+    "rope_scaling": {
+      "factor": 8.0,
+      "rope_type": "linear"
+    },
+    "rope_theta": 1000000,
+    "sliding_window": 1024,
+    "sliding_window_pattern": 6,
+    "use_bidirectional_attention": false,
+    "use_cache": true,
+    "vocab_size": 262208
+  },
+  "transformers_version": "4.57.3",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 1152,
+    "image_size": 896,
+    "intermediate_size": 4304,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 27,
+    "patch_size": 14,
+    "vision_use_head": false
+  }
+}

text_encoder/diffusion_pytorch_model-00001-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06ffef2cbc9908f6db15a735a12c412c106ff7f112b3d4da72bc98c00bc2c034
+size 1685231024

text_encoder/diffusion_pytorch_model-00002-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:308270af3b7caa5d2cd0076dff5a2dd9f0020d6628fe2d2ee04fa597cb066fbb
+size 4987027560

text_encoder/diffusion_pytorch_model-00003-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:523d1b6d3ba4b9ede7a5e6f7df7599bdb12eeab23099694293ab2bbbfa62cc6f
+size 4844750680

text_encoder/diffusion_pytorch_model-00004-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf426cf00fe66fa5fd48d2acae77082f7f423c71c55d9c7a8da26232e852b7a0
+size 4954910584

text_encoder/diffusion_pytorch_model-00005-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01c8cec1fc6d7024b8fcf4517b79ca0df34279e4d6767423a2229772c1a9d5e3
+size 4907665448

text_encoder/diffusion_pytorch_model-00006-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1a5ec996bdd602cfebba1fa7f06f6942643032b353b13a0fd1a8c00382efb24
+size 4954910640

text_encoder/diffusion_pytorch_model-00007-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5594441c5b83d7404a16ebf5ec51e0947b9639b62561e2442170c0b6e0069502
+size 4907665448

text_encoder/diffusion_pytorch_model-00008-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b333d3bb47641e91e6fa2cff9580b25463a5d76a1b1a272b77d3d6c0fe78a556
+size 4954910640

text_encoder/diffusion_pytorch_model-00009-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34db39ec863ee8c357f4247455bca8eabba9f3ccb9f838daf795db04b1919250
+size 4907665448

text_encoder/diffusion_pytorch_model-00010-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e72953188ecbdf2a526371b46f66bfd27c58d5ad622bf5c4147aeab7ddb83cb
+size 4954910640

text_encoder/diffusion_pytorch_model-00011-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29993bd9711eba9336246990ffa2cb6ae584816cad0249b6a0bc3729d95bb869
+size 4962817760

text_encoder/diffusion_pytorch_model-00012-of-00012.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:19a8f0f23c87c36285a10632fabfb2c091f211244d124b9c63074debba6e6b21
+size 589949224

text_encoder/diffusion_pytorch_model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

text_encoder/generation_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    106
+  ],
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "4.57.3"
+}

text_encoder/model-00001-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbc6e8132e4998652d4c8c219f0ca8da10a143606c806c8728a7513d49bbcdb3
+size 1685223128

text_encoder/model-00002-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b95e7ab472b88860e53c71ef078fe8ba4c85e3727b8f0a0bcc130c1f4a2b9ab9
+size 4987027384

text_encoder/model-00003-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3731e7c18280d0e9dcbcfadaecaeb8b81fa7f7f15e702ea3d1b4f8eb1b5919f7
+size 4844749824

text_encoder/model-00004-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9d1ce8b472f2cc6d70c7885388f50fb3a5f233cf1d4784f6a4be1732547a74c
+size 4954909736

text_encoder/model-00005-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb478659a67b2e34a920f237387a1d7a8208325aa2fc285198a97603f15af1e6
+size 4907664584

text_encoder/model-00006-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a190581d871980f9309f40d5c2b4db99d76737d7ed743d531bac95443f6b7145
+size 4954909792

text_encoder/model-00007-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c347de789ff34be642e4fe7ab8e142ca5e3d833d70cb9ec5127a86af0e2ecfed
+size 4907664584

text_encoder/model-00008-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ec7525b89b083d774f4abbdd1f2d7ee190f0475658875cf700530a2faa84a4f
+size 4954909792

text_encoder/model-00009-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b0117ecf1d83691bb875c66a9f2b47e450a87fdfe90915bc7b9e9946c37a650
+size 4907664584

text_encoder/model-00010-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9f665a743589231adb812c61c9d8e295c9a68c31d3e87976ae032f1de4e46b6
+size 4954909792

text_encoder/model-00011-of-00011.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:999bf4706d4f616e558eec486667fc2b66f8d0f9c106e1d1f802458fb349b0db
+size 2689808472

text_encoder/model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image_soft_token>": 262144
+}

tokenizer/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,47 @@

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}

tokenizer/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_pan_and_scan": null,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "Gemma3ImageProcessor",
+  "image_seq_length": 256,
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "pan_and_scan_max_num_crops": null,
+  "pan_and_scan_min_crop_size": null,
+  "pan_and_scan_min_ratio_to_activate": null,
+  "processor_class": "Gemma3Processor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 896,
+    "width": 896
+  }
+}