mehere23 committed on Sep 4, 2025

Commit

9186fb4

verified ·

1 Parent(s): 7fe6e76

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +184 -0
LICENSE +202 -0
README.md +535 -0
added_tokens.json +3 -0
chat_template.json +3 -0
config.json +42 -0
generation_config.json +13 -0
merges.txt +0 -0
model-00001-of-00008.safetensors.part0000 +3 -0
model-00001-of-00008.safetensors.part0001 +3 -0
model-00001-of-00008.safetensors.part0002 +3 -0
model-00001-of-00008.safetensors.part0003 +3 -0
model-00001-of-00008.safetensors.part0004 +3 -0
model-00001-of-00008.safetensors.part0005 +3 -0
model-00001-of-00008.safetensors.part0006 +3 -0
model-00001-of-00008.safetensors.part0007 +3 -0
model-00001-of-00012.safetensors.part0000 +3 -0
model-00001-of-00012.safetensors.part0001 +3 -0
model-00001-of-00012.safetensors.part0002 +3 -0
model-00001-of-00012.safetensors.part0003 +3 -0
model-00001-of-00012.safetensors.part0004 +3 -0
model-00001-of-00012.safetensors.part0005 +3 -0
model-00001-of-00012.safetensors.part0006 +3 -0
model-00001-of-00012.safetensors.part0007 +3 -0
model-00001-of-00012.safetensors.part0008 +3 -0
model-00001-of-00012.safetensors.part0009 +3 -0
model-00002-of-00008.safetensors.part0000 +3 -0
model-00002-of-00008.safetensors.part0001 +3 -0
model-00002-of-00008.safetensors.part0002 +3 -0
model-00002-of-00008.safetensors.part0003 +3 -0
model-00002-of-00008.safetensors.part0004 +3 -0
model-00002-of-00008.safetensors.part0005 +3 -0
model-00002-of-00008.safetensors.part0006 +3 -0
model-00002-of-00008.safetensors.part0007 +3 -0
model-00002-of-00012.safetensors.part0000 +3 -0
model-00002-of-00012.safetensors.part0001 +3 -0
model-00002-of-00012.safetensors.part0002 +3 -0
model-00002-of-00012.safetensors.part0003 +3 -0
model-00002-of-00012.safetensors.part0004 +3 -0
model-00002-of-00012.safetensors.part0005 +3 -0
model-00002-of-00012.safetensors.part0006 +3 -0
model-00002-of-00012.safetensors.part0007 +3 -0
model-00002-of-00012.safetensors.part0008 +3 -0
model-00002-of-00012.safetensors.part0009 +3 -0
model-00003-of-00008.safetensors.part0000 +3 -0
model-00003-of-00008.safetensors.part0001 +3 -0
model-00003-of-00008.safetensors.part0002 +3 -0
model-00003-of-00008.safetensors.part0003 +3 -0
model-00003-of-00008.safetensors.part0004 +3 -0
model-00003-of-00008.safetensors.part0005 +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,187 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00012-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+tokenizer.model filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00008.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00008.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00008.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00008.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00008.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00008.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00008.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00008.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00008.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00008.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00008.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00008.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00008.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00008.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00008.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00008.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00008.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00008.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00008.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00008.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00008.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00008.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00008.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00008.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00008.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00008.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00008.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00008.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00008.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00008.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00008.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00008.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00008.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00008.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00008.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00008.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00008.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00008.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00008.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00008.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00008.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00008.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00008.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00008.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00008.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00008.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00008.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00008.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00008.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00008.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00008.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00008.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00008.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00008.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00008.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00008.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00008.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00008.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00008.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00008.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0000 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0001 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0002 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0003 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0004 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0005 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0006 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0007 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0008 filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors.part0009 filter=lfs diff=lfs merge=lfs -text

LICENSE ADDED Viewed

	@@ -0,0 +1,202 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2024 Alibaba Cloud
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

README.md ADDED Viewed

	@@ -0,0 +1,535 @@

+---
+license: gemma
+library_name: transformers
+pipeline_tag: image-text-to-text
+extra_gated_heading: Access Gemma on Hugging Face
+extra_gated_prompt: To access Gemma on Hugging Face, you’re required to review and
+  agree to Google’s usage license. To do this, please ensure you’re logged in to Hugging
+  Face and click below. Requests are processed immediately.
+extra_gated_button_content: Acknowledge license
+base_model: google/gemma-3-27b-pt
+---
+# Gemma 3 model card
+**Model Page**: [Gemma](https://ai.google.dev/gemma/docs/core)
+**Resources and Technical Documentation**:
+* [Gemma 3 Technical Report][g3-tech-report]
+* [Responsible Generative AI Toolkit][rai-toolkit]
+* [Gemma on Kaggle][kaggle-gemma]
+* [Gemma on Vertex Model Garden][vertex-mg-gemma3]
+**Terms of Use**: [Terms][terms]
+**Authors**: Google DeepMind
+## Model Information
+Summary description and brief definition of inputs and outputs.
+### Description
+Gemma is a family of lightweight, state-of-the-art open models from Google,
+built from the same research and technology used to create the Gemini models.
+Gemma 3 models are multimodal, handling text and image input and generating text
+output, with open weights for both pre-trained variants and instruction-tuned
+variants. Gemma 3 has a large, 128K context window, multilingual support in over
+140 languages, and is available in more sizes than previous versions. Gemma 3
+models are well-suited for a variety of text generation and image understanding
+tasks, including question answering, summarization, and reasoning. Their
+relatively small size makes it possible to deploy them in environments with
+limited resources such as laptops, desktops or your own cloud infrastructure,
+democratizing access to state-of-the-art AI models and helping foster innovation
+for everyone.
+### Inputs and outputs
+-   **Input:**
+    -  Text string, such as a question, a prompt, or a document to be summarized
+    -  Images, normalized to 896 x 896 resolution and encoded to 256 tokens
+       each
+    -  Total input context of 128K tokens for the 4B, 12B, and 27B sizes, and
+       32K tokens for the 1B size
+-   **Output:**
+    -   Generated text in response to the input, such as an answer to a
+        question, analysis of image content, or a summary of a document
+    -   Total output context of 8192 tokens
+### Usage
+Below are some code snippets to help you get started quickly with running the model. First, install the Transformers library. Gemma 3 is supported starting from transformers 4.50.0.
+```sh
+$ pip install -U transformers
+```
+Then, copy the snippet from the section that is relevant for your use case.
+#### Running with the `pipeline` API
+You can initialize the model and processor for inference with `pipeline` as follows.
+```python
+from transformers import pipeline
+import torch
+pipe = pipeline(
+    "image-text-to-text",
+    model="google/gemma-3-27b-it",
+    device="cuda",
+    torch_dtype=torch.bfloat16
+)
+```
+With instruction-tuned models, you need to use chat templates to process your inputs first. Then, you can pass them to the pipeline.
+```python
+messages = [
+    {
+        "role": "system",
+        "content": [{"type": "text", "text": "You are a helpful assistant."}]
+    },
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
+            {"type": "text", "text": "What animal is on the candy?"}
+        ]
+    }
+]
+output = pipe(text=messages, max_new_tokens=200)
+print(output[0]["generated_text"][-1]["content"])
+# Okay, let's take a look!
+# Based on the image, the animal on the candy is a **turtle**.
+# You can see the shell shape and the head and legs.
+```
+#### Running the model on a single/multi GPU
+```python
+# pip install accelerate
+from transformers import AutoProcessor, Gemma3ForConditionalGeneration
+from PIL import Image
+import requests
+import torch
+model_id = "google/gemma-3-27b-it"
+model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id, device_map="auto"
+).eval()
+processor = AutoProcessor.from_pretrained(model_id)
+messages = [
+    {
+        "role": "system",
+        "content": [{"type": "text", "text": "You are a helpful assistant."}]
+    },
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
+            {"type": "text", "text": "Describe this image in detail."}
+        ]
+    }
+]
+inputs = processor.apply_chat_template(
+    messages, add_generation_prompt=True, tokenize=True,
+    return_dict=True, return_tensors="pt"
+).to(model.device, dtype=torch.bfloat16)
+input_len = inputs["input_ids"].shape[-1]
+with torch.inference_mode():
+    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
+    generation = generation[0][input_len:]
+decoded = processor.decode(generation, skip_special_tokens=True)
+print(decoded)
+# **Overall Impression:** The image is a close-up shot of a vibrant garden scene,
+# focusing on a cluster of pink cosmos flowers and a busy bumblebee.
+# It has a slightly soft, natural feel, likely captured in daylight.
+```
+### Citation
+```none
+@article{gemma_2025,
+    title={Gemma 3},
+    url={https://goo.gle/Gemma3Report},
+    publisher={Kaggle},
+    author={Gemma Team},
+    year={2025}
+}
+```
+## Model Data
+Data used for model training and how the data was processed.
+### Training Dataset
+These models were trained on a dataset of text data that includes a wide variety
+of sources. The 27B model was trained with 14 trillion tokens, the 12B model was
+trained with 12 trillion tokens, the 4B model was trained with 4 trillion tokens,
+and the 1B model with 2 trillion tokens. Here are the key components:
+-   Web Documents: A diverse collection of web text ensures the model is
+    exposed to a broad range of linguistic styles, topics, and vocabulary. The
+    training dataset includes content in over 140 languages.
+-   Code: Exposing the model to code helps it to learn the syntax and
+    patterns of programming languages, which improves its ability to generate
+    code and understand code-related questions.
+-   Mathematics: Training on mathematical text helps the model learn logical
+    reasoning, symbolic representation, and to address mathematical queries.
+-   Images: A wide range of images enables the model to perform image
+    analysis and visual data extraction tasks.
+The combination of these diverse data sources is crucial for training a powerful
+multimodal model that can handle a wide variety of different tasks and data
+formats.
+### Data Preprocessing
+Here are the key data cleaning and filtering methods applied to the training
+data:
+-   CSAM Filtering: Rigorous CSAM (Child Sexual Abuse Material) filtering
+    was applied at multiple stages in the data preparation process to ensure
+    the exclusion of harmful and illegal content.
+-   Sensitive Data Filtering: As part of making Gemma pre-trained models
+    safe and reliable, automated techniques were used to filter out certain
+    personal information and other sensitive data from training sets.
+-   Additional methods: Filtering based on content quality and safety in
+    line with [our policies][safety-policies].
+## Implementation Information
+Details about the model internals.
+### Hardware
+Gemma was trained using [Tensor Processing Unit (TPU)][tpu] hardware (TPUv4p,
+TPUv5p and TPUv5e). Training vision-language models (VLMs) requires significant
+computational power. TPUs, designed specifically for matrix operations common in
+machine learning, offer several advantages in this domain:
+-   Performance: TPUs are specifically designed to handle the massive
+    computations involved in training VLMs. They can speed up training
+    considerably compared to CPUs.
+-   Memory: TPUs often come with large amounts of high-bandwidth memory,
+    allowing for the handling of large models and batch sizes during training.
+    This can lead to better model quality.
+-   Scalability: TPU Pods (large clusters of TPUs) provide a scalable
+    solution for handling the growing complexity of large foundation models.
+    You can distribute training across multiple TPU devices for faster and more
+    efficient processing.
+-   Cost-effectiveness: In many scenarios, TPUs can provide a more
+    cost-effective solution for training large models compared to CPU-based
+    infrastructure, especially when considering the time and resources saved
+    due to faster training.
+-   These advantages are aligned with
+    [Google's commitments to operate sustainably][sustainability].
+### Software
+Training was done using [JAX][jax] and [ML Pathways][ml-pathways].
+JAX allows researchers to take advantage of the latest generation of hardware,
+including TPUs, for faster and more efficient training of large models. ML
+Pathways is Google's latest effort to build artificially intelligent systems
+capable of generalizing across multiple tasks. This is especially suitable for
+foundation models, including large language models like these.
+Together, JAX and ML Pathways are used as described in the
+[paper about the Gemini family of models][gemini-2-paper]; *"the 'single
+controller' programming model of Jax and Pathways allows a single Python
+process to orchestrate the entire training run, dramatically simplifying the
+development workflow."*
+## Evaluation
+Model evaluation metrics and results.
+### Benchmark Results
+These models were evaluated against a large collection of different datasets and
+metrics to cover different aspects of text generation:
+#### Reasoning and factuality
+| Benchmark                      | Metric         | Gemma 3 PT 1B  | Gemma 3 PT 4B | Gemma 3 PT 12B | Gemma 3 PT 27B |
+| ------------------------------ |----------------|:--------------:|:-------------:|:--------------:|:--------------:|
+| [HellaSwag][hellaswag]         | 10-shot        |      62.3      |      77.2     |      84.2      |      85.6      |
+| [BoolQ][boolq]                 | 0-shot         |      63.2      |      72.3     |      78.8      |      82.4      |
+| [PIQA][piqa]                   | 0-shot         |      73.8      |      79.6     |      81.8      |      83.3      |
+| [SocialIQA][socialiqa]         | 0-shot         |      48.9      |      51.9     |      53.4      |      54.9      |
+| [TriviaQA][triviaqa]           | 5-shot         |      39.8      |      65.8     |      78.2      |      85.5      |
+| [Natural Questions][naturalq]  | 5-shot         |      9.48      |      20.0     |      31.4      |      36.1      |
+| [ARC-c][arc]                   | 25-shot        |      38.4      |      56.2     |      68.9      |      70.6      |
+| [ARC-e][arc]                   | 0-shot         |      73.0      |      82.4     |      88.3      |      89.0      |
+| [WinoGrande][winogrande]       | 5-shot         |      58.2      |      64.7     |      74.3      |      78.8      |
+| [BIG-Bench Hard][bbh]          | few-shot       |      28.4      |      50.9     |      72.6      |      77.7      |
+| [DROP][drop]                   | 1-shot         |      42.4      |      60.1     |      72.2      |      77.2      |
+[hellaswag]: https://arxiv.org/abs/1905.07830
+[boolq]: https://arxiv.org/abs/1905.10044
+[piqa]: https://arxiv.org/abs/1911.11641
+[socialiqa]: https://arxiv.org/abs/1904.09728
+[triviaqa]: https://arxiv.org/abs/1705.03551
+[naturalq]: https://github.com/google-research-datasets/natural-questions
+[arc]: https://arxiv.org/abs/1911.01547
+[winogrande]: https://arxiv.org/abs/1907.10641
+[bbh]: https://paperswithcode.com/dataset/bbh
+[drop]: https://arxiv.org/abs/1903.00161
+#### STEM and code
+| Benchmark                      | Metric         | Gemma 3 PT 4B | Gemma 3 PT 12B | Gemma 3 PT 27B |
+| ------------------------------ |----------------|:-------------:|:--------------:|:--------------:|
+| [MMLU][mmlu]                   | 5-shot         |      59.6     |      74.5      |      78.6      |
+| [MMLU][mmlu] (Pro COT)         | 5-shot         |      29.2     |      45.3      |      52.2      |
+| [AGIEval][agieval]             | 3-5-shot       |      42.1     |      57.4      |      66.2      |
+| [MATH][math]                   | 4-shot         |      24.2     |      43.3      |      50.0      |
+| [GSM8K][gsm8k]                 | 8-shot         |      38.4     |      71.0      |      82.6      |
+| [GPQA][gpqa]                   | 5-shot         |      15.0     |      25.4      |      24.3      |
+| [MBPP][mbpp]                   | 3-shot         |      46.0     |      60.4      |      65.6      |
+| [HumanEval][humaneval]         | 0-shot         |      36.0     |      45.7      |      48.8      |
+[mmlu]: https://arxiv.org/abs/2009.03300
+[agieval]: https://arxiv.org/abs/2304.06364
+[math]: https://arxiv.org/abs/2103.03874
+[gsm8k]: https://arxiv.org/abs/2110.14168
+[gpqa]: https://arxiv.org/abs/2311.12022
+[mbpp]: https://arxiv.org/abs/2108.07732
+[humaneval]: https://arxiv.org/abs/2107.03374
+#### Multilingual
+| Benchmark                            | Gemma 3 PT 1B | Gemma 3 PT 4B | Gemma 3 PT 12B | Gemma 3 PT 27B |
+| ------------------------------------ |:-------------:|:-------------:|:--------------:|:--------------:|
+| [MGSM][mgsm]                         |      2.04     |      34.7     |      64.3     |      74.3     |
+| [Global-MMLU-Lite][global-mmlu-lite] |      24.9     |      57.0     |      69.4     |      75.7     |
+| [WMT24++][wmt24pp] (ChrF)            |      36.7     |      48.4     |      53.9     |      55.7     |
+| [FloRes][flores]                     |      29.5     |      39.2     |      46.0     |      48.8     |
+| [XQuAD][xquad] (all)                 |      43.9     |      68.0     |      74.5     |      76.8     |
+| [ECLeKTic][eclektic]                 |      4.69     |      11.0     |      17.2     |      24.4     |
+| [IndicGenBench][indicgenbench]       |      41.4     |      57.2     |      61.7     |      63.4     |
+[mgsm]: https://arxiv.org/abs/2210.03057
+[flores]: https://arxiv.org/abs/2106.03193
+[xquad]: https://arxiv.org/abs/1910.11856v3
+[global-mmlu-lite]: https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite
+[wmt24pp]: https://arxiv.org/abs/2502.12404v1
+[eclektic]: https://arxiv.org/abs/2502.21228
+[indicgenbench]: https://arxiv.org/abs/2404.16816
+#### Multimodal
+| Benchmark                      | Gemma 3 PT 4B | Gemma 3 PT 12B | Gemma 3 PT 27B |
+| ------------------------------ |:-------------:|:--------------:|:--------------:|
+| [COCOcap][coco-cap]            |      102      |      111       |      116       |
+| [DocVQA][docvqa] (val)         |      72.8     |      82.3      |      85.6      |
+| [InfoVQA][info-vqa] (val)      |      44.1     |      54.8      |      59.4      |
+| [MMMU][mmmu] (pt)              |      39.2     |      50.3      |      56.1      |
+| [TextVQA][textvqa] (val)       |      58.9     |      66.5      |      68.6      |
+| [RealWorldQA][realworldqa]     |      45.5     |      52.2      |      53.9      |
+| [ReMI][remi]                   |      27.3     |      38.5      |      44.8      |
+| [AI2D][ai2d]                   |      63.2     |      75.2      |      79.0      |
+| [ChartQA][chartqa]             |      63.6     |      74.7      |      76.3      |
+| [VQAv2][vqav2]                 |      63.9     |      71.2      |      72.9      |
+| [BLINK][blinkvqa]              |      38.0     |      35.9      |      39.6      |
+| [OKVQA][okvqa]                 |      51.0     |      58.7      |      60.2      |
+| [TallyQA][tallyqa]             |      42.5     |      51.8      |      54.3      |
+| [SpatialSense VQA][ss-vqa]     |      50.9     |      60.0      |      59.4      |
+| [CountBenchQA][countbenchqa]   |      26.1     |      17.8      |      68.0      |
+[coco-cap]: https://cocodataset.org/#home
+[docvqa]: https://www.docvqa.org/
+[info-vqa]: https://arxiv.org/abs/2104.12756
+[mmmu]: https://arxiv.org/abs/2311.16502
+[textvqa]: https://textvqa.org/
+[realworldqa]: https://paperswithcode.com/dataset/realworldqa
+[remi]: https://arxiv.org/html/2406.09175v1
+[ai2d]: https://allenai.org/data/diagrams
+[chartqa]: https://arxiv.org/abs/2203.10244
+[vqav2]: https://visualqa.org/index.html
+[blinkvqa]: https://arxiv.org/abs/2404.12390
+[okvqa]: https://okvqa.allenai.org/
+[tallyqa]: https://arxiv.org/abs/1810.12440
+[ss-vqa]: https://arxiv.org/abs/1908.02660
+[countbenchqa]: https://github.com/google-research/big_vision/blob/main/big_vision/datasets/countbenchqa/
+## Ethics and Safety
+Ethics and safety evaluation approach and results.
+### Evaluation Approach
+Our evaluation methods include structured evaluations and internal red-teaming
+testing of relevant content policies. Red-teaming was conducted by a number of
+different teams, each with different goals and human evaluation metrics. These
+models were evaluated against a number of different categories relevant to
+ethics and safety, including:
+-   **Child Safety**: Evaluation of text-to-text and image-to-text prompts
+    covering child safety policies, including child sexual abuse and
+    exploitation.
+-   **Content Safety**: Evaluation of text-to-text and image-to-text prompts
+    covering safety policies, including harassment, violence and gore, and hate
+    speech.
+-   **Representational Harms**: Evaluation of text-to-text and image-to-text
+    prompts covering safety policies, including bias, stereotyping, and harmful
+    associations or inaccuracies.
+In addition to development level evaluations, we conduct "assurance
+evaluations" which are our 'arms-length' internal evaluations for responsibility
+governance decision making. They are conducted separately from the model
+development team, to inform decision making about release. High level findings
+are fed back to the model team, but prompt sets are held-out to prevent
+overfitting and preserve the results' ability to inform decision making.
+Assurance evaluation results are reported to our Responsibility & Safety Council
+as part of release review.
+### Evaluation Results
+For all areas of safety testing, we saw major improvements in the categories of
+child safety, content safety, and representational harms relative to previous
+Gemma models. All testing was conducted without safety filters to evaluate the
+model capabilities and behaviors. For both text-to-text and image-to-text, and
+across all model sizes, the model produced minimal policy violations, and showed
+significant improvements over previous Gemma models' performance with respect
+to ungrounded inferences. A limitation of our evaluations was that they included
+only English-language prompts.
+## Usage and Limitations
+These models have certain limitations that users should be aware of.
+### Intended Usage
+Open vision-language models (VLMs) have a wide range of applications
+across various industries and domains. The following list of potential uses is
+not comprehensive. The purpose of this list is to provide contextual information
+about the possible use-cases that the model creators considered as part of model
+training and development.
+-   Content Creation and Communication
+    -   Text Generation: These models can be used to generate creative text
+        formats such as poems, scripts, code, marketing copy, and email drafts.
+    -   Chatbots and Conversational AI: Power conversational interfaces
+        for customer service, virtual assistants, or interactive applications.
+    -   Text Summarization: Generate concise summaries of a text corpus,
+        research papers, or reports.
+    -   Image Data Extraction: These models can be used to extract,
+        interpret, and summarize visual data for text communications.
+-   Research and Education
+    -   Natural Language Processing (NLP) and VLM Research: These
+        models can serve as a foundation for researchers to experiment with VLM
+        and NLP techniques, develop algorithms, and contribute to the
+        advancement of the field.
+    -   Language Learning Tools: Support interactive language learning
+        experiences, aiding in grammar correction or providing writing practice.
+    -   Knowledge Exploration: Assist researchers in exploring large
+        bodies of text by generating summaries or answering questions about
+        specific topics.
+### Limitations
+-   Training Data
+    -   The quality and diversity of the training data significantly
+        influence the model's capabilities. Biases or gaps in the training data
+        can lead to limitations in the model's responses.
+    -   The scope of the training dataset determines the subject areas
+        the model can handle effectively.
+-   Context and Task Complexity
+    -   Models are better at tasks that can be framed with clear
+        prompts and instructions. Open-ended or highly complex tasks might be
+        challenging.
+    -   A model's performance can be influenced by the amount of context
+        provided (longer context generally leads to better outputs, up to a
+        certain point).
+-   Language Ambiguity and Nuance
+    -   Natural language is inherently complex. Models might struggle
+        to grasp subtle nuances, sarcasm, or figurative language.
+-   Factual Accuracy
+    -   Models generate responses based on information they learned
+        from their training datasets, but they are not knowledge bases. They
+        may generate incorrect or outdated factual statements.
+-   Common Sense
+    -   Models rely on statistical patterns in language. They might
+        lack the ability to apply common sense reasoning in certain situations.
+### Ethical Considerations and Risks
+The development of vision-language models (VLMs) raises several ethical
+concerns. In creating an open model, we have carefully considered the following:
+-   Bias and Fairness
+    -   VLMs trained on large-scale, real-world text and image data can
+        reflect socio-cultural biases embedded in the training material. These
+        models underwent careful scrutiny, with input data pre-processing
+        described and posterior evaluations reported in this card.
+-   Misinformation and Misuse
+    -   VLMs can be misused to generate text that is false, misleading,
+        or harmful.
+    -   Guidelines are provided for responsible use with the model, see the
+        [Responsible Generative AI Toolkit][rai-toolkit].
+-   Transparency and Accountability
+    -   This model card summarizes details on the models' architecture,
+        capabilities, limitations, and evaluation processes.
+    -   A responsibly developed open model offers the opportunity to
+        share innovation by making VLM technology accessible to developers and
+        researchers across the AI ecosystem.
+Risks identified and mitigations:
+-   **Perpetuation of biases**: It's encouraged to perform continuous
+    monitoring (using evaluation metrics, human review) and the exploration of
+    de-biasing techniques during model training, fine-tuning, and other use
+    cases.
+-   **Generation of harmful content**: Mechanisms and guidelines for content
+    safety are essential. Developers are encouraged to exercise caution and
+    implement appropriate content safety safeguards based on their specific
+    product policies and application use cases.
+-   **Misuse for malicious purposes**: Technical limitations and developer
+    and end-user education can help mitigate against malicious applications of
+    VLMs. Educational resources and reporting mechanisms for users to flag
+    misuse are provided. Prohibited uses of Gemma models are outlined in the
+    [Gemma Prohibited Use Policy][prohibited-use].
+-   **Privacy violations**: Models were trained on data filtered for removal
+    of certain personal information and other sensitive data. Developers are
+    encouraged to adhere to privacy regulations with privacy-preserving
+    techniques.
+### Benefits
+At the time of release, this family of models provides high-performance open
+vision-language model implementations designed from the ground up for
+responsible AI development compared to similarly sized models.
+Using the benchmark evaluation metrics described in this document, these models
+have been shown to provide superior performance to other, comparably-sized open model
+alternatives.
+[g3-tech-report]: https://goo.gle/Gemma3Report
+[rai-toolkit]: https://ai.google.dev/responsible
+[kaggle-gemma]: https://www.kaggle.com/models/google/gemma-3
+[vertex-mg-gemma3]: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/gemma3
+[terms]: https://ai.google.dev/gemma/terms
+[safety-policies]: https://ai.google/static/documents/ai-responsibility-update-published-february-2025.pdf
+[prohibited-use]: https://ai.google.dev/gemma/prohibited_use_policy
+[tpu]: https://cloud.google.com/tpu/docs/intro-to-tpu
+[sustainability]: https://sustainability.google/operating-sustainably/
+[jax]: https://github.com/jax-ml/jax
+[ml-pathways]: https://blog.google/technology/ai/introducing-pathways-next-generation-ai-architecture/
+[gemini-2-paper]: https://arxiv.org/abs/2312.11805

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image_soft_token>": 262144
+}

chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n    {%- if messages[0]['content'] is string -%}\n        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n    {%- else -%}\n        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n    {%- endif -%}\n    {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n    {%- set first_user_prefix = \"\" -%}\n    {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n        {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n    {%- endif -%}\n    {%- if (message['role'] == 'assistant') -%}\n        {%- set role = \"model\" -%}\n    {%- else -%}\n        {%- set role = message['role'] -%}\n    {%- endif -%}\n    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n    {%- if message['content'] is string -%}\n        {{ message['content'] | trim }}\n    {%- elif message['content'] is iterable -%}\n        {%- for item in message['content'] -%}\n            {%- if item['type'] == 'image' -%}\n                {{ '<start_of_image>' }}\n            {%- elif item['type'] == 'text' -%}\n                {{ item['text'] | trim }}\n            {%- endif -%}\n        {%- endfor -%}\n    {%- else -%}\n        {{ raise_exception(\"Invalid content type\") }}\n    {%- endif -%}\n    {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{'<start_of_turn>model\n'}}\n{%- endif -%}\n"
+}

config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "architectures": [
+    "Gemma3ForConditionalGeneration"
+  ],
+  "boi_token_index": 255999,
+  "eoi_token_index": 256000,
+  "eos_token_id": [
+    1,
+    106
+  ],
+  "image_token_index": 262144,
+  "initializer_range": 0.02,
+  "mm_tokens_per_image": 256,
+  "model_type": "gemma3",
+  "text_config": {
+    "head_dim": 128,
+    "hidden_size": 5376,
+    "intermediate_size": 21504,
+    "model_type": "gemma3_text",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 62,
+    "num_key_value_heads": 16,
+    "query_pre_attn_scalar": 168,
+    "rope_scaling": {
+      "factor": 8.0,
+      "rope_type": "linear"
+    },
+    "sliding_window": 1024
+  },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.50.0.dev0",
+  "vision_config": {
+    "hidden_size": 1152,
+    "image_size": 896,
+    "intermediate_size": 4304,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 27,
+    "patch_size": 14,
+    "vision_use_head": false
+  }
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    106
+  ],
+  "pad_token_id": 0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "4.50.0.dev0"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00008.safetensors.part0000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38a8a30ee02c04f82a7b93ceda3c975a1277076a27c2391054e1a2721382e5ea
+size 536870912

model-00001-of-00008.safetensors.part0001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e82d0d29183894972d98942c65c4e59e522d3992b8741ae65552af56298e1131
+size 536870912

model-00001-of-00008.safetensors.part0002 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:213ad447befa7f05e681694500cd1a0a6c0c2fe736a427c173e7527f336aaf53
+size 536870912

model-00001-of-00008.safetensors.part0003 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbe6737f2884de4b451979e2d8f54edac7d663742d171ce9616e4c0d73bf3e32
+size 536870912

model-00001-of-00008.safetensors.part0004 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38924b68e4fe0073ac28c1705d4a703347fe1d66344fe0c5cb1e32af6df1da4f
+size 536870912

model-00001-of-00008.safetensors.part0005 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fba362441a35886dbedeb11bf7b7e7b1aa4fb7e07e8352b33bd1aabc787f654d
+size 536870912

model-00001-of-00008.safetensors.part0006 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99c134952e8c633f8fbca526065b52e4bb45c0c9e46394eda8abfcbcc1a084b3
+size 536870912

model-00001-of-00008.safetensors.part0007 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ecfc2e41b382a363fff272c79a3f114aaf7e2cb84663996a9073bb0602b8e92
+size 83692160

model-00001-of-00012.safetensors.part0000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec6aedb4b68c3dab8ae431d259d6f08a6185e5e594fd273465a39238087db987
+size 536870912

model-00001-of-00012.safetensors.part0001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48c3bc1ce71d62650a6f5866f795415321415b7d4a6e4668f5cb822c245ccfba
+size 536870912

model-00001-of-00012.safetensors.part0002 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe37bfeb0288abdcae2aa4e79f20e6f48078e6aaa69d10774026bad7d5275f4a
+size 536870912

model-00001-of-00012.safetensors.part0003 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bf0d1c6bacaa7012611960651f3122a44b90b2cbd4a3477227c0cccbaed844a
+size 536870912

model-00001-of-00012.safetensors.part0004 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01506dcf90d23125f4c574fd542d2d69263d9fc64bca119f30bc7e09293f9428
+size 536870912

model-00001-of-00012.safetensors.part0005 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:10a88ea808aa503c889f9f6eccc070b523c7592a5c9649e34c853deba0a2b541
+size 536870912

model-00001-of-00012.safetensors.part0006 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf97a9c05c6b8a53f6a4c619931e10a8324824ab6dab317aac1926ac6d01d7a1
+size 536870912

model-00001-of-00012.safetensors.part0007 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e2a358f5083cd1f26eb3ca0b9345ec505f770117be1cce345d297b00b11a3b6
+size 536870912

model-00001-of-00012.safetensors.part0008 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:231bb703645cfd4a64472f1e3a5b82d433d187a88369da43d9a82320a7b2698b
+size 536870912

model-00001-of-00012.safetensors.part0009 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d492016bc971fba225c68d76edf2e376f1d5d2a6c2c4f992b21d7314becae2b
+size 22735488

model-00002-of-00008.safetensors.part0000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cb609b50f6eaffd2ee1c55efff2c08eaa5ef2d83845c29a7bec282e2b2a9738
+size 536870912

model-00002-of-00008.safetensors.part0001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc1ac613a8506392865f234f6890b08c91de7f5b17f49b5573d0b25227d26b3a
+size 536870912

model-00002-of-00008.safetensors.part0002 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd2d3d838164e1ba4550e97b207eb34fe39c2f9f80d2763d449f52b8abf8bac4
+size 536870912

model-00002-of-00008.safetensors.part0003 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:734eaadc087ac78dea838868ea4ac00bc625fba57c10e9bc1c48f99fd15fbc6c
+size 536870912

model-00002-of-00008.safetensors.part0004 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c815e1c176c737f9cbc29039b9335c703966140d6e3cc25875af25f4ceb67e3
+size 536870912

model-00002-of-00008.safetensors.part0005 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1e9f6f20e3145ad027df0f71a1a12bdd732574184a219b8ac7de8a5f0b87add
+size 536870912

model-00002-of-00008.safetensors.part0006 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6227ff888fece4c7c0d18b0c73b2a5e06f7f4c5399110647aa55eeea157ad9ec
+size 536870912

model-00002-of-00008.safetensors.part0007 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62e1a9ad550671b0b6c135b51538359eb0bcdac42b1f363bbc92db657bc181c8
+size 205654432

model-00002-of-00012.safetensors.part0000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e11c30cd4b5d383144d54391192031c3e8a92a9bd23069ebf3ca002674e8ee1
+size 536870912

model-00002-of-00012.safetensors.part0001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44b96cd4b1552a36609346f0fc54bbeb8536f4461035838c31548381167d1850
+size 536870912

model-00002-of-00012.safetensors.part0002 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a0a9bc115d2996d91f27b943e741670e40eb573d483dd2364cd0b18f647a24f
+size 536870912

model-00002-of-00012.safetensors.part0003 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:10b8d035cdd2a4fc17e20a66b84ceb1ad2f888876f1673e597daebf2e1bd9cc7
+size 536870912

model-00002-of-00012.safetensors.part0004 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1b142310043c41e8f70cc5e33813f26badc0747914ce3e36d1aa7928fca58b3
+size 536870912

model-00002-of-00012.safetensors.part0005 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5349653db6c985ec59b34b9430abf903289e4fbd6458dbe053722964a6d8c5e
+size 536870912

model-00002-of-00012.safetensors.part0006 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:657e27c5ad16bfa5c75917d5df8f9e8c889e59fab5d6f6dd723af4b563ab2872
+size 536870912

model-00002-of-00012.safetensors.part0007 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b53fbf1488ee0bc5b6aa41342fc43f7db03ac878e40758334e53b76948a7c356
+size 536870912

model-00002-of-00012.safetensors.part0008 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:537b93c62c9a1e53f5386e848056508755a6f4bee4e9fdee4c06f5f3fdd0ebd0
+size 536870912

model-00002-of-00012.safetensors.part0009 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8869a8cdfae6b07529159d3bc0c3812cc983186447159fdd8e37472b0b1fa734
+size 122954736

model-00003-of-00008.safetensors.part0000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59f1f45dd4f64bff890efb0ea3e7e97fcbadc4faa21babef38e95f02f204a293
+size 536870912

model-00003-of-00008.safetensors.part0001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66d226973f0253a650988504b21ddbc07a540e1731afe3e7b944fefa4ce61f3c
+size 536870912

model-00003-of-00008.safetensors.part0002 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c300d9ccbfa622cbc11621935a2392d1953cf3db15e673134ca8feec7a1fc1dd
+size 536870912

model-00003-of-00008.safetensors.part0003 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34d1df843a29dd02c9e9b94a30237a6a205a50ee201dea1496dea73db81e4982
+size 536870912

model-00003-of-00008.safetensors.part0004 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1de57fdac526c53b22c3ca7621bf64ebc18aa499b153842420e2755fb76c3e3f
+size 536870912

model-00003-of-00008.safetensors.part0005 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0eb5488ea7228c461e131249b5edf3f8818ae89eb006275d2f2c2e5ec0f79ab5
+size 536870912