Upload 8 files

Browse files

Files changed (8) hide show

added_tokens.json +1049 -0
chat_template.jinja +52 -0
config.json +233 -0
configuration_intern_vit.py +118 -0
configuration_yuan.py +153 -0
configuration_yuanvl.py +134 -0
conversation.py +399 -0
model.safetensors.index.json +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,1049 @@

+{
+  "</3dbox>": 134971,
+  "</IMAGE>": 134962,
+  "</box>": 134967,
+  "</code_query>": 135987,
+  "</code_result>": 135989,
+  "</depth>": 134973,
+  "</final_answer>": 135999,
+  "</infer>": 135991,
+  "</inferresult>": 135993,
+  "</obj>": 134965,
+  "</point>": 134969,
+  "</search_query>": 135985,
+  "</search_result>": 135983,
+  "</think>": 135981,
+  "</tool_calls>": 135995,
+  "</tool_response>": 135997,
+  "<3dbox>": 134970,
+  "<BOS>": 134960,
+  "<FIM_MIDDLE>": 134957,
+  "<FIM_PREFIX>": 134956,
+  "<FIM_SUFFIX>": 134955,
+  "<IMAGE>": 134961,
+  "<box>": 134966,
+  "<code_query>": 135986,
+  "<code_result>": 135988,
+  "<depth>": 134972,
+  "<eog>": 135975,
+  "<eop>": 135974,
+  "<file_sep>": 134959,
+  "<final_answer>": 135998,
+  "<grounding>": 134963,
+  "<infer>": 135990,
+  "<inferresult>": 135992,
+  "<mask>": 134953,
+  "<obj>": 134964,
+  "<point>": 134968,
+  "<predict>": 134954,
+  "<repo_name>": 134958,
+  "<search_query>": 135984,
+  "<search_result>": 135982,
+  "<think>": 135980,
+  "<tool_calls>": 135994,
+  "<tool_response>": 135996,
+  "<|Assistant|>": 135979,
+  "<|User|>": 135978,
+  "<|begin_of_sentence|>": 135976,
+  "<|end_of_sentence|>": 135977,
+  "s000": 134974,
+  "s001": 134975,
+  "s002": 134976,
+  "s003": 134977,
+  "s004": 134978,
+  "s005": 134979,
+  "s006": 134980,
+  "s007": 134981,
+  "s008": 134982,
+  "s009": 134983,
+  "s010": 134984,
+  "s011": 134985,
+  "s012": 134986,
+  "s013": 134987,
+  "s014": 134988,
+  "s015": 134989,
+  "s016": 134990,
+  "s017": 134991,
+  "s018": 134992,
+  "s019": 134993,
+  "s020": 134994,
+  "s021": 134995,
+  "s022": 134996,
+  "s023": 134997,
+  "s024": 134998,
+  "s025": 134999,
+  "s026": 135000,
+  "s027": 135001,
+  "s028": 135002,
+  "s029": 135003,
+  "s030": 135004,
+  "s031": 135005,
+  "s032": 135006,
+  "s033": 135007,
+  "s034": 135008,
+  "s035": 135009,
+  "s036": 135010,
+  "s037": 135011,
+  "s038": 135012,
+  "s039": 135013,
+  "s040": 135014,
+  "s041": 135015,
+  "s042": 135016,
+  "s043": 135017,
+  "s044": 135018,
+  "s045": 135019,
+  "s046": 135020,
+  "s047": 135021,
+  "s048": 135022,
+  "s049": 135023,
+  "s050": 135024,
+  "s051": 135025,
+  "s052": 135026,
+  "s053": 135027,
+  "s054": 135028,
+  "s055": 135029,
+  "s056": 135030,
+  "s057": 135031,
+  "s058": 135032,
+  "s059": 135033,
+  "s060": 135034,
+  "s061": 135035,
+  "s062": 135036,
+  "s063": 135037,
+  "s064": 135038,
+  "s065": 135039,
+  "s066": 135040,
+  "s067": 135041,
+  "s068": 135042,
+  "s069": 135043,
+  "s070": 135044,
+  "s071": 135045,
+  "s072": 135046,
+  "s073": 135047,
+  "s074": 135048,
+  "s075": 135049,
+  "s076": 135050,
+  "s077": 135051,
+  "s078": 135052,
+  "s079": 135053,
+  "s080": 135054,
+  "s081": 135055,
+  "s082": 135056,
+  "s083": 135057,
+  "s084": 135058,
+  "s085": 135059,
+  "s086": 135060,
+  "s087": 135061,
+  "s088": 135062,
+  "s089": 135063,
+  "s090": 135064,
+  "s091": 135065,
+  "s092": 135066,
+  "s093": 135067,
+  "s094": 135068,
+  "s095": 135069,
+  "s096": 135070,
+  "s097": 135071,
+  "s098": 135072,
+  "s099": 135073,
+  "s100": 135074,
+  "s101": 135075,
+  "s102": 135076,
+  "s103": 135077,
+  "s104": 135078,
+  "s105": 135079,
+  "s106": 135080,
+  "s107": 135081,
+  "s108": 135082,
+  "s109": 135083,
+  "s110": 135084,
+  "s111": 135085,
+  "s112": 135086,
+  "s113": 135087,
+  "s114": 135088,
+  "s115": 135089,
+  "s116": 135090,
+  "s117": 135091,
+  "s118": 135092,
+  "s119": 135093,
+  "s120": 135094,
+  "s121": 135095,
+  "s122": 135096,
+  "s123": 135097,
+  "s124": 135098,
+  "s125": 135099,
+  "s126": 135100,
+  "s127": 135101,
+  "s128": 135102,
+  "s129": 135103,
+  "s130": 135104,
+  "s131": 135105,
+  "s132": 135106,
+  "s133": 135107,
+  "s134": 135108,
+  "s135": 135109,
+  "s136": 135110,
+  "s137": 135111,
+  "s138": 135112,
+  "s139": 135113,
+  "s140": 135114,
+  "s141": 135115,
+  "s142": 135116,
+  "s143": 135117,
+  "s144": 135118,
+  "s145": 135119,
+  "s146": 135120,
+  "s147": 135121,
+  "s148": 135122,
+  "s149": 135123,
+  "s150": 135124,
+  "s151": 135125,
+  "s152": 135126,
+  "s153": 135127,
+  "s154": 135128,
+  "s155": 135129,
+  "s156": 135130,
+  "s157": 135131,
+  "s158": 135132,
+  "s159": 135133,
+  "s160": 135134,
+  "s161": 135135,
+  "s162": 135136,
+  "s163": 135137,
+  "s164": 135138,
+  "s165": 135139,
+  "s166": 135140,
+  "s167": 135141,
+  "s168": 135142,
+  "s169": 135143,
+  "s170": 135144,
+  "s171": 135145,
+  "s172": 135146,
+  "s173": 135147,
+  "s174": 135148,
+  "s175": 135149,
+  "s176": 135150,
+  "s177": 135151,
+  "s178": 135152,
+  "s179": 135153,
+  "s180": 135154,
+  "s181": 135155,
+  "s182": 135156,
+  "s183": 135157,
+  "s184": 135158,
+  "s185": 135159,
+  "s186": 135160,
+  "s187": 135161,
+  "s188": 135162,
+  "s189": 135163,
+  "s190": 135164,
+  "s191": 135165,
+  "s192": 135166,
+  "s193": 135167,
+  "s194": 135168,
+  "s195": 135169,
+  "s196": 135170,
+  "s197": 135171,
+  "s198": 135172,
+  "s199": 135173,
+  "s200": 135174,
+  "s201": 135175,
+  "s202": 135176,
+  "s203": 135177,
+  "s204": 135178,
+  "s205": 135179,
+  "s206": 135180,
+  "s207": 135181,
+  "s208": 135182,
+  "s209": 135183,
+  "s210": 135184,
+  "s211": 135185,
+  "s212": 135186,
+  "s213": 135187,
+  "s214": 135188,
+  "s215": 135189,
+  "s216": 135190,
+  "s217": 135191,
+  "s218": 135192,
+  "s219": 135193,
+  "s220": 135194,
+  "s221": 135195,
+  "s222": 135196,
+  "s223": 135197,
+  "s224": 135198,
+  "s225": 135199,
+  "s226": 135200,
+  "s227": 135201,
+  "s228": 135202,
+  "s229": 135203,
+  "s230": 135204,
+  "s231": 135205,
+  "s232": 135206,
+  "s233": 135207,
+  "s234": 135208,
+  "s235": 135209,
+  "s236": 135210,
+  "s237": 135211,
+  "s238": 135212,
+  "s239": 135213,
+  "s240": 135214,
+  "s241": 135215,
+  "s242": 135216,
+  "s243": 135217,
+  "s244": 135218,
+  "s245": 135219,
+  "s246": 135220,
+  "s247": 135221,
+  "s248": 135222,
+  "s249": 135223,
+  "s250": 135224,
+  "s251": 135225,
+  "s252": 135226,
+  "s253": 135227,
+  "s254": 135228,
+  "s255": 135229,
+  "s256": 135230,
+  "s257": 135231,
+  "s258": 135232,
+  "s259": 135233,
+  "s260": 135234,
+  "s261": 135235,
+  "s262": 135236,
+  "s263": 135237,
+  "s264": 135238,
+  "s265": 135239,
+  "s266": 135240,
+  "s267": 135241,
+  "s268": 135242,
+  "s269": 135243,
+  "s270": 135244,
+  "s271": 135245,
+  "s272": 135246,
+  "s273": 135247,
+  "s274": 135248,
+  "s275": 135249,
+  "s276": 135250,
+  "s277": 135251,
+  "s278": 135252,
+  "s279": 135253,
+  "s280": 135254,
+  "s281": 135255,
+  "s282": 135256,
+  "s283": 135257,
+  "s284": 135258,
+  "s285": 135259,
+  "s286": 135260,
+  "s287": 135261,
+  "s288": 135262,
+  "s289": 135263,
+  "s290": 135264,
+  "s291": 135265,
+  "s292": 135266,
+  "s293": 135267,
+  "s294": 135268,
+  "s295": 135269,
+  "s296": 135270,
+  "s297": 135271,
+  "s298": 135272,
+  "s299": 135273,
+  "s300": 135274,
+  "s301": 135275,
+  "s302": 135276,
+  "s303": 135277,
+  "s304": 135278,
+  "s305": 135279,
+  "s306": 135280,
+  "s307": 135281,
+  "s308": 135282,
+  "s309": 135283,
+  "s310": 135284,
+  "s311": 135285,
+  "s312": 135286,
+  "s313": 135287,
+  "s314": 135288,
+  "s315": 135289,
+  "s316": 135290,
+  "s317": 135291,
+  "s318": 135292,
+  "s319": 135293,
+  "s320": 135294,
+  "s321": 135295,
+  "s322": 135296,
+  "s323": 135297,
+  "s324": 135298,
+  "s325": 135299,
+  "s326": 135300,
+  "s327": 135301,
+  "s328": 135302,
+  "s329": 135303,
+  "s330": 135304,
+  "s331": 135305,
+  "s332": 135306,
+  "s333": 135307,
+  "s334": 135308,
+  "s335": 135309,
+  "s336": 135310,
+  "s337": 135311,
+  "s338": 135312,
+  "s339": 135313,
+  "s340": 135314,
+  "s341": 135315,
+  "s342": 135316,
+  "s343": 135317,
+  "s344": 135318,
+  "s345": 135319,
+  "s346": 135320,
+  "s347": 135321,
+  "s348": 135322,
+  "s349": 135323,
+  "s350": 135324,
+  "s351": 135325,
+  "s352": 135326,
+  "s353": 135327,
+  "s354": 135328,
+  "s355": 135329,
+  "s356": 135330,
+  "s357": 135331,
+  "s358": 135332,
+  "s359": 135333,
+  "s360": 135334,
+  "s361": 135335,
+  "s362": 135336,
+  "s363": 135337,
+  "s364": 135338,
+  "s365": 135339,
+  "s366": 135340,
+  "s367": 135341,
+  "s368": 135342,
+  "s369": 135343,
+  "s370": 135344,
+  "s371": 135345,
+  "s372": 135346,
+  "s373": 135347,
+  "s374": 135348,
+  "s375": 135349,
+  "s376": 135350,
+  "s377": 135351,
+  "s378": 135352,
+  "s379": 135353,
+  "s380": 135354,
+  "s381": 135355,
+  "s382": 135356,
+  "s383": 135357,
+  "s384": 135358,
+  "s385": 135359,
+  "s386": 135360,
+  "s387": 135361,
+  "s388": 135362,
+  "s389": 135363,
+  "s390": 135364,
+  "s391": 135365,
+  "s392": 135366,
+  "s393": 135367,
+  "s394": 135368,
+  "s395": 135369,
+  "s396": 135370,
+  "s397": 135371,
+  "s398": 135372,
+  "s399": 135373,
+  "s400": 135374,
+  "s401": 135375,
+  "s402": 135376,
+  "s403": 135377,
+  "s404": 135378,
+  "s405": 135379,
+  "s406": 135380,
+  "s407": 135381,
+  "s408": 135382,
+  "s409": 135383,
+  "s410": 135384,
+  "s411": 135385,
+  "s412": 135386,
+  "s413": 135387,
+  "s414": 135388,
+  "s415": 135389,
+  "s416": 135390,
+  "s417": 135391,
+  "s418": 135392,
+  "s419": 135393,
+  "s420": 135394,
+  "s421": 135395,
+  "s422": 135396,
+  "s423": 135397,
+  "s424": 135398,
+  "s425": 135399,
+  "s426": 135400,
+  "s427": 135401,
+  "s428": 135402,
+  "s429": 135403,
+  "s430": 135404,
+  "s431": 135405,
+  "s432": 135406,
+  "s433": 135407,
+  "s434": 135408,
+  "s435": 135409,
+  "s436": 135410,
+  "s437": 135411,
+  "s438": 135412,
+  "s439": 135413,
+  "s440": 135414,
+  "s441": 135415,
+  "s442": 135416,
+  "s443": 135417,
+  "s444": 135418,
+  "s445": 135419,
+  "s446": 135420,
+  "s447": 135421,
+  "s448": 135422,
+  "s449": 135423,
+  "s450": 135424,
+  "s451": 135425,
+  "s452": 135426,
+  "s453": 135427,
+  "s454": 135428,
+  "s455": 135429,
+  "s456": 135430,
+  "s457": 135431,
+  "s458": 135432,
+  "s459": 135433,
+  "s460": 135434,
+  "s461": 135435,
+  "s462": 135436,
+  "s463": 135437,
+  "s464": 135438,
+  "s465": 135439,
+  "s466": 135440,
+  "s467": 135441,
+  "s468": 135442,
+  "s469": 135443,
+  "s470": 135444,
+  "s471": 135445,
+  "s472": 135446,
+  "s473": 135447,
+  "s474": 135448,
+  "s475": 135449,
+  "s476": 135450,
+  "s477": 135451,
+  "s478": 135452,
+  "s479": 135453,
+  "s480": 135454,
+  "s481": 135455,
+  "s482": 135456,
+  "s483": 135457,
+  "s484": 135458,
+  "s485": 135459,
+  "s486": 135460,
+  "s487": 135461,
+  "s488": 135462,
+  "s489": 135463,
+  "s490": 135464,
+  "s491": 135465,
+  "s492": 135466,
+  "s493": 135467,
+  "s494": 135468,
+  "s495": 135469,
+  "s496": 135470,
+  "s497": 135471,
+  "s498": 135472,
+  "s499": 135473,
+  "s500": 135474,
+  "s501": 135475,
+  "s502": 135476,
+  "s503": 135477,
+  "s504": 135478,
+  "s505": 135479,
+  "s506": 135480,
+  "s507": 135481,
+  "s508": 135482,
+  "s509": 135483,
+  "s510": 135484,
+  "s511": 135485,
+  "s512": 135486,
+  "s513": 135487,
+  "s514": 135488,
+  "s515": 135489,
+  "s516": 135490,
+  "s517": 135491,
+  "s518": 135492,
+  "s519": 135493,
+  "s520": 135494,
+  "s521": 135495,
+  "s522": 135496,
+  "s523": 135497,
+  "s524": 135498,
+  "s525": 135499,
+  "s526": 135500,
+  "s527": 135501,
+  "s528": 135502,
+  "s529": 135503,
+  "s530": 135504,
+  "s531": 135505,
+  "s532": 135506,
+  "s533": 135507,
+  "s534": 135508,
+  "s535": 135509,
+  "s536": 135510,
+  "s537": 135511,
+  "s538": 135512,
+  "s539": 135513,
+  "s540": 135514,
+  "s541": 135515,
+  "s542": 135516,
+  "s543": 135517,
+  "s544": 135518,
+  "s545": 135519,
+  "s546": 135520,
+  "s547": 135521,
+  "s548": 135522,
+  "s549": 135523,
+  "s550": 135524,
+  "s551": 135525,
+  "s552": 135526,
+  "s553": 135527,
+  "s554": 135528,
+  "s555": 135529,
+  "s556": 135530,
+  "s557": 135531,
+  "s558": 135532,
+  "s559": 135533,
+  "s560": 135534,
+  "s561": 135535,
+  "s562": 135536,
+  "s563": 135537,
+  "s564": 135538,
+  "s565": 135539,
+  "s566": 135540,
+  "s567": 135541,
+  "s568": 135542,
+  "s569": 135543,
+  "s570": 135544,
+  "s571": 135545,
+  "s572": 135546,
+  "s573": 135547,
+  "s574": 135548,
+  "s575": 135549,
+  "s576": 135550,
+  "s577": 135551,
+  "s578": 135552,
+  "s579": 135553,
+  "s580": 135554,
+  "s581": 135555,
+  "s582": 135556,
+  "s583": 135557,
+  "s584": 135558,
+  "s585": 135559,
+  "s586": 135560,
+  "s587": 135561,
+  "s588": 135562,
+  "s589": 135563,
+  "s590": 135564,
+  "s591": 135565,
+  "s592": 135566,
+  "s593": 135567,
+  "s594": 135568,
+  "s595": 135569,
+  "s596": 135570,
+  "s597": 135571,
+  "s598": 135572,
+  "s599": 135573,
+  "s600": 135574,
+  "s601": 135575,
+  "s602": 135576,
+  "s603": 135577,
+  "s604": 135578,
+  "s605": 135579,
+  "s606": 135580,
+  "s607": 135581,
+  "s608": 135582,
+  "s609": 135583,
+  "s610": 135584,
+  "s611": 135585,
+  "s612": 135586,
+  "s613": 135587,
+  "s614": 135588,
+  "s615": 135589,
+  "s616": 135590,
+  "s617": 135591,
+  "s618": 135592,
+  "s619": 135593,
+  "s620": 135594,
+  "s621": 135595,
+  "s622": 135596,
+  "s623": 135597,
+  "s624": 135598,
+  "s625": 135599,
+  "s626": 135600,
+  "s627": 135601,
+  "s628": 135602,
+  "s629": 135603,
+  "s630": 135604,
+  "s631": 135605,
+  "s632": 135606,
+  "s633": 135607,
+  "s634": 135608,
+  "s635": 135609,
+  "s636": 135610,
+  "s637": 135611,
+  "s638": 135612,
+  "s639": 135613,
+  "s640": 135614,
+  "s641": 135615,
+  "s642": 135616,
+  "s643": 135617,
+  "s644": 135618,
+  "s645": 135619,
+  "s646": 135620,
+  "s647": 135621,
+  "s648": 135622,
+  "s649": 135623,
+  "s650": 135624,
+  "s651": 135625,
+  "s652": 135626,
+  "s653": 135627,
+  "s654": 135628,
+  "s655": 135629,
+  "s656": 135630,
+  "s657": 135631,
+  "s658": 135632,
+  "s659": 135633,
+  "s660": 135634,
+  "s661": 135635,
+  "s662": 135636,
+  "s663": 135637,
+  "s664": 135638,
+  "s665": 135639,
+  "s666": 135640,
+  "s667": 135641,
+  "s668": 135642,
+  "s669": 135643,
+  "s670": 135644,
+  "s671": 135645,
+  "s672": 135646,
+  "s673": 135647,
+  "s674": 135648,
+  "s675": 135649,
+  "s676": 135650,
+  "s677": 135651,
+  "s678": 135652,
+  "s679": 135653,
+  "s680": 135654,
+  "s681": 135655,
+  "s682": 135656,
+  "s683": 135657,
+  "s684": 135658,
+  "s685": 135659,
+  "s686": 135660,
+  "s687": 135661,
+  "s688": 135662,
+  "s689": 135663,
+  "s690": 135664,
+  "s691": 135665,
+  "s692": 135666,
+  "s693": 135667,
+  "s694": 135668,
+  "s695": 135669,
+  "s696": 135670,
+  "s697": 135671,
+  "s698": 135672,
+  "s699": 135673,
+  "s700": 135674,
+  "s701": 135675,
+  "s702": 135676,
+  "s703": 135677,
+  "s704": 135678,
+  "s705": 135679,
+  "s706": 135680,
+  "s707": 135681,
+  "s708": 135682,
+  "s709": 135683,
+  "s710": 135684,
+  "s711": 135685,
+  "s712": 135686,
+  "s713": 135687,
+  "s714": 135688,
+  "s715": 135689,
+  "s716": 135690,
+  "s717": 135691,
+  "s718": 135692,
+  "s719": 135693,
+  "s720": 135694,
+  "s721": 135695,
+  "s722": 135696,
+  "s723": 135697,
+  "s724": 135698,
+  "s725": 135699,
+  "s726": 135700,
+  "s727": 135701,
+  "s728": 135702,
+  "s729": 135703,
+  "s730": 135704,
+  "s731": 135705,
+  "s732": 135706,
+  "s733": 135707,
+  "s734": 135708,
+  "s735": 135709,
+  "s736": 135710,
+  "s737": 135711,
+  "s738": 135712,
+  "s739": 135713,
+  "s740": 135714,
+  "s741": 135715,
+  "s742": 135716,
+  "s743": 135717,
+  "s744": 135718,
+  "s745": 135719,
+  "s746": 135720,
+  "s747": 135721,
+  "s748": 135722,
+  "s749": 135723,
+  "s750": 135724,
+  "s751": 135725,
+  "s752": 135726,
+  "s753": 135727,
+  "s754": 135728,
+  "s755": 135729,
+  "s756": 135730,
+  "s757": 135731,
+  "s758": 135732,
+  "s759": 135733,
+  "s760": 135734,
+  "s761": 135735,
+  "s762": 135736,
+  "s763": 135737,
+  "s764": 135738,
+  "s765": 135739,
+  "s766": 135740,
+  "s767": 135741,
+  "s768": 135742,
+  "s769": 135743,
+  "s770": 135744,
+  "s771": 135745,
+  "s772": 135746,
+  "s773": 135747,
+  "s774": 135748,
+  "s775": 135749,
+  "s776": 135750,
+  "s777": 135751,
+  "s778": 135752,
+  "s779": 135753,
+  "s780": 135754,
+  "s781": 135755,
+  "s782": 135756,
+  "s783": 135757,
+  "s784": 135758,
+  "s785": 135759,
+  "s786": 135760,
+  "s787": 135761,
+  "s788": 135762,
+  "s789": 135763,
+  "s790": 135764,
+  "s791": 135765,
+  "s792": 135766,
+  "s793": 135767,
+  "s794": 135768,
+  "s795": 135769,
+  "s796": 135770,
+  "s797": 135771,
+  "s798": 135772,
+  "s799": 135773,
+  "s800": 135774,
+  "s801": 135775,
+  "s802": 135776,
+  "s803": 135777,
+  "s804": 135778,
+  "s805": 135779,
+  "s806": 135780,
+  "s807": 135781,
+  "s808": 135782,
+  "s809": 135783,
+  "s810": 135784,
+  "s811": 135785,
+  "s812": 135786,
+  "s813": 135787,
+  "s814": 135788,
+  "s815": 135789,
+  "s816": 135790,
+  "s817": 135791,
+  "s818": 135792,
+  "s819": 135793,
+  "s820": 135794,
+  "s821": 135795,
+  "s822": 135796,
+  "s823": 135797,
+  "s824": 135798,
+  "s825": 135799,
+  "s826": 135800,
+  "s827": 135801,
+  "s828": 135802,
+  "s829": 135803,
+  "s830": 135804,
+  "s831": 135805,
+  "s832": 135806,
+  "s833": 135807,
+  "s834": 135808,
+  "s835": 135809,
+  "s836": 135810,
+  "s837": 135811,
+  "s838": 135812,
+  "s839": 135813,
+  "s840": 135814,
+  "s841": 135815,
+  "s842": 135816,
+  "s843": 135817,
+  "s844": 135818,
+  "s845": 135819,
+  "s846": 135820,
+  "s847": 135821,
+  "s848": 135822,
+  "s849": 135823,
+  "s850": 135824,
+  "s851": 135825,
+  "s852": 135826,
+  "s853": 135827,
+  "s854": 135828,
+  "s855": 135829,
+  "s856": 135830,
+  "s857": 135831,
+  "s858": 135832,
+  "s859": 135833,
+  "s860": 135834,
+  "s861": 135835,
+  "s862": 135836,
+  "s863": 135837,
+  "s864": 135838,
+  "s865": 135839,
+  "s866": 135840,
+  "s867": 135841,
+  "s868": 135842,
+  "s869": 135843,
+  "s870": 135844,
+  "s871": 135845,
+  "s872": 135846,
+  "s873": 135847,
+  "s874": 135848,
+  "s875": 135849,
+  "s876": 135850,
+  "s877": 135851,
+  "s878": 135852,
+  "s879": 135853,
+  "s880": 135854,
+  "s881": 135855,
+  "s882": 135856,
+  "s883": 135857,
+  "s884": 135858,
+  "s885": 135859,
+  "s886": 135860,
+  "s887": 135861,
+  "s888": 135862,
+  "s889": 135863,
+  "s890": 135864,
+  "s891": 135865,
+  "s892": 135866,
+  "s893": 135867,
+  "s894": 135868,
+  "s895": 135869,
+  "s896": 135870,
+  "s897": 135871,
+  "s898": 135872,
+  "s899": 135873,
+  "s900": 135874,
+  "s901": 135875,
+  "s902": 135876,
+  "s903": 135877,
+  "s904": 135878,
+  "s905": 135879,
+  "s906": 135880,
+  "s907": 135881,
+  "s908": 135882,
+  "s909": 135883,
+  "s910": 135884,
+  "s911": 135885,
+  "s912": 135886,
+  "s913": 135887,
+  "s914": 135888,
+  "s915": 135889,
+  "s916": 135890,
+  "s917": 135891,
+  "s918": 135892,
+  "s919": 135893,
+  "s920": 135894,
+  "s921": 135895,
+  "s922": 135896,
+  "s923": 135897,
+  "s924": 135898,
+  "s925": 135899,
+  "s926": 135900,
+  "s927": 135901,
+  "s928": 135902,
+  "s929": 135903,
+  "s930": 135904,
+  "s931": 135905,
+  "s932": 135906,
+  "s933": 135907,
+  "s934": 135908,
+  "s935": 135909,
+  "s936": 135910,
+  "s937": 135911,
+  "s938": 135912,
+  "s939": 135913,
+  "s940": 135914,
+  "s941": 135915,
+  "s942": 135916,
+  "s943": 135917,
+  "s944": 135918,
+  "s945": 135919,
+  "s946": 135920,
+  "s947": 135921,
+  "s948": 135922,
+  "s949": 135923,
+  "s950": 135924,
+  "s951": 135925,
+  "s952": 135926,
+  "s953": 135927,
+  "s954": 135928,
+  "s955": 135929,
+  "s956": 135930,
+  "s957": 135931,
+  "s958": 135932,
+  "s959": 135933,
+  "s960": 135934,
+  "s961": 135935,
+  "s962": 135936,
+  "s963": 135937,
+  "s964": 135938,
+  "s965": 135939,
+  "s966": 135940,
+  "s967": 135941,
+  "s968": 135942,
+  "s969": 135943,
+  "s970": 135944,
+  "s971": 135945,
+  "s972": 135946,
+  "s973": 135947,
+  "s974": 135948,
+  "s975": 135949,
+  "s976": 135950,
+  "s977": 135951,
+  "s978": 135952,
+  "s979": 135953,
+  "s980": 135954,
+  "s981": 135955,
+  "s982": 135956,
+  "s983": 135957,
+  "s984": 135958,
+  "s985": 135959,
+  "s986": 135960,
+  "s987": 135961,
+  "s988": 135962,
+  "s989": 135963,
+  "s990": 135964,
+  "s991": 135965,
+  "s992": 135966,
+  "s993": 135967,
+  "s994": 135968,
+  "s995": 135969,
+  "s996": 135970,
+  "s997": 135971,
+  "s998": 135972,
+  "s999": 135973
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,52 @@

+{#- Image placeholders: emit one '<image>' per image item found in list-style user messages, ahead of all conversation text. -#}
+{#- Whitespace control is explicit on every tag so the output does not depend on the environment's trim_blocks / lstrip_blocks settings. -#}
+{%- for message in messages if message.role == 'user' and message.content is iterable and message.content is not string -%}
+    {%- for item in message.content if item.type == 'image' -%}
+        {{- '<image>' -}}
+    {%- endfor -%}
+{%- endfor -%}
+{{- '<|begin_of_sentence|>' -}}
+{#- System prompt: collected through a namespace so the value survives the loop scope; if several system messages exist, the last one wins. -#}
+{%- set system_message = namespace(value=none) -%}
+{%- for message in messages if message.role == 'system' -%}
+    {%- set system_message.value = message.content -%}
+{%- endfor -%}
+{%- if system_message.value -%}
+    {{- system_message.value -}}
+{%- endif -%}
+{#- Conversation turns: only 'user' and 'assistant' roles are rendered here; any other role produces no output in this loop. -#}
+{%- for message in messages -%}
+    {%- if message.role == "user" -%}
+        {{- '<|User|>' -}}
+        {#- Content is either a plain string or a list of typed items; only the 'text' items of a list are rendered. -#}
+        {%- if message.content is string -%}
+            {{- message.content -}}
+        {%- elif message.content is iterable and message.content is not string -%}
+            {%- for item in message.content if item.type == "text" -%}
+                {{- item.text -}}
+            {%- endfor -%}
+        {%- endif -%}
+    {%- elif message.role == "assistant" -%}
+        {#- Thinking tag: nothing when enable_thinking is undefined, '<think>' when it is truthy, '</think>' when it is falsy. -#}
+        {%- set thinking_tag = "" -%}
+        {%- if enable_thinking is defined -%}
+            {%- set thinking_tag = "</think>" if not enable_thinking else "<think>" -%}
+        {%- endif -%}
+        {{- '<|Assistant|>' + thinking_tag -}}
+        {%- if message.content is string -%}
+            {{- message.content -}}
+        {%- elif message.content is iterable and message.content is not string -%}
+            {%- for item in message.content if item.type == "text" -%}
+                {{- item.text -}}
+            {%- endfor -%}
+        {%- endif -%}
+        {{- '<|end_of_sentence|>' -}}
+    {%- endif -%}
+{%- endfor -%}
+{#- Generation prompt: open an assistant turn, using the same thinking-tag rule as for past assistant turns. -#}
+{%- if add_generation_prompt -%}
+    {{- '<|Assistant|>' -}}
+    {%- if enable_thinking is defined -%}
+        {{- "</think>" if not enable_thinking else "<think>" -}}
+    {%- endif -%}
+{%- endif -%}

config.json ADDED Viewed

	@@ -0,0 +1,233 @@

+{
+  "architectures": [
+    "YuanVLChatModel"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_yuanvl.YuanVLChatConfig",
+    "AutoModel": "modeling_yuanvl_chat.YuanVLChatModel",
+    "AutoModelForCausalLM": "modeling_yuanvl_chat.YuanVLChatModel"
+  },
+  "bos_token_id": 134960,
+  "clip_download_path": "internvit2.5-300M",
+  "clip_model_name": "InternViT-448",
+  "downsample_ratio": 0.5,
+  "dynamic_image_size": true,
+  "eos_token_id": 77185,
+  "force_image_size": 448,
+  "imagemlp_recompute": true,
+  "img_context_token_id": 77188,
+  "llm_config": {
+    "_from_model_config": true,
+    "architectures": [
+      "YuanForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "attention_projection_size": 9216,
+    "attn_dropout": 0.0,
+    "attn_mask_type": "causal",
+    "auto_map": {
+      "AutoConfig": "configuration_yuanvl.YuanConfig",
+      "AutoModelForCausalLM": "yuanvl.YuanForCausalLM"
+    },
+    "bos_token": "<BOS>",
+    "bos_token_id": 134960,
+    "causal_mask": true,
+    "dropout": 0,
+    "eod_token": "<eod>",
+    "eod_token_id": 77185,
+    "ffn_hidden_size": 16384,
+    "head_dim": 256,
+    "hidden_act": "silu",
+    "hidden_size": 4608,
+    "initializer_range": 0.02,
+    "intermediate_size": 16384,
+    "lf_conv2d_add_bias": false,
+    "lf_conv2d_group": 1,
+    "lf_conv2d_num_pad": 0,
+    "mask_token_id": 77185,
+    "max_position_embeddings": 32768,
+    "model_max_length": 32768,
+    "model_type": "yuan",
+    "moe_config": {
+      "ffn_hidden_size": 16384,
+      "gated_linear_unit": true,
+      "moe_num_experts": 64,
+      "moe_top_k": 2,
+      "norm_topk_prob": true,
+      "per_layer_experts_blocks": [
+        40,
+        48,
+        24,
+        16,
+        24,
+        16,
+        16,
+        24,
+        32,
+        32,
+        24,
+        24,
+        24,
+        40,
+        40,
+        40,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        48,
+        40,
+        40,
+        40,
+        40,
+        40,
+        40,
+        40,
+        40,
+        32,
+        32,
+        32,
+        32,
+        32,
+        32,
+        24,
+        24,
+        16,
+        8
+      ],
+      "router_type": "linear"
+    },
+    "num_attention_heads": 36,
+    "num_hidden_layers": 103,
+    "num_key_value_heads": 36,
+    "num_query_groups": 36,
+    "output_router_logits": false,
+    "pad_token_id": 77188,
+    "perform_initialization": false,
+    "reset_attention_mask": false,
+    "reset_position_ids": false,
+    "rms_norm_eps": 1e-06,
+    "rope_theta": 1000000.0,
+    "rotary_base": 1000000,
+    "rotary_percent": 1.0,
+    "sep_token": "<sep>",
+    "sep_token_id": 77187,
+    "tie_word_embeddings": false,
+    "tokenizer_class": "YuanVLTokenizer",
+    "torch_dtype": "bfloat16",
+    "use_bias": false,
+    "use_cache": true,
+    "use_flash_attention": true,
+    "use_lf_gate": true,
+    "use_lfa_bias": false,
+    "use_loss_mask": false,
+    "use_moe": true,
+    "use_rope_scaling": false,
+    "vocab_size": 136064
+  },
+  "max_dynamic_patch": 9,
+  "max_position_embeddings": 32768,
+  "min_dynamic_patch": 1,
+  "model_max_length": 32768,
+  "model_type": "yuanvl",
+  "output_attentions": false,
+  "pad_token_id": 77185,
+  "ps_version": "v2",
+  "select_layer": -1,
+  "template": "yuan-chat",
+  "torch_dtype": "bfloat16",
+  "transformers_version": null,
+  "use_backbone_lora": 0,
+  "use_llm_lora": 0,
+  "use_thumbnail": true,
+  "vision_config": {
+    "architectures": [
+      "InternVisionModel"
+    ],
+    "attention_dropout": 0.0,
+    "drop_path_rate": 0.0,
+    "dropout": 0.0,
+    "hidden_act": "gelu",
+    "hidden_size": 1024,
+    "image_size": 448,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-06,
+    "model_type": "intern_vit_6b",
+    "norm_type": "layer_norm",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "patch_size": 14,
+    "qk_normalization": false,
+    "qkv_bias": true,
+    "torch_dtype": "bfloat16",
+    "use_bfloat16": true,
+    "use_flash_attn": true
+  }
+}

configuration_intern_vit.py ADDED Viewed

	@@ -0,0 +1,118 @@

+# --------------------------------------------------------
+# InternVL
+# Copyright (c) 2024 OpenGVLab
+# Licensed under The MIT License [see LICENSE for details]
+# --------------------------------------------------------
+import os
+from typing import Union
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+logger = logging.get_logger(__name__)
+class InternVisionConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`InternVisionModel`]. It is used to
+    instantiate a vision encoder according to the specified arguments, defining the model architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        num_channels (`int`, *optional*, defaults to 3):
+            Number of color channels in the input images (e.g., 3 for RGB).
+        patch_size (`int`, *optional*, defaults to 14):
+            The size (resolution) of each patch.
+        image_size (`int`, *optional*, defaults to 224):
+            The size (resolution) of each image.
+        qkv_bias (`bool`, *optional*, defaults to `False`):
+            Whether to add a bias to the queries and values in the self-attention layers.
+        hidden_size (`int`, *optional*, defaults to 3200):
+            Dimensionality of the encoder layers and the pooler layer.
+        num_attention_heads (`int`, *optional*, defaults to 25):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        intermediate_size (`int`, *optional*, defaults to 12800):
+            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
+        qk_normalization (`bool`, *optional*, defaults to `True`):
+            Whether to normalize the queries and keys in the self-attention layers.
+        num_hidden_layers (`int`, *optional*, defaults to 48):
+            Number of hidden layers in the Transformer encoder.
+        use_flash_attn (`bool`, *optional*, defaults to `True`):
+            Whether to use flash attention mechanism.
+        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
+            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+            `"relu"`, `"selu"` and `"gelu_new"` are supported.
+        norm_type (`str`, *optional*, defaults to `"rms_norm"`):
+            Name of the normalization layer variant. The value is stored unchanged on the config; how it is
+            interpreted is decided by the model code, which is not part of this file.
+        layer_norm_eps (`float`, *optional*, defaults to 1e-6):
+            The epsilon used by the layer normalization layers.
+        dropout (`float`, *optional*, defaults to 0.0):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        drop_path_rate (`float`, *optional*, defaults to 0.0):
+            Dropout rate for stochastic depth.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        initializer_factor (`float`, *optional*, defaults to 0.1):
+            A factor for layer scale.
+        **kwargs:
+            Any remaining keyword arguments are forwarded to the [`PretrainedConfig`] constructor.
+    """
+    # Identifier that `from_pretrained` compares against the `model_type` found in a loaded config dict.
+    model_type = 'intern_vit_6b'
+    def __init__(
+            self,
+            num_channels=3,
+            patch_size=14,
+            image_size=224,
+            qkv_bias=False,
+            hidden_size=3200,
+            num_attention_heads=25,
+            intermediate_size=12800,
+            qk_normalization=True,
+            num_hidden_layers=48,
+            use_flash_attn=True,
+            hidden_act='gelu',
+            norm_type='rms_norm',
+            layer_norm_eps=1e-6,
+            dropout=0.0,
+            drop_path_rate=0.0,
+            attention_dropout=0.0,
+            initializer_range=0.02,
+            initializer_factor=0.1,
+            **kwargs,
+    ):
+        """Store every architecture argument as a same-named attribute; see the class docstring for their meaning."""
+        # Unrecognised keyword arguments are handled by the base class before the fields below are set.
+        super().__init__(**kwargs)
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.dropout = dropout
+        self.drop_path_rate = drop_path_rate
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_channels = num_channels
+        self.patch_size = patch_size
+        self.image_size = image_size
+        self.initializer_range = initializer_range
+        self.initializer_factor = initializer_factor
+        self.attention_dropout = attention_dropout
+        self.layer_norm_eps = layer_norm_eps
+        self.hidden_act = hidden_act
+        self.norm_type = norm_type
+        self.qkv_bias = qkv_bias
+        self.qk_normalization = qk_normalization
+        self.use_flash_attn = use_flash_attn
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> 'PretrainedConfig':
+        """Build a vision config from a pretrained model name or path.
+
+        The configuration dictionary is obtained through `cls.get_config_dict`. When that dictionary has a
+        `vision_config` key (as a composite model's config does), only the nested dictionary is used. A warning
+        is logged, but loading still proceeds, if the dictionary's `model_type` differs from `cls.model_type`.
+
+        Args:
+            pretrained_model_name_or_path: Location passed through to `cls.get_config_dict`.
+            **kwargs: Forwarded to `cls.get_config_dict`; whatever it hands back is forwarded to `cls.from_dict`.
+
+        Returns:
+            The object produced by `cls.from_dict` for the selected configuration dictionary.
+        """
+        config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
+        # A composite config nests the vision settings; unwrap them so this class sees only its own fields.
+        if 'vision_config' in config_dict:
+            config_dict = config_dict['vision_config']
+        # A mismatching model type is reported but deliberately not treated as an error.
+        if 'model_type' in config_dict and hasattr(cls, 'model_type') and config_dict['model_type'] != cls.model_type:
+            logger.warning(
+                f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+                f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.'
+            )
+        return cls.from_dict(config_dict, **kwargs)

configuration_yuan.py ADDED Viewed

	@@ -0,0 +1,153 @@

+# coding=utf-8
+# Copyright YuanLabAi and the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Yuan model configuration"""
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+logger = logging.get_logger(__name__)
+class YuanConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`YuanModel`]. It is used to instantiate an
+    Yuan model according to the specified arguments, defining the model architecture. Instantiating a configuration
+    with the defaults will yield a similar configuration to that of the Yuan--v0.1 or Yuan.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vocab_size (`int`, *optional*, defaults to 32000):
+            Vocabulary size of the Yuan model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`YuanModel`]
+        hidden_size (`int`, *optional*, defaults to 4096):
+            Dimension of the hidden representations.
+        intermediate_size (`int`, *optional*, defaults to 14336):
+            Dimension of the MLP representations.
+        num_hidden_layers (`int`, *optional*, defaults to 32):
+            Number of hidden layers in the Transformer encoder.
+        num_attention_heads (`int`, *optional*, defaults to 32):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        num_key_value_heads (`int`, *optional*, defaults to 8):
+            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
+            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
+            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
+            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
+            by meanpooling all the original heads within that group. For more details checkout [this
+            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`.
+        head_dim (`int`, *optional*, defaults to `hidden_size // num_attention_heads`):
+            The attention head dimension.
+        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
+            The non-linear activation function (function or string) in the decoder.
+        max_position_embeddings (`int`, *optional*, defaults to `4096*32`):
+            The maximum sequence length that this model might ever be used with. Yuan's sliding window attention
+            allows sequence of up to 4096*32 tokens.
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
+            The epsilon used by the rms normalization layers.
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models). Only
+            relevant if `config.is_decoder=True`.
+        pad_token_id (`int`, *optional*):
+            The id of the padding token.
+        bos_token_id (`int`, *optional*, defaults to 1):
+            The id of the "beginning-of-sequence" token.
+        eos_token_id (`int`, *optional*, defaults to 2):
+            The id of the "end-of-sequence" token.
+        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+            Whether the model's input and output word embeddings should be tied.
+        rope_theta (`float`, *optional*, defaults to 1000000.0):
+            The base period of the RoPE embeddings.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+    """
+    model_type = "yuan"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    base_model_tp_plan = {
+        "layers.*.self_attn.q_proj": "colwise",
+        "layers.*.self_attn.k_proj": "colwise",
+        "layers.*.self_attn.v_proj": "colwise",
+        "layers.*.self_attn.o_proj": "rowwise",
+        "layers.*.block_sparse_moe.gate": "colwise_rep",  # we need to replicate here to correctly route experts
+        "layers.*.block_sparse_moe.experts.*.w1": "colwise",
+        "layers.*.block_sparse_moe.experts.*.w2": "rowwise",
+        "layers.*.block_sparse_moe.experts.*.w3": "colwise",
+    }
+    base_model_pp_plan = {
+        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
+        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
+        "norm": (["hidden_states"], ["hidden_states"]),
+    }
+    def __init__(
+        self,
+        vocab_size=135040,
+        hidden_size=2048,
+        intermediate_size=8192,
+        num_hidden_layers=24,
+        num_attention_heads=32,
+        num_key_value_heads=16,
+        head_dim=None,
+        hidden_act="silu",
+        max_position_embeddings=4096 * 32,
+        model_max_length=8192,
+        initializer_range=0.02,
+        rms_norm_eps=1e-5,
+        use_cache=True,
+        pad_token_id=None,
+        bos_token_id=1,
+        eos_token_id=2,
+        tie_word_embeddings=False,
+        rope_theta=1e6,
+        attention_dropout=0.0,
+        attention_projection_size=None,
+        perform_initialization=False,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.model_max_length = model_max_length
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.hidden_act = hidden_act
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.attention_dropout = attention_dropout
+        self.attention_projection_size = attention_projection_size if attention_projection_size is not None else self.hidden_size
+        self.head_dim = head_dim if head_dim is not None else self.attention_projection_size // self.num_attention_heads
+        self.perform_initialization=perform_initialization
+        self.tie_word_embeddings = tie_word_embeddings
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            #tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )

configuration_yuanvl.py ADDED Viewed

	@@ -0,0 +1,134 @@

+# --------------------------------------------------------
+# InternVL
+# Copyright (c) 2024 OpenGVLab
+# Licensed under The MIT License [see LICENSE for details]
+# --------------------------------------------------------
+import copy
+from transformers import AutoConfig, LlamaConfig
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+from transformers.models.auto import CONFIG_MAPPING
+from .configuration_intern_vit import InternVisionConfig
+from .configuration_yuan import YuanConfig
+logger = logging.get_logger(__name__)
+class YuanVLChatConfig(PretrainedConfig):
+    model_type = 'yuanvl'
+    is_composition = True
+    sub_configs = {"llm_config": YuanConfig, "vision_config": InternVisionConfig}  # 声明子配置类型
+    def __init__(
+            self,
+            vision_config=None,
+            llm_config=None,
+            use_backbone_lora=0,
+            use_llm_lora=0,
+            select_layer=-1,
+            force_image_size=None,
+            downsample_ratio=0.5,
+            template=None,
+            dynamic_image_size=False,
+            use_thumbnail=False,
+            tie_word_embeddings=False,
+            ps_version='v1',
+            min_dynamic_patch=1,
+            max_dynamic_patch=6,
+            img_context_token_id=77188,** kwargs):
+        # 初始化视觉子配置（确保为InternVisionConfig实例）
+        if vision_config is None:
+            # 输入为None时，直接实例化InternVisionConfig（而非字典）
+            self.vision_config = InternVisionConfig(architectures=['InternVisionModel'])
+            logger.info('vision_config is None. Initializing InternVisionConfig with default values.')
+        elif isinstance(vision_config, dict):
+            # 输入为字典时，用from_dict实例化
+            self.vision_config = InternVisionConfig.from_dict(vision_config)
+        else:
+            # 输入已为实例时直接使用
+            self.vision_config = vision_config
+        # 初始化LLM子配置（确保为YuanConfig实例）
+        if llm_config is None:
+            # 输入为None时，直接实例化YuanConfig（而非字典）
+            self.llm_config = YuanConfig(architectures=['YuanForCausalLM'])
+            self.llm_config.tie_word_embeddings = tie_word_embeddings  # 显式设置属性
+            logger.info('llm_config is None. Initializing YuanConfig with default values.')
+        elif isinstance(llm_config, dict):
+            # 输入为字典时，用from_dict实例化
+            self.llm_config = YuanConfig.from_dict(llm_config)
+            self.llm_config.tie_word_embeddings = tie_word_embeddings
+        else:
+            # 输入已为实例时直接使用，并同步tie_word_embeddings
+            self.llm_config = llm_config
+            self.llm_config.tie_word_embeddings = tie_word_embeddings
+        # 其他属性初始化
+        self.use_backbone_lora = use_backbone_lora
+        self.use_llm_lora = use_llm_lora
+        self.select_layer = select_layer
+        self.force_image_size = force_image_size
+        self.downsample_ratio = downsample_ratio
+        self.template = template
+        self.dynamic_image_size = dynamic_image_size
+        self.use_thumbnail = use_thumbnail
+        self.ps_version = ps_version
+        self.min_dynamic_patch = min_dynamic_patch
+        self.max_dynamic_patch = max_dynamic_patch
+        self.img_context_token_id = img_context_token_id
+        self.tie_word_embeddings = self.llm_config.tie_word_embeddings  # 同步LLM的配置
+        # 日志输出
+        logger.info(f'vision_select_layer: {self.select_layer}')
+        logger.info(f'ps_version: {self.ps_version}')
+        logger.info(f'min_dynamic_patch: {self.min_dynamic_patch}')
+        logger.info(f'max_dynamic_patch: {self.max_dynamic_patch}')
+        super().__init__(**kwargs)
+    @classmethod
+    def from_sub_model_configs(
+        cls,
+        vision_config: InternVisionConfig,
+        llm_config: YuanConfig,
+        **kwargs,
+    ):
+        r"""
+        Instantiate a [`YuanVLChatConfig`] (or a derived class) from bark sub-models configuration.
+        Returns:
+            [``YuanVLChatConfig``]: An instance of a configuration object
+        """
+        return cls(
+            vision_config=vision_config.to_dict(),
+            llm_config=llm_config.to_dict(),
+            **kwargs,
+        )
+    def to_dict(self):
+        """
+        Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].
+        Returns:
+            `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance,
+        """
+        output = copy.deepcopy(self.__dict__)
+        output['vision_config'] = self.vision_config.to_dict()
+        output['llm_config'] = self.llm_config.to_dict()
+        output['model_type'] = self.__class__.model_type
+        output['use_backbone_lora'] = self.use_backbone_lora
+        output['use_llm_lora'] = self.use_llm_lora
+        output['select_layer'] = self.select_layer
+        output['force_image_size'] = self.force_image_size
+        output['downsample_ratio'] = self.downsample_ratio
+        output['template'] = self.template
+        output['dynamic_image_size'] = self.dynamic_image_size
+        output['use_thumbnail'] = self.use_thumbnail
+        output['ps_version'] = self.ps_version
+        output['min_dynamic_patch'] = self.min_dynamic_patch
+        output['max_dynamic_patch'] = self.max_dynamic_patch
+        return output

conversation.py ADDED Viewed

	@@ -0,0 +1,399 @@

+"""
+Conversation prompt templates.
+We kindly request that you import fastchat instead of copying this file if you wish to use it.
+If you have changes in mind, please contribute back so the community can benefit collectively and continue to maintain these valuable templates.
+Modified from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
+"""
+import dataclasses
+from enum import IntEnum, auto
+from typing import Dict, List, Tuple, Union
+class SeparatorStyle(IntEnum):
+    """Separator styles."""
+    # Each member names one prompt layout; `Conversation.get_prompt` has a branch for every
+    # member and raises ValueError for any other value.
+    # The integer values come from `auto()` and therefore follow declaration order:
+    # inserting or reordering members changes the number behind every later name.
+    ADD_COLON_SINGLE = auto()
+    ADD_COLON_TWO = auto()
+    ADD_COLON_SPACE_SINGLE = auto()
+    NO_COLON_SINGLE = auto()
+    NO_COLON_TWO = auto()
+    ADD_NEW_LINE_SINGLE = auto()
+    LLAMA2 = auto()
+    CHATGLM = auto()
+    CHATML = auto()
+    CHATINTERN = auto()
+    DOLLY = auto()
+    RWKV = auto()
+    PHOENIX = auto()
+    ROBIN = auto()
+    FALCON_CHAT = auto()
+    CHATGLM3 = auto()
+    INTERNVL_ZH = auto()
+    MPT = auto()
+@dataclasses.dataclass
+class Conversation:
+    """A class that manages prompt templates and keeps all conversation history."""
+    # The name of this template
+    name: str
+    # The template of the system prompt
+    system_template: str = '{system_message}'
+    # The system message
+    system_message: str = ''
+    # The names of two roles
+    roles: Tuple[str] = ('USER', 'ASSISTANT')
+    # All messages. Each item is (role, message).
+    messages: List[List[str]] = ()
+    # The number of few shot examples
+    offset: int = 0
+    # The separator style and configurations
+    sep_style: SeparatorStyle = SeparatorStyle.ADD_COLON_SINGLE
+    sep: str = '\n'
+    sep2: str = None
+    # Stop criteria (the default one is EOS token)
+    stop_str: Union[str, List[str]] = None
+    # Stops generation if meeting any token in this list
+    stop_token_ids: List[int] = None
+    def get_prompt(self) -> str:
+        """Get the prompt for generation."""
+        system_prompt = self.system_template.format(system_message=self.system_message)
+        if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
+            ret = system_prompt + self.sep
+            for role, message in self.messages:
+                if message:
+                    ret += role + ': ' + message + self.sep
+                else:
+                    ret += role + ':'
+            return ret
+        elif self.sep_style == SeparatorStyle.ADD_COLON_TWO:
+            seps = [self.sep, self.sep2]
+            ret = system_prompt + seps[0]
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    ret += role + ': ' + message + seps[i % 2]
+                else:
+                    ret += role + ':'
+            return ret
+        elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
+            ret = system_prompt + self.sep
+            for role, message in self.messages:
+                if message:
+                    ret += role + ': ' + message + self.sep
+                else:
+                    ret += role + ': '  # must be end with a space
+            return ret
+        elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
+            ret = '' if system_prompt == '' else system_prompt + self.sep
+            for role, message in self.messages:
+                if message:
+                    ret += role + '\n' + message + self.sep
+                else:
+                    ret += role + '\n'
+            return ret
+        elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
+            ret = system_prompt
+            for role, message in self.messages:
+                if message:
+                    ret += role + message + self.sep
+                else:
+                    ret += role
+            return ret
+        elif self.sep_style == SeparatorStyle.NO_COLON_TWO:
+            seps = [self.sep, self.sep2]
+            ret = system_prompt
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    ret += role + message + seps[i % 2]
+                else:
+                    ret += role
+            return ret
+        elif self.sep_style == SeparatorStyle.RWKV:
+            ret = system_prompt
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    ret += (
+                        role
+                        + ': '
+                        + message.replace('\r\n', '\n').replace('\n\n', '\n')
+                    )
+                    ret += '\n\n'
+                else:
+                    ret += role + ':'
+            return ret
+        elif self.sep_style == SeparatorStyle.LLAMA2:
+            seps = [self.sep, self.sep2]
+            if self.system_message:
+                ret = system_prompt
+            else:
+                ret = '[INST] '
+            for i, (role, message) in enumerate(self.messages):
+                tag = self.roles[i % 2]
+                if message:
+                    if i == 0:
+                        ret += message + ' '
+                    else:
+                        ret += tag + ' ' + message + seps[i % 2]
+                else:
+                    ret += tag
+            return ret
+        elif self.sep_style == SeparatorStyle.CHATGLM:
+            # source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
+            # source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
+            round_add_n = 1 if self.name == 'chatglm2' else 0
+            if system_prompt:
+                ret = system_prompt + self.sep
+            else:
+                ret = ''
+            for i, (role, message) in enumerate(self.messages):
+                if i % 2 == 0:
+                    ret += f'[Round {i//2 + round_add_n}]{self.sep}'
+                if message:
+                    ret += f'{role}：{message}{self.sep}'
+                else:
+                    ret += f'{role}：'
+            return ret
+        elif self.sep_style == SeparatorStyle.CHATML:
+            ret = '' if system_prompt == '' else system_prompt + self.sep + '\n'
+            for role, message in self.messages:
+                if message:
+                    ret += role + '\n' + message + self.sep + '\n'
+                else:
+                    ret += role + '\n'
+            return ret
+        elif self.sep_style == SeparatorStyle.CHATGLM3:
+            ret = ''
+            if self.system_message:
+                ret += system_prompt
+            for role, message in self.messages:
+                if message:
+                    ret += role + '\n' + ' ' + message
+                else:
+                    ret += role
+            return ret
+        elif self.sep_style == SeparatorStyle.CHATINTERN:
+            # source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771
+            seps = [self.sep, self.sep2]
+            ret = system_prompt
+            for i, (role, message) in enumerate(self.messages):
+                # if i % 2 == 0:
+                #     ret += "<s>"
+                if message:
+                    ret += role + ':' + message + seps[i % 2] + '\n'
+                else:
+                    ret += role + ':'
+            return ret
+        elif self.sep_style == SeparatorStyle.DOLLY:
+            seps = [self.sep, self.sep2]
+            ret = system_prompt
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    ret += role + ':\n' + message + seps[i % 2]
+                    if i % 2 == 1:
+                        ret += '\n\n'
+                else:
+                    ret += role + ':\n'
+            return ret
+        elif self.sep_style == SeparatorStyle.PHOENIX:
+            ret = system_prompt
+            for role, message in self.messages:
+                if message:
+                    ret += role + ': ' + '<s>' + message + '</s>'
+                else:
+                    ret += role + ': ' + '<s>'
+            return ret
+        elif self.sep_style == SeparatorStyle.ROBIN:
+            ret = system_prompt + self.sep
+            for role, message in self.messages:
+                if message:
+                    ret += role + ':\n' + message + self.sep
+                else:
+                    ret += role + ':\n'
+            return ret
+        elif self.sep_style == SeparatorStyle.FALCON_CHAT:
+            ret = ''
+            if self.system_message:
+                ret += system_prompt + self.sep
+            for role, message in self.messages:
+                if message:
+                    ret += role + ': ' + message + self.sep
+                else:
+                    ret += role + ':'
+            return ret
+        elif self.sep_style == SeparatorStyle.INTERNVL_ZH:
+            seps = [self.sep, self.sep2]
+            ret = self.system_message + seps[0]
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    ret += role + ': ' + message + seps[i % 2]
+                else:
+                    ret += role + ':'
+            return ret
+        elif self.sep_style == SeparatorStyle.MPT:
+            ret = system_prompt + self.sep
+            for role, message in self.messages:
+                if message:
+                    if type(message) is tuple:
+                        message, _, _ = message
+                    ret += role + message + self.sep
+                else:
+                    ret += role
+            return ret
+        else:
+            raise ValueError(f'Invalid style: {self.sep_style}')
+    def set_system_message(self, system_message: str):
+        """Set the system message."""
+        self.system_message = system_message
+    def append_message(self, role: str, message: str):
+        """Append a new message."""
+        self.messages.append([role, message])
+    def update_last_message(self, message: str):
+        """Update the last output.
+        The last message is typically set to be None when constructing the prompt,
+        so we need to update it in-place after getting the response from a model.
+        """
+        self.messages[-1][1] = message
+    def to_gradio_chatbot(self):
+        """Convert the conversation to gradio chatbot format."""
+        ret = []
+        for i, (role, msg) in enumerate(self.messages[self.offset :]):
+            if i % 2 == 0:
+                ret.append([msg, None])
+            else:
+                ret[-1][-1] = msg
+        return ret
+    def to_openai_api_messages(self):
+        """Convert the conversation to OpenAI chat completion format."""
+        ret = [{'role': 'system', 'content': self.system_message}]
+        for i, (_, msg) in enumerate(self.messages[self.offset :]):
+            if i % 2 == 0:
+                ret.append({'role': 'user', 'content': msg})
+            else:
+                if msg is not None:
+                    ret.append({'role': 'assistant', 'content': msg})
+        return ret
+    def copy(self):
+        return Conversation(
+            name=self.name,
+            system_template=self.system_template,
+            system_message=self.system_message,
+            roles=self.roles,
+            messages=[[x, y] for x, y in self.messages],
+            offset=self.offset,
+            sep_style=self.sep_style,
+            sep=self.sep,
+            sep2=self.sep2,
+            stop_str=self.stop_str,
+            stop_token_ids=self.stop_token_ids,
+        )
+    def dict(self):
+        return {
+            'template_name': self.name,
+            'system_message': self.system_message,
+            'roles': self.roles,
+            'messages': self.messages,
+            'offset': self.offset,
+        }
+# A global registry for all conversation templates
+conv_templates: Dict[str, Conversation] = {}
+def register_conv_template(template: Conversation, override: bool = False):
+    """Register a new conversation template."""
+    if not override:
+        assert (
+            template.name not in conv_templates
+        ), f'{template.name} has been registered.'
+    conv_templates[template.name] = template
+def get_conv_template(name: str) -> Conversation:
+    """Get a conversation template."""
+    return conv_templates[name].copy()
+# Built-in templates. All five use SeparatorStyle.MPT, i.e. the prompt is the system prompt
+# followed by `sep`, then `role + message + sep` for each message.
+# Both Hermes-2 and internlm2-chat are chatml-format conversation templates. The difference
+# is that during training, the preprocessing function for the Hermes-2 template doesn't add
+# <s> at the beginning of the tokenized sequence, while the internlm2-chat template does.
+# Therefore, they are completely equivalent during inference.
+# Note: as declared here, the Hermes-2 entry additionally sets stop_str='<|endoftext|>'.
+register_conv_template(
+    Conversation(
+        name='Hermes-2',
+        system_template='<|im_start|>system\n{system_message}',
+        # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # The commented-out literal below is that newer prompt, kept verbatim for reference.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
+        system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
+        roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
+        sep_style=SeparatorStyle.MPT,
+        sep='<|im_end|>',
+        stop_str='<|endoftext|>',
+    )
+)
+register_conv_template(
+    Conversation(
+        name='internlm2-chat',
+        system_template='<|im_start|>system\n{system_message}',
+        # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # The commented-out literal below is that newer prompt, kept verbatim for reference.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
+        system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
+        roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
+        sep_style=SeparatorStyle.MPT,
+        sep='<|im_end|>',
+    )
+)
+# Same system message as the two templates above, with phi3-style role and end markers.
+register_conv_template(
+    Conversation(
+        name='phi3-chat',
+        system_template='<|system|>\n{system_message}',
+        # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # The commented-out literal below is that newer prompt, kept verbatim for reference.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
+        system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
+        roles=('<|user|>\n', '<|assistant|>\n'),
+        sep_style=SeparatorStyle.MPT,
+        sep='<|im_end|>'.replace('<|im_end|>', '<|end|>') if False else '<|end|>',
+    )
+)
+# Unlike the templates above, the separator of the next two templates ends with a newline.
+register_conv_template(
+    Conversation(
+        name='internvl2_5',
+        system_template='<|im_start|>system\n{system_message}',
+        system_message='你是书生·万象，英文名是InternVL，是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
+        roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
+        sep_style=SeparatorStyle.MPT,
+        sep='<|im_end|>\n',
+    )
+)
+# Same markers and separator as 'internvl2_5'; only the name and the system message differ.
+register_conv_template(
+    Conversation(
+        name='yuan-chat',
+        system_template='<|im_start|>system\n{system_message}',
+        system_message='你是YuanLabAi-多模态模型，英文名是YuanVL，是由YuanLabAi开发的多模态大语言模型。',
+        roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
+        sep_style=SeparatorStyle.MPT,
+        sep='<|im_end|>\n',
+    )
+)

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff