{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2025-06-18 07:13:16,579] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/mnt/jeff/anaconda/compiler_compat/ld: cannot find -laio: No such file or directory\n", "collect2: error: ld returned 1 exit status\n", "/mnt/jeff/anaconda/compiler_compat/ld: warning: librt.so.1, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n", "/mnt/jeff/anaconda/compiler_compat/ld: warning: libpthread.so.0, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n", "/mnt/jeff/anaconda/compiler_compat/ld: warning: libstdc++.so.6, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n", "/mnt/jeff/anaconda/compiler_compat/ld: warning: libm.so.6, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for 
bool@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_logic_error(char const*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ostringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::logic_error@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::~locale()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::basic_string(std::string const&, unsigned long, unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_end_catch@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ofstream >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::logic_error::~logic_error()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for __cxxabiv1::__si_class_type_info@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios >::_M_cache_locale(std::locale const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_stringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator new[](unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: 
undefined reference to `std::string::_M_leak_hard()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ifstream >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf >::basic_streambuf(std::basic_streambuf > const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(char const*, unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::basic_string(std::string const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned short@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::resize(unsigned long, char)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for char const*@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ctype::_M_widen_init() const@GLIBCXX_3.4.11'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_invalid_argument(char const*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::operator=(std::locale const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios >::_M_cache_locale(std::locale const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_decrement(std::_Rb_tree_node_base 
const*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_free_exception@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::notify_one()@GLIBCXX_3.4.11'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::Init::~Init()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::~basic_string()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_pure_virtual@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::flush()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for __cxxabiv1::__class_type_info@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_rethrow@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringbuf, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_fstream >::~basic_fstream()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::compare(char const*) const@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ostringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::locale()@GLIBCXX_3.4'\n", 
"/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::system_clock::now()@GLIBCXX_3.4.19'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ifstream >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Hash_bytes(void const*, unsigned long, unsigned long)@CXXABI_1.3.5'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert(long long)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for char*@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_Prime_rehash_policy::_M_need_rehash(unsigned long, unsigned long, unsigned long) const@GLIBCXX_3.4.18'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::out_of_range@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert(unsigned long)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_increment(std::_Rb_tree_node_base const*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::~ios_base()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::range_error::~range_error()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__basic_file::~__basic_file()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: 
/usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_guard_acquire@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert(bool)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::overflow_error@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_fstream >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::range_error@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ios >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_filebuf >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator delete[](void*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::basic_string(unsigned long, char, std::allocator const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_transfer(std::__detail::_List_node_base*, std::__detail::_List_node_base*)@GLIBCXX_3.4.15'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::replace(unsigned long, unsigned long, char const*, unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: 
/usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for std::exception@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::_Rep::_M_destroy(std::allocator const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream& std::istream::_M_extract(double&)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf >::close()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_fstream >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream >::basic_ifstream(char const*, std::_Ios_Openmode)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(std::string const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator new(unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_istringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned int@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(char const*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::domain_error@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::find(char, unsigned long) 
const@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::put(char)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for int@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_bad_alloc()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_thread_atexit@CXXABI_1.3.7'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned int*@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_increment(std::_Rb_tree_node_base*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream >::~basic_ifstream()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::Init::Init()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::condition_variable()@GLIBCXX_3.4.11'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf >::basic_filebuf()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_istringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::domain_error::~domain_error()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::cerr@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: 
/usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::find(char const*, unsigned long, unsigned long) const@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_istringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::basic_string(std::allocator const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf, std::allocator >::str() const@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::invalid_argument@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for void*@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::assign(std::string const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostringstream, std::allocator >::~basic_ostringstream()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_rebalance_for_erase(std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*)@GLIBCXX_3.4.15'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to 
`std::__detail::_List_node_base::_M_unhook()@GLIBCXX_3.4.15'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ostringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf, std::allocator >::_M_sync(char*, unsigned long, unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_iostream >::~basic_iostream()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::locale(std::locale const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_istringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `log2f@GLIBC_2.2.5'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::operator<<(std::basic_streambuf >*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_streambuf >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::exception::~exception()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_S_create(unsigned long, unsigned long, std::allocator const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__basic_file::is_open() const@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_istringstream, 
std::allocator >::~basic_istringstream()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::swap(std::string&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ostringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf >::basic_streambuf(std::basic_streambuf > const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios >::init(std::basic_streambuf >*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_bad_cast()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios >::clear(std::_Ios_Iostate)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf >::operator=(std::basic_streambuf > const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long*@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator delete(void*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::operator<<(int)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_S_empty_rep_storage@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_M_destroy(std::allocator const&)@GLIBCXX_3.4'\n", 
"/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_iostream >::~basic_iostream()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::runtime_error@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ofstream >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_insert_and_rebalance(bool, std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringstream, std::allocator >::~basic_stringstream()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_stringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert(long)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream::get()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long long@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostream >& std::operator<< >(std::basic_ostream >&, char const*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::out_of_range::~out_of_range()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::length_error::~length_error()@GLIBCXX_3.4'\n", 
"/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostream >& std::__ostream_insert >(std::basic_ostream >&, char const*, long)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::invalid_argument::~invalid_argument()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::swap(std::basic_string, std::allocator >&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::cout@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert(unsigned long long)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert(void const*)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::underflow_error@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_streambuf >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for std::out_of_range@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_allocate_exception@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ios >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for void const*@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined 
reference to `std::basic_ios >::init(std::basic_streambuf >*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::reserve(unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_begin_catch@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::_Rep::_S_empty_rep_storage@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_leak()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf >::open(char const*, std::_Ios_Openmode)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf, std::allocator >::_M_sync(wchar_t*, unsigned long, unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream::getline(char*, long, char)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_istream >& std::getline, std::allocator >(std::basic_istream >&, std::basic_string, std::allocator >&, char)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringstream, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::~condition_variable()@GLIBCXX_3.4.11'\n", "/mnt/jeff/anaconda/compiler_compat/ld: 
/usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringbuf, std::allocator >@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::insert(unsigned long, char const*, unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::assign(char const*, unsigned long)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned char@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::ios_base()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_out_of_range(char const*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::overflow_error::~overflow_error()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_length_error(char const*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_system_error(int)@GLIBCXX_3.4.11'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ofstream >::close()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert(double)@GLIBCXX_3.4.9'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf >::operator=(std::basic_streambuf > const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long 
long@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string, std::allocator >::basic_string(char const*, unsigned long, std::allocator const&)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream >::close()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_guard_release@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_throw@CXXABI_1.3'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::underflow_error::~underflow_error()@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_decrement(std::_Rb_tree_node_base*)@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::length_error@GLIBCXX_3.4'\n", "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf >::~basic_filebuf()@GLIBCXX_3.4'\n", "collect2: error: ld returned 1 exit status\n", "/mnt/jeff/huggingface/modules/transformers_modules/speech_conformer_encoder.py:2775: FutureWarning: Please specify CheckpointImpl.NO_REENTRANT as CheckpointImpl.REENTRANT will soon be removed as the default and eventually deprecated.\n", " lambda i: encoder_checkpoint_wrapper(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "######################## speech lora #############\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f0f347f300534909b2d5db8ca3ea5df4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/5 [00:00, hidden_states=None, attentions=None)\n", "inputs_embeds 
tensor(0, device='cuda:0') tensor([[[-0.0186, 0.0043, 0.0227, ..., 0.0088, -0.0049, 0.0025]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0186, 0.0043, 0.0227, ..., 0.0088, -0.0049, 0.0025]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.2565, 2.7995, 12.6682, ..., -4.1137, -4.1120, -4.1128]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0011, 0.0184, 0.0028, ..., 0.0006, -0.0128, -0.0151]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0011, 0.0184, 0.0028, ..., 0.0006, -0.0128, -0.0151]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.0887, -5.8322, 0.7939, ..., -0.5516, -0.5513, -0.5520]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0113, -0.0161, 0.0079, ..., 0.0197, -0.0008, -0.0082]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0113, -0.0161, 0.0079, ..., 0.0197, -0.0008, -0.0082]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 7.4639, 3.4059, 1.3521, ..., -2.6798, -2.6809, -2.6811]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.9671, -3.2369, 0.9566, ..., -1.4895, -1.4896, -1.4902]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') 
tensor([[[ 0.0104, 0.0069, -0.0060, ..., 0.0010, 0.0042, -0.0015]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0104, 0.0069, -0.0060, ..., 0.0010, 0.0042, -0.0015]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.2403, 0.2873, 2.4648, ..., -1.0528, -1.0534, -1.0542]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0021, -0.0007, 0.0070, ..., -0.0084, -0.0083, -0.0017]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0021, -0.0007, 0.0070, ..., -0.0084, -0.0083, -0.0017]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.4957, 2.0423, 0.7912, ..., -2.3032, -2.3031, -2.3025]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0124, -0.0187, -0.0081, ..., -0.0090, -0.0092, 0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0124, -0.0187, -0.0081, ..., -0.0090, -0.0092, 0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.8442, -0.7132, 1.0167, ..., -1.9491, -1.9485, -1.9490]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0172, -0.0028, ..., -0.0081, 0.0011, -0.0062]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0172, -0.0028, ..., -0.0081, 0.0011, -0.0062]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.7503, -1.2019, -2.7825, ..., -1.4378, -1.4368, -1.4375]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0204, -0.0112, 0.0025, ..., -0.0184, 0.0095, 
0.0031]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0204, -0.0112, 0.0025, ..., -0.0184, 0.0095, 0.0031]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.7866, 5.1949, 1.4227, ..., -2.1833, -2.1827, -2.1830]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0131, -0.0034, -0.0005, ..., 0.0061, -0.0068, -0.0160]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0131, -0.0034, -0.0005, ..., 0.0061, -0.0068, -0.0160]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 7.4504, 0.9829, 3.5831, ..., -2.8693, -2.8691, -2.8692]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0030, 0.0056, 0.0034, ..., -0.0063, -0.0016, 0.0013]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0030, 0.0056, 0.0034, ..., -0.0063, -0.0016, 0.0013]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.1066, -0.4927, 0.0675, ..., -1.7429, -1.7428, -1.7433]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.7224, 1.7236, -0.3631, ..., -1.1573, -1.1571, -1.1564]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0029, -0.0006, -0.0219, ..., -0.0032, 0.0011, 0.0075]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, 
device='cuda:0') tensor([[[-0.0029, -0.0006, -0.0219, ..., -0.0032, 0.0011, 0.0075]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.9264, -1.4750, -0.0503, ..., -0.5115, -0.5112, -0.5118]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.2001, 0.3211, -0.7397, ..., -1.4245, -1.4242, -1.4246]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0009, -0.0044, -0.0095, ..., -0.0183, 0.0014, 0.0052]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0009, -0.0044, -0.0095, ..., -0.0183, 0.0014, 0.0052]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 6.0811, 1.3457, 3.4966, ..., -1.9978, -1.9977, -1.9982]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0067, -0.0047, 0.0002, ..., 0.0022, -0.0071, 0.0003]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0067, -0.0047, 0.0002, ..., 0.0022, -0.0071, 0.0003]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.3244, 5.0375, 10.6473, ..., -0.8292, -0.8280, -0.8281]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0112, -0.0073, 0.0118, ..., -0.0182, 0.0040, 0.0034]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0112, -0.0073, 0.0118, ..., -0.0182, 
0.0040, 0.0034]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.5047, 2.7371, 3.7994, ..., 1.6258, 1.6262, 1.6260]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.9971, -2.0421, 1.8777, ..., -2.1895, -2.1888, -2.1893]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0107, 0.0162, ..., -0.0099, 0.0160, 0.0139]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0107, 0.0162, ..., -0.0099, 0.0160, 0.0139]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.2119, 2.1697, 12.1325, ..., -2.3442, -2.3435, -2.3442]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0012, -0.0006, 0.0006, ..., -0.0002, 0.0013, -0.0006]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0012, -0.0006, 0.0006, ..., -0.0002, 0.0013, -0.0006]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.4543, 5.5360, 10.5822, ..., -2.5854, -2.5850, -2.5851]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "outputs 
CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.4849, 2.5870, 3.0230, ..., -1.4589, -1.4583, -1.4581]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.6099, -4.4040, 0.9232, ..., 0.1388, 0.1387, 0.1388]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0050, 0.0156, -0.0234, ..., 0.0087, -0.0071, 0.0013]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0050, 0.0156, -0.0234, ..., 0.0087, -0.0071, 0.0013]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.1205, 1.9262, 1.8272, ..., -1.6031, -1.6030, -1.6035]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0027, -0.0110, -0.0009, ..., -0.0010, -0.0021, -0.0044]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0027, -0.0110, -0.0009, ..., -0.0010, -0.0021, -0.0044]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.0034, 2.5199, 1.3652, ..., -1.9592, -1.9590, -1.9597]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0023, 0.0059, -0.0074, ..., -0.0142, -0.0020, -0.0064]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0023, 0.0059, -0.0074, ..., -0.0142, -0.0020, -0.0064]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.4204, 0.7696, 
1.5233, ..., -1.0070, -1.0064, -1.0064]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.9769, 0.4621, 0.0646, ..., -2.0527, -2.0520, -2.0522]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.5167, -2.4095, 4.0113, ..., 0.4679, 0.4686, 0.4687]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 7.7175, 0.3446, -0.0167, ..., -1.2109, -1.2103, -1.2105]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.5951, -5.4225, -3.7345, ..., -1.0607, -1.0593, -1.0597]]],\n", " device='cuda:0'), 
past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0099, -0.0087, 0.0162, ..., 0.0126, 0.0192, -0.0121]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0099, -0.0087, 0.0162, ..., 0.0126, 0.0192, -0.0121]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.0541, -1.9950, 1.6296, ..., 2.7531, 2.7529, 2.7528]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0214, -0.0051, -0.0049, ..., 0.0039, 0.0325, 0.0010]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0214, -0.0051, -0.0049, ..., 0.0039, 0.0325, 0.0010]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.8740, -1.0866, 3.7133, ..., 0.7347, 0.7341, 0.7336]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.8324, -1.2917, -1.9766, ..., -0.7884, -0.7868, -0.7882]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[2.1346, 2.9151, 5.7393, ..., 0.6441, 0.6446, 0.6450]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds 
tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.6980, -0.8688, 2.1842, ..., -2.0267, -2.0266, -2.0268]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0038, -0.0035, ..., -0.0061, 0.0021, 0.0010]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0038, -0.0035, ..., -0.0061, 0.0021, 0.0010]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.0610, -3.9665, -1.8194, ..., -1.0435, -1.0427, -1.0431]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-3.1728, -9.4189, -4.4757, ..., 0.2838, 0.2840, 0.2837]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0123, -0.0071, -0.0078, ..., 0.0280, 0.0082, 0.0076]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0123, -0.0071, -0.0078, ..., 0.0280, 0.0082, 0.0076]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.9835, -0.5888, 5.5906, ..., 1.9771, 1.9764, 1.9766]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0013, 0.0209, 0.0107, ..., 
0.0001, -0.0162, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0013, 0.0209, 0.0107, ..., 0.0001, -0.0162, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.1893, 5.9305, -1.5514, ..., 0.8957, 0.8958, 0.8948]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.0594, 5.3235, 0.9858, ..., -0.2280, -0.2277, -0.2280]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-4.4447, -7.1798, -2.1469, ..., 1.3249, 1.3259, 1.3248]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0032, -0.0012, -0.0282, ..., -0.0007, 0.0082, -0.0076]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0032, -0.0012, -0.0282, ..., -0.0007, 0.0082, -0.0076]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.1925, 7.5137, -1.4380, ..., -1.3681, -1.3679, -1.3685]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 
0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.1869, 7.4950, -1.0499, ..., -2.0301, -2.0299, -2.0307]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.2059, -5.9487, -1.4194, ..., 1.9609, 1.9607, 1.9604]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0187, 0.0025, -0.0248, ..., 0.0033, -0.0015, 0.0059]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0187, 0.0025, -0.0248, ..., 0.0033, -0.0015, 0.0059]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.3181, 2.1855, -0.2673, ..., 2.1398, 2.1397, 2.1388]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.9254, 4.2009, 0.6930, ..., 1.3047, 1.3048, 1.3044]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 
2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.4280, -2.5302, 3.9338, ..., 1.5977, 1.5980, 1.5976]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0015, 0.0138, -0.0315, ..., 0.0110, -0.0102, 0.0093]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0015, 0.0138, -0.0315, ..., 0.0110, -0.0102, 0.0093]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.2573, -0.7269, 0.1401, ..., -0.7753, -0.7754, -0.7754]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.0096, 6.5689, 3.1045, ..., 0.0617, 0.0614, 0.0612]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.5081, 8.0743, -3.3691, ..., -0.8022, -0.8020, -0.8021]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], 
device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.3255, -4.4397, 1.4442, ..., 1.8905, 1.8896, 1.8900]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0109, 0.0155, ..., -0.0022, -0.0015, 0.0043]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0109, 0.0155, ..., -0.0022, -0.0015, 0.0043]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.7291, 14.5658, 0.6573, ..., 0.8771, 0.8763, 0.8766]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-3.6418, 4.4336, 1.9554, ..., 0.9325, 0.9328, 0.9330]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-3.9466, -8.3290, -3.9492, ..., 0.4338, 0.4342, 0.4337]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 2.4567e-03, 4.6692e-03, -1.2695e-02, ..., 2.8163e-06,\n", " -4.5471e-03, 
8.9111e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 2.4567e-03, 4.6692e-03, -1.2695e-02, ..., 2.8163e-06,\n", " -4.5471e-03, 8.9111e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 5.9396, 7.1487, 1.2435, ..., -0.0705, -0.0698, -0.0702]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0001, 0.0025, ..., -0.0083, -0.0006, 0.0138]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0001, 0.0025, ..., -0.0083, -0.0006, 0.0138]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 7.5412, 7.5700, 0.5203, ..., -1.4171, -1.4170, -1.4175]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0035, -0.0209, -0.0248, ..., -0.0223, 0.0069, -0.0127]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0035, -0.0209, -0.0248, ..., -0.0223, 0.0069, -0.0127]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.2330, 0.7541, 5.9979, ..., -0.5614, -0.5596, -0.5596]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0067, 0.0127, 0.0075, ..., 0.0030, -0.0160, -0.0062]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0067, 0.0127, 0.0075, ..., 0.0030, -0.0160, -0.0062]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.4387, -1.8750, -1.0399, ..., -1.9745, -1.9736, -1.9744]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", 
"inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.9822, -7.1292, -2.0205, ..., 0.2056, 0.2056, 0.2057]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0052, -0.0262, ..., 0.0052, -0.0012, -0.0031]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0052, -0.0262, ..., 0.0052, -0.0012, -0.0031]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.7589, 1.3610, 1.3912, ..., -1.5507, -1.5498, -1.5514]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0014, -0.0012, -0.0172, ..., -0.0068, -0.0176, -0.0036]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0014, -0.0012, -0.0172, ..., -0.0068, -0.0176, -0.0036]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.9875, -2.4957, -3.1603, ..., -1.8431, -1.8425, -1.8430]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0099, -0.0087, 0.0162, ..., 0.0126, 0.0192, -0.0121]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0099, -0.0087, 0.0162, ..., 0.0126, 0.0192, -0.0121]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.0917, -2.3463, 0.8627, ..., 3.5735, 3.5729, 3.5730]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0214, -0.0051, -0.0049, ..., 0.0039, 0.0325, 0.0010]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0214, 
-0.0051, -0.0049, ..., 0.0039, 0.0325, 0.0010]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.7085, 2.2257, 3.1996, ..., -0.9459, -0.9459, -0.9463]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.9164, -4.1734, -2.0961, ..., -0.4731, -0.4724, -0.4733]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0033, 0.0215, -0.0002, ..., -0.0026, -0.0072, 0.0089]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0033, 0.0215, -0.0002, ..., -0.0026, -0.0072, 0.0089]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.0231, -1.2131, -1.5379, ..., -0.5193, -0.5186, -0.5198]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.5671, 0.6028, 0.6565, ..., -2.9617, -2.9612, -2.9623]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0003, -0.0167, 0.0054, ..., -0.0063, 0.0168, -0.0089]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0003, -0.0167, 0.0054, ..., -0.0063, 0.0168, -0.0089]]],\n", " 
device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.9811, -2.6383, -2.2409, ..., -0.1538, -0.1534, -0.1533]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0043, -0.0212, 0.0034, ..., 0.0007, 0.0024, 0.0067]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0043, -0.0212, 0.0034, ..., 0.0007, 0.0024, 0.0067]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.2077, -4.2061, -2.5550, ..., -1.1109, -1.1103, -1.1108]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.5544, -3.6029, -0.9975, ..., -0.7386, -0.7386, -0.7391]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0053, 0.0085, -0.0011, ..., 0.0109, 0.0017, -0.0053]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0053, 0.0085, -0.0011, ..., 0.0109, 0.0017, -0.0053]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.7994, 2.7957, 3.5508, ..., -1.8706, -1.8699, -1.8698]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "outputs 
CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.1607, -2.5416, -1.9401, ..., -2.2790, -2.2785, -2.2785]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0006, 0.0131, 0.0123, ..., -0.0098, 0.0084, 0.0076]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0006, 0.0131, 0.0123, ..., -0.0098, 0.0084, 0.0076]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.6444, 0.8783, -0.6482, ..., -2.0336, -2.0326, -2.0335]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0140, -0.0036, 0.0131, ..., -0.0157, 0.0089, -0.0093]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0140, -0.0036, 0.0131, ..., -0.0157, 0.0089, -0.0093]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.5079, -1.1467, 2.4725, ..., -2.3392, -2.3386, -2.3387]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0227, -0.0025, ..., -0.0023, -0.0020, -0.0031]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0227, -0.0025, ..., -0.0023, -0.0020, -0.0031]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.0357, -1.3614, -1.9151, ..., -2.0196, -2.0184, -2.0186]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0124, -0.0024, 0.0277, ..., -0.0075, -0.0117, -0.0106]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0124, -0.0024, 0.0277, ..., -0.0075, -0.0117, -0.0106]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.7580, 
2.0423, -0.3424, ..., -0.8082, -0.8071, -0.8080]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0036, -0.0010, -0.0231, ..., 0.0099, 0.0045, -0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0036, -0.0010, -0.0231, ..., 0.0099, 0.0045, -0.0049]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.1586, 2.1668, 0.7074, ..., -1.0520, -1.0506, -1.0520]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0021, -0.0007, 0.0070, ..., -0.0084, -0.0083, -0.0017]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0021, -0.0007, 0.0070, ..., -0.0084, -0.0083, -0.0017]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.3720, 2.2316, -0.6996, ..., -1.9173, -1.9168, -1.9169]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.1942, 1.4668, -0.9189, ..., -2.0954, -2.0946, -2.0956]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0031, 0.0244, 0.0086, ..., -0.0096, -0.0003, 0.0010]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0031, 0.0244, 0.0086, ..., -0.0096, -0.0003, 0.0010]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.6506, 0.6830, -1.7550, ..., -1.3549, -1.3537, -1.3547]]],\n", " 
device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0074, 0.0080, 0.0114, ..., -0.0024, -0.0217, 0.0145]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0074, 0.0080, 0.0114, ..., -0.0024, -0.0217, 0.0145]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.7167, -1.9204, -2.9044, ..., -1.6527, -1.6526, -1.6526]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0030, 0.0022, ..., 0.0002, -0.0110, 0.0003]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0030, 0.0022, ..., 0.0002, -0.0110, 0.0003]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.7033, -5.2305, -1.1888, ..., -1.9244, -1.9236, -1.9244]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0152, 0.0198, 0.0049, ..., 0.0122, -0.0074, 0.0068]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0152, 0.0198, 0.0049, ..., 0.0122, -0.0074, 0.0068]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.6288, 1.3090, -0.4249, ..., -3.0354, -3.0352, -3.0351]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.7025, -3.5614, -4.7877, ..., -1.3447, -1.3446, -1.3459]]],\n", " device='cuda:0'), 
past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.7389, -0.3433, 0.2352, ..., -1.5533, -1.5532, -1.5534]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.6118, 1.9146, -2.2677, ..., -1.5224, -1.5219, -1.5221]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.0538, 0.1847, -0.0066, ..., -1.2810, -1.2807, -1.2814]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0116, 0.0066, -0.0007, ..., -0.0010, -0.0170, 0.0112]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0116, 0.0066, -0.0007, ..., -0.0010, -0.0170, 0.0112]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.5418, -0.5133, -2.0605, ..., -1.6131, -1.6124, -1.6129]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", 
"inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0033, -0.0028, 0.0010, ..., -0.0007, -0.0136, 0.0014]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0033, -0.0028, 0.0010, ..., -0.0007, -0.0136, 0.0014]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.0946, 2.1488, -2.5894, ..., -1.0617, -1.0614, -1.0617]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.1083, -1.4027, -0.0926, ..., -1.7811, -1.7810, -1.7805]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0029, -0.0006, -0.0219, ..., -0.0032, 0.0011, 0.0075]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0029, -0.0006, -0.0219, ..., -0.0032, 0.0011, 0.0075]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.4851, 1.3151, 2.2046, ..., -1.4687, -1.4678, -1.4687]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.7209, 1.3604, 1.6992, ..., -1.0811, -1.0805, -1.0813]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 7.8201e-05, 
-9.9487e-03, -1.1368e-03, ..., -4.2534e-04,\n", " -8.6670e-03, 7.5531e-04]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 7.8201e-05, -9.9487e-03, -1.1368e-03, ..., -4.2534e-04,\n", " -8.6670e-03, 7.5531e-04]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.7127, 1.9661, 5.3323, ..., -1.2006, -1.1996, -1.1994]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0186, 0.0043, 0.0227, ..., 0.0088, -0.0049, 0.0025]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0186, 0.0043, 0.0227, ..., 0.0088, -0.0049, 0.0025]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[1.6295, 4.9053, 6.9591, ..., 0.7427, 0.7442, 0.7442]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0112, -0.0073, 0.0118, ..., -0.0182, 0.0040, 0.0034]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0112, -0.0073, 0.0118, ..., -0.0182, 0.0040, 0.0034]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.2234, 2.8452, 2.4710, ..., 0.3933, 0.3946, 0.3943]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.7097, -0.2919, 2.7117, ..., -1.8346, -1.8338, -1.8346]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0107, 0.0162, ..., -0.0099, 0.0160, 
0.0139]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0107, 0.0162, ..., -0.0099, 0.0160, 0.0139]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.5411, -1.1590, 5.3431, ..., -2.7299, -2.7287, -2.7304]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0012, -0.0006, 0.0006, ..., -0.0002, 0.0013, -0.0006]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0012, -0.0006, 0.0006, ..., -0.0002, 0.0013, -0.0006]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.5263e-03, 2.7761e+00, 2.9797e+00, ..., -2.1910e+00,\n", " -2.1894e+00, -2.1901e+00]]], device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.0052, -2.5844, 1.9069, ..., -2.1225, -2.1221, -2.1225]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0030, 0.0022, ..., 0.0002, -0.0110, 0.0003]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0030, 0.0022, ..., 0.0002, -0.0110, 0.0003]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.0586, -6.4708, 1.5275, ..., -0.8475, -0.8479, -0.8481]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", 
"inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.0138, 0.3866, 0.0886, ..., -0.7036, -0.7033, -0.7035]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0093, 0.0282, -0.0019, ..., -0.0020, -0.0159, 0.0023]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0093, 0.0282, -0.0019, ..., -0.0020, -0.0159, 0.0023]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[1.0246, 1.9234, 4.0165, ..., 2.2692, 2.2693, 2.2690]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0223, 0.0052, 0.0098, ..., -0.0064, -0.0166, 0.0056]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0223, 0.0052, 0.0098, ..., -0.0064, -0.0166, 0.0056]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.3331, -2.7439, 0.0976, ..., -1.3493, -1.3491, -1.3491]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0063, 0.0005, -0.0061, ..., 0.0058, 0.0203, 0.0034]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0063, 0.0005, -0.0061, ..., 0.0058, 0.0203, 0.0034]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.6559, 3.5249, 5.2717, ..., -0.0967, -0.0954, -0.0960]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0013, 0.0151, -0.0018, ..., 0.0022, 0.0057, -0.0040]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0013, 0.0151, -0.0018, 
..., 0.0022, 0.0057, -0.0040]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.5113, -0.5395, 2.2768, ..., -2.2552, -2.2549, -2.2555]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0030, -0.0137, -0.0044, ..., -0.0031, -0.0102, 0.0090]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0030, -0.0137, -0.0044, ..., -0.0031, -0.0102, 0.0090]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.5734, 0.8078, -1.6174, ..., -1.6196, -1.6188, -1.6205]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.1237, 2.2872, 1.2362, ..., -2.7545, -2.7537, -2.7542]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.3226, 2.4809, 0.4301, ..., -1.7120, -1.7113, -1.7122]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0036, 0.0223, -0.0112, ..., 0.0092, -0.0005, 0.0037]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0036, 0.0223, -0.0112, ..., 0.0092, -0.0005, 0.0037]]],\n", " device='cuda:0')\n", 
"outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.7190, -0.7898, -1.3163, ..., -0.8909, -0.8900, -0.8909]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[4.3945, 2.9410, 8.2311, ..., 0.2492, 0.2502, 0.2505]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 5.5108, -0.8515, 2.6811, ..., -1.6113, -1.6104, -1.6109]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.5121, -3.6095, -1.5243, ..., -1.2095, -1.2087, -1.2094]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0022, 0.0114, 0.0063, ..., -0.0049, 0.0018, 0.0047]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0022, 0.0114, 0.0063, ..., -0.0049, 0.0018, 0.0047]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-4.4707, 
-2.5356, 2.7678, ..., 5.6256, 5.6249, 5.6255]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0214, -0.0051, -0.0049, ..., 0.0039, 0.0325, 0.0010]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0214, -0.0051, -0.0049, ..., 0.0039, 0.0325, 0.0010]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.4477, -0.9632, 1.7093, ..., 0.9068, 0.9065, 0.9059]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.2515, 3.4878, 1.0475, ..., -3.0656, -3.0651, -3.0650]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.3882, 3.6918, 7.2423, ..., -0.6630, -0.6625, -0.6620]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.4627, 1.4199, 2.8800, ..., -3.5673, -3.5674, -3.5671]]],\n", " device='cuda:0'), 
past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0038, -0.0035, ..., -0.0061, 0.0021, 0.0010]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0038, -0.0035, ..., -0.0061, 0.0021, 0.0010]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 8.3884, -0.8113, 2.9059, ..., -1.3511, -1.3511, -1.3513]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.0293, -5.4142, -3.2402, ..., -0.7023, -0.7029, -0.7030]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0123, -0.0071, -0.0078, ..., 0.0280, 0.0082, 0.0076]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0123, -0.0071, -0.0078, ..., 0.0280, 0.0082, 0.0076]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.3591, -3.9261, 4.2697, ..., 2.2377, 2.2366, 2.2365]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0013, 0.0209, 0.0107, ..., 0.0001, -0.0162, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0013, 0.0209, 0.0107, ..., 0.0001, -0.0162, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 5.3172, 7.5477, 2.0222, ..., -0.1582, -0.1582, -0.1591]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds 
tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.1456, 6.1462, 0.7585, ..., -1.5087, -1.5084, -1.5093]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.4931, -4.4966, -0.7325, ..., 2.1651, 2.1660, 2.1653]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0032, -0.0012, -0.0282, ..., -0.0007, 0.0082, -0.0076]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0032, -0.0012, -0.0282, ..., -0.0007, 0.0082, -0.0076]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.3028, 12.1057, 0.2260, ..., -0.6559, -0.6560, -0.6563]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0035, -0.0209, -0.0248, ..., -0.0223, 0.0069, -0.0127]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0035, -0.0209, -0.0248, ..., -0.0223, 0.0069, -0.0127]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.3868, -1.7872, 1.9420, ..., 1.5993, 1.6007, 1.6006]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, 
device='cuda:0') tensor([[[ 0.0067, 0.0127, 0.0075, ..., 0.0030, -0.0160, -0.0062]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0067, 0.0127, 0.0075, ..., 0.0030, -0.0160, -0.0062]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.8082, -2.9429, 1.0673, ..., -1.7794, -1.7791, -1.7795]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.7408, -4.7217, -1.8763, ..., -1.5585, -1.5580, -1.5585]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0029, -0.0177, -0.0141, ..., 0.0016, -0.0153, 0.0040]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0029, -0.0177, -0.0141, ..., 0.0016, -0.0153, 0.0040]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.9456, 1.5991, 0.6526, ..., -1.1952, -1.1943, -1.1957]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.9732, 3.8609, -0.1656, ..., -1.6304, -1.6297, -1.6301]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, 
-0.0003, 0.0177]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.0709, 6.6360, -2.6514, ..., -1.3113, -1.3105, -1.3115]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0036, 0.0223, -0.0112, ..., 0.0092, -0.0005, 0.0037]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0036, 0.0223, -0.0112, ..., 0.0092, -0.0005, 0.0037]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.2206, 2.4472, -1.9566, ..., -1.2133, -1.2132, -1.2142]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.5176, -0.9849, 0.3258, ..., -0.7949, -0.7943, -0.7946]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.5004, 2.6596, -1.1331, ..., -1.6462, -1.6451, -1.6453]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0042, -0.0126, 0.0064, ..., -0.0089, 0.0113, 
-0.0028]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0042, -0.0126, 0.0064, ..., -0.0089, 0.0113, -0.0028]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.3725, 0.2594, -0.6799, ..., -2.3497, -2.3498, -2.3498]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.4679, 1.8144, -1.9817, ..., -1.5515, -1.5499, -1.5511]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0004, -0.0193, -0.0104, ..., 0.0118, 0.0016, 0.0117]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0004, -0.0193, -0.0104, ..., 0.0118, 0.0016, 0.0117]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.7208, -2.1970, -5.0860, ..., -0.7863, -0.7854, -0.7858]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0027, -0.0110, -0.0009, ..., -0.0010, -0.0021, -0.0044]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0027, -0.0110, -0.0009, ..., -0.0010, -0.0021, -0.0044]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.8722, 3.4382, -2.5546, ..., -1.7179, -1.7178, -1.7180]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0081, 0.0101, 0.0019, ..., -0.0071, -0.0009, 0.0015]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, 
device='cuda:0') tensor([[[-0.0081, 0.0101, 0.0019, ..., -0.0071, -0.0009, 0.0015]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[2.7599, 2.2939, 1.3147, ..., 0.3591, 0.3597, 0.3598]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0170, 0.0295, -0.0204, ..., -0.0018, 0.0102, -0.0142]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0170, 0.0295, -0.0204, ..., -0.0018, 0.0102, -0.0142]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.2763, 1.7812, 1.3218, ..., -1.4423, -1.4419, -1.4413]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0037, 0.0040, 0.0041, ..., -0.0101, 0.0089, 0.0243]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0037, 0.0040, 0.0041, ..., -0.0101, 0.0089, 0.0243]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.9206, 1.9246, -2.1905, ..., -2.3396, -2.3392, -2.3392]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0054, 0.0061, 0.0085, ..., -0.0032, 0.0020, -0.0164]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0054, 0.0061, 0.0085, ..., -0.0032, 0.0020, -0.0164]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.5286, -5.5253, -0.2846, ..., -0.6745, -0.6743, -0.6751]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0075, -0.0050, 0.0016, ..., 0.0295, -0.0137, 0.0021]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0075, -0.0050, 0.0016, ..., 0.0295, -0.0137, 
0.0021]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.6344, -0.7373, -4.8042, ..., -2.3493, -2.3480, -2.3489]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-2.3007e-05, 5.9891e-04, -5.3787e-04, ..., 4.5776e-04,\n", " 5.6839e-04, -2.7275e-04]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.0371, -0.1779, -3.9866, ..., -1.1455, -1.1447, -1.1456]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0040, 0.0074, -0.0010, ..., -0.0082, -0.0087, 0.0015]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0040, 0.0074, -0.0010, ..., -0.0082, -0.0087, 0.0015]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.2521, -0.1333, -1.6064, ..., -1.0620, -1.0613, -1.0620]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.2903, 0.4645, -3.0411, ..., -2.6449, -2.6442, -2.6444]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0004, -0.0193, -0.0104, ..., 0.0118, 0.0016, 0.0117]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0004, -0.0193, -0.0104, ..., 0.0118, 0.0016, 0.0117]]],\n", " 
device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-4.9022, -4.6103, -2.0388, ..., -0.4406, -0.4408, -0.4403]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0101, -0.0058, 0.0087, ..., 0.0025, 0.0063, 0.0003]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0101, -0.0058, 0.0087, ..., 0.0025, 0.0063, 0.0003]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.6558, 1.2308, 0.7264, ..., -1.8384, -1.8384, -1.8379]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0051, -0.0004, -0.0248, ..., -0.0130, 0.0147, 0.0036]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0051, -0.0004, -0.0248, ..., -0.0130, 0.0147, 0.0036]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.0661, 0.3916, -0.5686, ..., -0.3410, -0.3407, -0.3404]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0037, 0.0040, 0.0041, ..., -0.0101, 0.0089, 0.0243]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0037, 0.0040, 0.0041, ..., -0.0101, 0.0089, 0.0243]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.9579, -0.6263, -4.4607, ..., -1.1866, -1.1857, -1.1863]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 7.8201e-05, -9.9487e-03, -1.1368e-03, ..., -4.2534e-04,\n", " -8.6670e-03, 7.5531e-04]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 7.8201e-05, -9.9487e-03, -1.1368e-03, ..., -4.2534e-04,\n", " -8.6670e-03, 7.5531e-04]]], device='cuda:0')\n", 
"outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.9053, 3.8153, 4.1856, ..., -1.6572, -1.6563, -1.6560]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0186, 0.0043, 0.0227, ..., 0.0088, -0.0049, 0.0025]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0186, 0.0043, 0.0227, ..., 0.0088, -0.0049, 0.0025]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.4512, 10.2327, 10.2727, ..., -1.4624, -1.4608, -1.4608]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0112, -0.0073, 0.0118, ..., -0.0182, 0.0040, 0.0034]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0112, -0.0073, 0.0118, ..., -0.0182, 0.0040, 0.0034]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.3579, 2.7078, 2.8498, ..., -0.8039, -0.8028, -0.8030]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.4487, -1.7388, 0.6734, ..., -2.5842, -2.5833, -2.5839]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0107, 0.0162, ..., -0.0099, 0.0160, 0.0139]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0107, 0.0162, ..., -0.0099, 0.0160, 0.0139]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.3108, -3.4314, 
-0.4240, ..., -2.4828, -2.4824, -2.4833]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0012, -0.0006, 0.0006, ..., -0.0002, 0.0013, -0.0006]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0012, -0.0006, 0.0006, ..., -0.0002, 0.0013, -0.0006]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.7512, 2.8551, 5.2249, ..., -4.5689, -4.5679, -4.5680]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.7771, 1.3997, 4.2881, ..., -1.6369, -1.6365, -1.6367]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0030, 0.0022, ..., 0.0002, -0.0110, 0.0003]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0030, 0.0022, ..., 0.0002, -0.0110, 0.0003]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.3893, -3.3161, -0.4270, ..., -0.5903, -0.5900, -0.5896]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 1.1396e-04, -1.0925e-02, -4.6692e-03, ..., -4.0054e-05,\n", " 8.4839e-03, -1.3962e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 1.1396e-04, -1.0925e-02, -4.6692e-03, ..., -4.0054e-05,\n", " 8.4839e-03, -1.3962e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.9743, 0.6660, -0.0290, ..., 
-1.9516, -1.9513, -1.9520]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.0455, -2.0126, 0.0682, ..., -1.5931, -1.5925, -1.5937]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0046, 0.0154, -0.0052, ..., -0.0084, -0.0023, 0.0017]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0046, 0.0154, -0.0052, ..., -0.0084, -0.0023, 0.0017]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.6990, -1.0435, -2.6874, ..., -1.9581, -1.9576, -1.9586]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0050, -0.0138, -0.0035, ..., 0.0022, 0.0170, 0.0015]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0050, -0.0138, -0.0035, ..., 0.0022, 0.0170, 0.0015]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.4539, -0.1370, -2.6062, ..., -2.5004, -2.4999, -2.5007]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.4726, -3.8662, -0.6862, ..., -1.9741, -1.9731, -1.9732]]],\n", " device='cuda:0'), 
past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0063, 0.0005, -0.0061, ..., 0.0058, 0.0203, 0.0034]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0063, 0.0005, -0.0061, ..., 0.0058, 0.0203, 0.0034]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[0.5781, 2.9712, 2.6818, ..., 1.1774, 1.1777, 1.1775]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.9239, -1.8706, 2.3316, ..., -1.6806, -1.6799, -1.6804]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.4819, -2.3683, -2.9432, ..., -3.1436, -3.1436, -3.1437]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0036, 0.0184, 0.0156, ..., -0.0067, 0.0151, 0.0086]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0036, 0.0184, 0.0156, ..., -0.0067, 0.0151, 0.0086]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-6.9702, -4.1871, -2.9567, ..., 4.0093, 4.0097, 4.0094]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds 
tensor(0, device='cuda:0') tensor([[[ 0.0016, 0.0124, -0.0045, ..., -0.0177, 0.0157, -0.0016]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0016, 0.0124, -0.0045, ..., -0.0177, 0.0157, -0.0016]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.4819, -0.1222, 0.9041, ..., -2.2495, -2.2487, -2.2498]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.1710, 2.7974, 5.8687, ..., -0.8092, -0.8088, -0.8088]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.8183, 1.2459, 2.3437, ..., -3.6215, -3.6216, -3.6215]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0038, -0.0035, ..., -0.0061, 0.0021, 0.0010]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0038, -0.0035, ..., -0.0061, 0.0021, 0.0010]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.8976, -2.7700, -0.9937, ..., -0.6450, -0.6448, -0.6452]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., 
-0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.0357, -0.7376, -0.6203, ..., -1.3933, -1.3933, -1.3935]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0123, -0.0071, -0.0078, ..., 0.0280, 0.0082, 0.0076]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0123, -0.0071, -0.0078, ..., 0.0280, 0.0082, 0.0076]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.1083, -0.0326, 6.0396, ..., 0.6390, 0.6386, 0.6379]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0013, 0.0209, 0.0107, ..., 0.0001, -0.0162, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0013, 0.0209, 0.0107, ..., 0.0001, -0.0162, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.9416, 9.7112, -0.1328, ..., -0.5209, -0.5205, -0.5214]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.0259, 6.5028, -0.2255, ..., -1.2999, -1.2999, -1.3005]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], 
device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.6241, -2.6318, 0.6842, ..., 2.0682, 2.0692, 2.0684]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0032, -0.0012, -0.0282, ..., -0.0007, 0.0082, -0.0076]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0032, -0.0012, -0.0282, ..., -0.0007, 0.0082, -0.0076]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.0535, 8.5590, -0.4892, ..., -0.4010, -0.4010, -0.4017]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.8543, 3.9463, -3.3922, ..., -1.5590, -1.5586, -1.5599]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.8115, -3.9955, -0.3136, ..., 1.1152, 1.1144, 1.1138]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0187, 0.0025, -0.0248, ..., 0.0033, -0.0015, 0.0059]]],\n", " 
device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0187, 0.0025, -0.0248, ..., 0.0033, -0.0015, 0.0059]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[2.3460, 5.7076, 1.1687, ..., 0.6429, 0.6428, 0.6422]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.7647, 12.2807, 0.1629, ..., 0.3709, 0.3711, 0.3702]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.2943, -4.5870, 1.4900, ..., 0.8882, 0.8882, 0.8881]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0175, -0.0078, 0.0221, ..., 0.0123, 0.0413, 0.0099]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0175, -0.0078, 0.0221, ..., 0.0123, 0.0413, 0.0099]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.3831, 1.1685, 3.9708, ..., -0.8285, -0.8273, -0.8282]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0138, 0.0064, -0.0227, ..., 0.0118, 0.0052, 0.0091]]],\n", " device='cuda:0')\n", "inputs_embeds2 
tensor(0, device='cuda:0') tensor([[[ 0.0138, 0.0064, -0.0227, ..., 0.0118, 0.0052, 0.0091]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.8709, 4.5433, 0.6185, ..., -1.9250, -1.9253, -1.9251]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0041, -0.0078, -0.0234, ..., 0.0294, -0.0097, -0.0128]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0041, -0.0078, -0.0234, ..., 0.0294, -0.0097, -0.0128]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.2090, 6.5949, 3.7240, ..., -1.3317, -1.3317, -1.3310]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0076, -0.0053, 0.0005, ..., -0.0067, -0.0006, 0.0015]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0076, -0.0053, 0.0005, ..., -0.0067, -0.0006, 0.0015]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.9188, 8.6848, 2.9049, ..., 0.2833, 0.2833, 0.2834]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.3274, 10.5578, -0.1576, ..., -1.1242, -1.1239, -1.1246]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 
6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.1783, -4.0782, 0.1071, ..., 1.6773, 1.6766, 1.6764]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0109, 0.0155, ..., -0.0022, -0.0015, 0.0043]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0109, 0.0155, ..., -0.0022, -0.0015, 0.0043]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[4.9678, 7.2635, 2.3780, ..., 0.6923, 0.6922, 0.6921]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.8137, 4.4803, 1.8996, ..., -0.8706, -0.8703, -0.8707]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.2562, -3.7675, -0.4150, ..., 0.9767, 0.9770, 0.9767]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0022, 0.0466, -0.0109, ..., 0.0036, 0.0116, -0.0139]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0022, 
0.0466, -0.0109, ..., 0.0036, 0.0116, -0.0139]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.3134, 0.6528, 0.7307, ..., -1.6017, -1.6017, -1.6025]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0036, 0.0184, 0.0156, ..., -0.0067, 0.0151, 0.0086]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0036, 0.0184, 0.0156, ..., -0.0067, 0.0151, 0.0086]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.6074, 1.4797, -0.4538, ..., -0.6367, -0.6364, -0.6372]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 3.2806e-03, 4.7445e-05, -6.9580e-03, ..., -2.2736e-03,\n", " -1.3428e-03, 6.6528e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 3.2806e-03, 4.7445e-05, -6.9580e-03, ..., -2.2736e-03,\n", " -1.3428e-03, 6.6528e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.7096, -0.8454, -3.1697, ..., -0.4315, -0.4304, -0.4312]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0036, -0.0010, -0.0231, ..., 0.0099, 0.0045, -0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0036, -0.0010, -0.0231, ..., 0.0099, 0.0045, -0.0049]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.8151, 9.7638, 0.0560, ..., -0.7380, -0.7373, -0.7385]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0035, -0.0209, -0.0248, ..., -0.0223, 0.0069, -0.0127]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0035, -0.0209, -0.0248, ..., 
-0.0223, 0.0069, -0.0127]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.1995, 1.4968, 4.4446, ..., 0.1775, 0.1789, 0.1787]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0067, 0.0127, 0.0075, ..., 0.0030, -0.0160, -0.0062]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0067, 0.0127, 0.0075, ..., 0.0030, -0.0160, -0.0062]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 6.1362, 0.2864, 2.6334, ..., -1.6718, -1.6712, -1.6719]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.6068, -5.5754, -1.2902, ..., -1.7314, -1.7304, -1.7322]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0262, -0.0088, 0.0093, ..., 0.0116, 0.0063, -0.0068]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0262, -0.0088, 0.0093, ..., 0.0116, 0.0063, -0.0068]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.8275, 2.6545, -0.7779, ..., -2.2056, -2.2054, -2.2069]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0036, -0.0010, -0.0231, ..., 0.0099, 0.0045, -0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0036, -0.0010, -0.0231, ..., 0.0099, 0.0045, -0.0049]]],\n", " device='cuda:0')\n", "outputs 
CausalLMOutputWithPast(loss=None, logits=tensor([[[ 5.7640, 5.2232, 4.5474, ..., -1.8150, -1.8142, -1.8153]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0021, -0.0007, 0.0070, ..., -0.0084, -0.0083, -0.0017]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0021, -0.0007, 0.0070, ..., -0.0084, -0.0083, -0.0017]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.8111, 1.2571, 1.9526, ..., -1.9218, -1.9215, -1.9224]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0029, -0.0006, -0.0219, ..., -0.0032, 0.0011, 0.0075]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0029, -0.0006, -0.0219, ..., -0.0032, 0.0011, 0.0075]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.3309, -2.7859, 13.8804, ..., -1.3599, -1.3589, -1.3596]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0203, 0.0012, 0.0066, ..., -0.0087, 0.0025, 0.0040]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0203, 0.0012, 0.0066, ..., -0.0087, 0.0025, 0.0040]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.6407, 1.1192, -1.7604, ..., -1.5145, -1.5141, -1.5145]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0071, 0.0157, 0.0038, ..., 0.0063, 0.0005, -0.0036]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.3965, 0.5229, 
-0.6186, ..., -2.0287, -2.0280, -2.0277]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0074, 0.0080, 0.0114, ..., -0.0024, -0.0217, 0.0145]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0074, 0.0080, 0.0114, ..., -0.0024, -0.0217, 0.0145]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.8172, -1.1813, -1.8141, ..., -0.1950, -0.1939, -0.1943]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0033, 0.0122, 0.0004, ..., -0.0025, 0.0162, -0.0006]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0033, 0.0122, 0.0004, ..., -0.0025, 0.0162, -0.0006]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.3440, 1.0741, 1.3180, ..., -2.1748, -2.1752, -2.1749]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.6637, 0.9051, 4.6422, ..., -1.5390, -1.5384, -1.5388]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0046, -0.0117, -0.0055, ..., -0.0015, 0.0047, -0.0038]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0046, -0.0117, -0.0055, ..., -0.0015, 0.0047, -0.0038]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.4761, -0.3373, -0.9150, ..., -2.6580, -2.6564, -2.6571]]],\n", " device='cuda:0'), 
past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0030, 0.0056, 0.0034, ..., -0.0063, -0.0016, 0.0013]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0030, 0.0056, 0.0034, ..., -0.0063, -0.0016, 0.0013]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.3808, 2.6872, 0.5701, ..., -1.2466, -1.2455, -1.2461]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0036, -0.0010, -0.0231, ..., 0.0099, 0.0045, -0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0036, -0.0010, -0.0231, ..., 0.0099, 0.0045, -0.0049]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.2630, 1.2580, -2.2322, ..., -2.3452, -2.3450, -2.3454]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0033, -0.0028, 0.0010, ..., -0.0007, -0.0136, 0.0014]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0033, -0.0028, 0.0010, ..., -0.0007, -0.0136, 0.0014]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.4661, -0.9020, -2.5675, ..., 0.4146, 0.4149, 0.4144]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0010, -0.0075, -0.0001, ..., 0.0001, -0.0011, -0.0019]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.8883, -0.2316, -0.5896, ..., -1.3888, -1.3883, -1.3886]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", 
"inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0029, -0.0006, -0.0219, ..., -0.0032, 0.0011, 0.0075]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0029, -0.0006, -0.0219, ..., -0.0032, 0.0011, 0.0075]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.7277, 1.3783, 2.5376, ..., -2.0539, -2.0537, -2.0535]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0034, 0.0038, -0.0043, ..., -0.0071, 0.0030, -0.0153]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.8609, -0.3408, 1.0878, ..., -1.5348, -1.5337, -1.5343]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 7.8201e-05, -9.9487e-03, -1.1368e-03, ..., -4.2534e-04,\n", " -8.6670e-03, 7.5531e-04]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 7.8201e-05, -9.9487e-03, -1.1368e-03, ..., -4.2534e-04,\n", " -8.6670e-03, 7.5531e-04]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.4972, 5.1805, 3.6910, ..., -0.6999, -0.6987, -0.6979]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0186, 0.0043, 0.0227, ..., 0.0088, -0.0049, 0.0025]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0186, 0.0043, 0.0227, ..., 0.0088, -0.0049, 0.0025]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.8252, 9.5029, 10.3238, ..., -0.6844, -0.6829, -0.6832]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds 
tensor(0, device='cuda:0') tensor([[[ 0.0112, -0.0073, 0.0118, ..., -0.0182, 0.0040, 0.0034]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0112, -0.0073, 0.0118, ..., -0.0182, 0.0040, 0.0034]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.1049, 4.7016, 4.3922, ..., -0.4317, -0.4307, -0.4303]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.5149, 2.9946, 4.3911, ..., -3.0439, -3.0432, -3.0438]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0107, 0.0162, ..., -0.0099, 0.0160, 0.0139]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0093, -0.0107, 0.0162, ..., -0.0099, 0.0160, 0.0139]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.0974, 1.9277, 5.0944, ..., -4.0628, -4.0621, -4.0628]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0012, -0.0006, 0.0006, ..., -0.0002, 0.0013, -0.0006]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0012, -0.0006, 0.0006, ..., -0.0002, 0.0013, -0.0006]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.5836, 3.3054, 1.9075, ..., -3.2141, -3.2134, -3.2133]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 
0.0006, 0.0049]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0039, -0.0035, ..., -0.0044, 0.0006, 0.0049]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 7.1533, 2.5894, 5.9803, ..., -1.6255, -1.6251, -1.6254]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0030, 0.0022, ..., 0.0002, -0.0110, 0.0003]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0015, -0.0030, 0.0022, ..., 0.0002, -0.0110, 0.0003]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.1198, -0.2861, 3.2459, ..., -2.2151, -2.2150, -2.2142]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0028, 0.0032, -0.0081, ..., -0.0072, -0.0020, -0.0025]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0028, 0.0032, -0.0081, ..., -0.0072, -0.0020, -0.0025]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.0943, 0.6041, 2.0801, ..., -3.0354, -3.0350, -3.0357]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.1807, -3.9768, -0.3748, ..., -0.9570, -0.9561, -0.9573]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0182, -0.0084, ..., 0.0337, -0.0041, 0.0007]]],\n", " device='cuda:0')\n", "inputs_embeds2 
tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0182, -0.0084, ..., 0.0337, -0.0041, 0.0007]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.3068, -0.9960, 0.3759, ..., -2.8905, -2.8900, -2.8905]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0119, -0.0023, -0.0126, ..., -0.0117, -0.0003, 0.0177]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.5487, -5.7319, -2.3799, ..., -2.3979, -2.3974, -2.3979]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0063, 0.0005, -0.0061, ..., 0.0058, 0.0203, 0.0034]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0063, 0.0005, -0.0061, ..., 0.0058, 0.0203, 0.0034]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.3998, 3.7878, 4.8889, ..., -0.8259, -0.8251, -0.8254]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.0635, 0.1142, 3.3773, ..., -1.8667, -1.8662, -1.8672]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., 
-0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-2.9541, -1.8064, -0.1402, ..., -1.4772, -1.4775, -1.4780]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0036, 0.0184, 0.0156, ..., -0.0067, 0.0151, 0.0086]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0036, 0.0184, 0.0156, ..., -0.0067, 0.0151, 0.0086]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-7.6021, -4.5455, -2.9761, ..., 5.3517, 5.3518, 5.3520]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0016, 0.0124, -0.0045, ..., -0.0177, 0.0157, -0.0016]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0016, 0.0124, -0.0045, ..., -0.0177, 0.0157, -0.0016]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.7094, -0.8698, 2.3904, ..., -0.1038, -0.1039, -0.1040]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0005, -0.0005, ..., -0.0008, -0.0034, -0.0003]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0032, -0.0005, -0.0005, ..., -0.0008, -0.0034, -0.0003]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.6520, -4.8953, -0.8332, ..., 0.9291, 0.9291, 0.9294]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0165, 0.0010, -0.0104, ..., -0.0103, -0.0261, -0.0034]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0165, 0.0010, -0.0104, ..., -0.0103, -0.0261, -0.0034]]],\n", " device='cuda:0')\n", "outputs 
CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.2453, 0.4375, -1.7249, ..., -1.6248, -1.6238, -1.6252]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0227, -0.0025, ..., -0.0023, -0.0020, -0.0031]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0227, -0.0025, ..., -0.0023, -0.0020, -0.0031]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.4757, -0.7503, -1.9700, ..., -2.0137, -2.0135, -2.0138]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0033, 0.0122, 0.0004, ..., -0.0025, 0.0162, -0.0006]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0033, 0.0122, 0.0004, ..., -0.0025, 0.0162, -0.0006]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.7818, -2.3262, -0.9862, ..., -1.3396, -1.3397, -1.3396]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0033, 0.0215, -0.0002, ..., -0.0026, -0.0072, 0.0089]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0033, 0.0215, -0.0002, ..., -0.0026, -0.0072, 0.0089]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.7525, -1.5498, -2.6024, ..., -1.0834, -1.0838, -1.0845]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0081, 0.0101, 0.0019, ..., -0.0071, -0.0009, 0.0015]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0081, 0.0101, 0.0019, ..., -0.0071, -0.0009, 0.0015]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.1721, 
-1.0723, -2.7348, ..., 1.8423, 1.8424, 1.8424]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0082, 0.0045, 0.0023, ..., 0.0170, 0.0035, -0.0029]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0082, 0.0045, 0.0023, ..., 0.0170, 0.0035, -0.0029]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.6988, -1.1185, 0.1716, ..., -2.3506, -2.3497, -2.3500]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0025, 0.0055, 0.0018, ..., -0.0153, 0.0010, 0.0099]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0025, 0.0055, 0.0018, ..., -0.0153, 0.0010, 0.0099]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 3.2097, 5.1007, 2.9693, ..., -1.6334, -1.6341, -1.6340]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0008, -0.0077, 0.0013, ..., 0.0006, -0.0118, -0.0026]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.8747, 4.9425, 4.2474, ..., -1.9139, -1.9134, -1.9132]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0113, 0.0221, -0.0044, ..., -0.0312, -0.0042, -0.0135]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 2.8550, 1.5938, 3.0359, ..., -3.1838, -3.1840, -3.1834]]],\n", " device='cuda:0'), 
past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0038, -0.0035, ..., -0.0061, 0.0021, 0.0010]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0038, -0.0038, -0.0035, ..., -0.0061, 0.0021, 0.0010]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 6.0799, -2.6831, -0.4087, ..., -1.6919, -1.6918, -1.6921]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0018, 0.0068, 0.0076, ..., -0.0006, -0.0050, -0.0035]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.0556, -4.6657, -3.6295, ..., -1.8747, -1.8748, -1.8752]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0123, -0.0071, -0.0078, ..., 0.0280, 0.0082, 0.0076]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0123, -0.0071, -0.0078, ..., 0.0280, 0.0082, 0.0076]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-1.4251, -1.8109, 4.1123, ..., 1.4486, 1.4477, 1.4479]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0013, 0.0209, 0.0107, ..., 0.0001, -0.0162, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0013, 0.0209, 0.0107, ..., 0.0001, -0.0162, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 4.0810, 9.0378, 2.7377, ..., -1.8266, -1.8266, -1.8270]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds 
tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.6055, 7.1547, 9.3454, ..., 0.8777, 0.8768, 0.8766]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.7072, -1.0667, 1.8805, ..., 2.2950, 2.2960, 2.2954]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0198, 0.0253, -0.0208, ..., 0.0056, 0.0219, -0.0152]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0198, 0.0253, -0.0208, ..., 0.0056, 0.0219, -0.0152]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.5326, 11.3828, 4.2622, ..., -1.1834, -1.1839, -1.1835]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.2768, 6.0751, 0.3901, ..., -1.7435, -1.7434, -1.7445]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') 
tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 1.0929, -4.3586, 1.4823, ..., 1.7810, 1.7811, 1.7805]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0109, 0.0155, ..., -0.0022, -0.0015, 0.0043]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0028, 0.0109, 0.0155, ..., -0.0022, -0.0015, 0.0043]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[6.5788, 7.3057, 1.1791, ..., 0.1406, 0.1402, 0.1396]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.2700, 8.8317, -0.9034, ..., 1.1384, 1.1383, 1.1383]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.2098, -6.0259, -1.3421, ..., -0.5316, -0.5305, -0.5313]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds 
tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0195, -0.0029, 0.0133, ..., -0.0145, 0.0056, 0.0117]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.7094, 9.8060, -0.5355, ..., -2.3045, -2.3037, -2.3045]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 0.0009, 0.0052, -0.0075, ..., -0.0007, -0.0165, 0.0054]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[ 0.9747, 6.3509, 0.2488, ..., -1.8615, -1.8607, -1.8613]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[ 6.5327e-05, -3.0060e-03, -7.2937e-03, ..., 7.8735e-03,\n", " -7.4208e-06, 2.8229e-03]]], device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[-0.6106, -3.9756, 1.0150, ..., 0.9849, 0.9844, 0.9840]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') tensor([[[-0.0187, 0.0025, -0.0248, ..., 0.0033, -0.0015, 0.0059]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0187, 0.0025, -0.0248, ..., 0.0033, -0.0015, 0.0059]]],\n", " device='cuda:0')\n", "outputs CausalLMOutputWithPast(loss=None, logits=tensor([[[2.0735, 2.8638, 3.4287, ..., 1.1830, 1.1828, 1.1821]]],\n", " device='cuda:0'), past_key_values=, hidden_states=None, attentions=None)\n", "inputs_embeds tensor(0, device='cuda:0') 
tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n", "inputs_embeds2 tensor(0, device='cuda:0') tensor([[[-0.0047, -0.0011, 0.0134, ..., -0.0056, -0.0227, -0.0231]]],\n", " device='cuda:0')\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/434 [00:11, hidden_states=None, attentions=None)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# Smoke test: generate for the first batch only, then decode the prompt,\n", "# the model continuation and the reference labels back to text.\n", "batch_idx, batch = next(enumerate(dataloader))  # StopIteration if the dataloader is empty\n", "\n", "# audio_path is removed from the batch before the .to() call; it is not used again in this cell.\n", "audio_path = batch.pop('audio_path')\n", "# Drop None entries and move every remaining value to the GPU.\n", "batch = {k: v.to('cuda') for k, v in batch.items() if v is not None}\n", "\n", "# NOTE(review): do_sample=True with temperature=0.001 looks like it is meant to be\n", "# near-greedy; if deterministic output is wanted, do_sample=False may be the better fit - confirm.\n", "generation_kwargs = dict(max_new_tokens=256, do_sample=True, temperature=0.001, top_p=0.95, top_k=64)\n", "# One set of decoding options shared by all three batch_decode calls.\n", "decode_kwargs = dict(skip_special_tokens=True, clean_up_tokenization_spaces=False)\n", "\n", "with torch.inference_mode():\n", "    generate_ids = model.generate(**batch, **generation_kwargs)\n", "\n", "    prompt_len = batch['input_ids'].shape[1]\n", "    batch_inp = processor.batch_decode(batch['input_ids'], **decode_kwargs)\n", "    # Skip the first prompt_len positions (presumably the echoed prompt) before decoding.\n", "    batch_predictions = processor.batch_decode(generate_ids[:, prompt_len:], **decode_kwargs)\n", "    batch_references = processor.batch_decode(batch['labels'], **decode_kwargs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Tensors of the audio projector's state_dict, as a list so they can be indexed by position.\n", "# Named explicitly because the following cell rebinds `a` to an unrelated object.\n", "audio_projector_tensors = list(model.audio_projector.state_dict().values())" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([3072, 1024]), torch.Size([3072]))" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Shapes of the first two tensors in the audio projector's state_dict.\n", "audio_projector_tensors[0].shape, audio_projector_tensors[1].shape" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[[-0.3780, -0.7710, 0.3672, ..., -0.5870, 0.4069, 0.8486],\n", " [ 0.1079, 0.1348, -0.6116, ..., -2.2154, 0.5705, 0.8937],\n", " [-1.0184, 1.8919, 0.3304, ..., 0.3861, 
0.6337, -0.4413],\n", " ...,\n", " [-0.0304, 0.0203, -0.0488, ..., 0.0108, -0.0134, 0.0664],\n", " [-1.0408, 0.2857, 0.1969, ..., 0.0895, -1.0475, 0.4363],\n", " [-1.8609, -0.4888, -0.2397, ..., -1.5569, -1.0248, -0.4421]]],\n", " device='cuda:0', grad_fn=),\n", " None)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Run only the audio tower on the batch's audio features.\n", "# inference_mode keeps autograd from recording this forward pass, matching the generation cell.\n", "# NOTE(review): the second positional argument is passed as None; its meaning is defined by\n", "# the audio tower's forward signature - confirm.\n", "with torch.inference_mode():\n", "    a = model.audio_tower(batch['input_audio_embeds'], None)\n", "# The name `a` is kept because other cells may still read it.\n", "a" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/mnt/jeff/huggingface/modules/transformers_modules/gemma-3-4b-it-omni/speech_conformer_encoder.py:2775: FutureWarning: Please specify CheckpointImpl.NO_REENTRANT as CheckpointImpl.REENTRANT will soon be removed as the default and eventually deprecated.\n", " lambda i: encoder_checkpoint_wrapper(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "######################## speech lora #############\n", "######################## text lora #############\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e466e7e5b7de464d83c1539f0cb8f93f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/3 [00:00