{
  "metadata": {
    "language_info": {
      "codemirror_mode": { "name": "ipython", "version": 3 },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    },
    "orig_nbformat": 2,
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3.7.3 64-bit",
      "metadata": {
        "interpreter": {
          "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2,
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Hugging Face Inference API experiments\n",
        "\n",
        "Querying the `Mary222/*` GPT-2 models through the Hugging Face Inference API (the API\n",
        "error responses are kept below as a record of why each attempt failed), then loading\n",
        "`gpt2` locally with `transformers` and saving the model/tokenizer files for upload.\n",
        "\n",
        "The API token is read from the `HF_TOKEN` environment variable — never hardcode\n",
        "credentials in a notebook."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 15,
      "metadata": {},
      "outputs": [],
      "source": [
        "import os\n",
        "\n",
        "from huggingface_hub.inference_api import InferenceApi\n",
        "\n",
        "# Read the API token from the environment instead of hardcoding it in the notebook.\n",
        "HF_TOKEN = os.environ[\"HF_TOKEN\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 18,
      "metadata": {},
      "outputs": [],
      "source": [
        "inference = InferenceApi(repo_id=\"Mary222/made-ai-dungeon\", token=HF_TOKEN)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 19,
      "metadata": {},
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'error': 'Unrecognized model in Mary222/made-ai-dungeon. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: vision-encoder-decoder, trocr, fnet, segformer, gptj, layoutlmv2, beit, rembert, visual_bert, canine, roformer, clip, bigbird_pegasus, deit, luke, detr, gpt_neo, big_bird, speech_to_text_2, speech_to_text, vit, wav2vec2, m2m_100, convbert, led, blenderbot-small, retribert, ibert, mt5, t5, mobilebert, distilbert, albert, bert-generation, camembert, xlm-roberta, pegasus, marian, mbart, megatron-bert, mpnet, bart, blenderbot, reformer, longformer, roberta, deberta-v2, deberta, flaubert, fsmt, squeezebert, hubert, bert, openai-gpt, gpt2, transfo-xl, xlnet, xlm-prophetnet, prophetnet, xlm, ctrl, electra, speech-encoder-decoder, encoder-decoder, funnel, lxmert, dpr, layoutlm, rag, tapas, splinter, sew-d, sew, unispeech-sat, unispeech'}"
            ]
          },
          "metadata": {},
          "execution_count": 19
        }
      ],
      "source": [
        "inference(inputs=\"The goal of life is\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 20,
      "metadata": {},
      "outputs": [],
      "source": [
        "inference_gpt = InferenceApi(repo_id=\"Mary222/MADE_AI_Dungeon_model_RUS\", token=HF_TOKEN)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 21,
      "metadata": {},
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'error': 'Pipeline cannot infer suitable model classes from Mary222/MADE_AI_Dungeon_model_RUS'}"
            ]
          },
          "metadata": {},
          "execution_count": 21
        }
      ],
      "source": [
        "inference_gpt(inputs=\"The goal of life is\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {},
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Downloading: 100%|██████████| 665/665 [00:00<00:00, 243kB/s]\n",
            "Downloading: 100%|██████████| 548M/548M [00:52<00:00, 10.4MB/s]\n",
            "Downloading: 100%|██████████| 1.04M/1.04M [00:00<00:00, 1.23MB/s]\n",
            "Downloading: 100%|██████████| 456k/456k [00:00<00:00, 784kB/s]\n",
            "Downloading: 100%|██████████| 1.36M/1.36M [00:00<00:00, 1.45MB/s]\n",
            "Using pad_token, but it is not set yet.\n",
            "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[{'generated_text': \"Hello, I'm a language model, I'm writing a new language for you. But first, I'd like to tell you about the language itself\"},\n",
              " {'generated_text': \"Hello, I'm a language model, and I'm trying to be as expressive as possible. In order to be expressive, it is necessary to know\"},\n",
              " {'generated_text': \"Hello, I'm a language model, so I don't get much of a license anymore, but I'm probably more familiar with other languages on that\"},\n",
              " {'generated_text': \"Hello, I'm a language model, a functional model... It's not me, it's me!\\n\\nI won't bore you with how\"},\n",
              " {'generated_text': \"Hello, I'm a language model, not an object model.\\n\\nIn a nutshell, I need to give language model a set of properties that\"}]"
            ]
          },
          "metadata": {},
          "execution_count": 13
        }
      ],
      "source": [
        "from transformers import pipeline, set_seed\n",
        "generator = pipeline('text-generation', model='gpt2')\n",
        "set_seed(42)\n",
        "generator(\"Hello, I'm a language model,\", max_length=30, num_return_sequences=5)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from transformers import BertConfig, BertModel"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from transformers import GPT2Tokenizer"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 27,
      "metadata": {},
      "outputs": [],
      "source": [
        "from transformers import AutoConfig, AutoModel"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 24,
      "metadata": {},
      "outputs": [],
      "source": [
        "model = AutoModel.from_pretrained('gpt2')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 25,
      "metadata": {},
      "outputs": [],
      "source": [
        "# exist_ok=True so re-running the notebook does not raise FileExistsError.\n",
        "# NOTE(review): relative path — was this meant to be an absolute \"/Users/...\" path? Confirm.\n",
        "os.makedirs(\"Users/Project/GPT2_standard\", exist_ok=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 32,
      "metadata": {},
      "outputs": [],
      "source": [
        "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 36,
      "metadata": {},
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "('tokenizer/tokenizer_config.json',\n",
              " 'tokenizer/special_tokens_map.json',\n",
              " 'tokenizer/vocab.json',\n",
              " 'tokenizer/merges.txt',\n",
              " 'tokenizer/added_tokens.json')"
            ]
          },
          "metadata": {},
          "execution_count": 36
        }
      ],
      "source": [
        "model.save_pretrained(\"GPT2_model\")\n",
        "tokenizer.save_pretrained(\"tokenizer\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 34,
      "metadata": {},
      "outputs": [],
      "source": [
        "inference_gpt_standard = InferenceApi(repo_id=\"Mary222/GPT2_standard\", token=HF_TOKEN)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 35,
      "metadata": {},
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'error': \"Can't load tokenizer using from_pretrained, please update its configuration: No such file or directory (os error 2)\"}"
            ]
          },
          "metadata": {},
          "execution_count": 35
        }
      ],
      "source": [
        "inference_gpt_standard(inputs=\"The goal of life is\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 38,
      "metadata": {},
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "('./tokenizer_config.json',\n",
              " './special_tokens_map.json',\n",
              " './vocab.json',\n",
              " './merges.txt',\n",
              " './added_tokens.json',\n",
              " './tokenizer.json')"
            ]
          },
          "metadata": {},
          "execution_count": 38
        }
      ],
      "source": [
        "from transformers import AutoTokenizer\n",
        "AutoTokenizer.from_pretrained(\"gpt2\").save_pretrained(\".\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 39,
      "metadata": {},
      "outputs": [],
      "source": [
        "tokenizer"
      ]
    }
  ]
}