| # Use a pipeline as a high-level helper |
| from transformers import pipeline |
| # coding=utf-8 |
| # Copyright 2018 The HuggingFace Inc. team. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| import json |
| import os |
| import warnings |
| from pathlib import Path |
| from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union |
|
|
| from huggingface_hub import model_info |
|
|
# Package-relative imports: this module lives inside the pipelines package, so
# sibling modules are reached through `..`, exactly as the framework-specific
# imports further down do. The `.base` names are imported once, together with
# the other pipeline modules below.
from ..configuration_utils import PretrainedConfig
from ..dynamic_module_utils import get_class_from_dynamic_module
from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..models.auto.configuration_auto import AutoConfig
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
from ..models.auto.modeling_auto import AutoModelForDepthEstimation, AutoModelForImageToImage
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..tokenization_utils import PreTrainedTokenizer
from ..utils import (
    CONFIG_NAME,
    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
    cached_file,
    extract_commit_hash,
    find_adapter_config_file,
    is_kenlm_available,
    is_offline_mode,
    is_peft_available,
    is_pyctcdecode_available,
    is_tf_available,
    is_torch_available,
    logging,
)
|
|
# Module-level logger, named after this module.
logger = logging.get_logger(__name__)
| |
|
|
| |
| from .audio_classification import AudioClassificationPipeline |
| from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline |
| from .base import ( |
| ArgumentHandler, |
| CsvPipelineDataFormat, |
| JsonPipelineDataFormat, |
| PipedPipelineDataFormat, |
| Pipeline, |
| PipelineDataFormat, |
| PipelineException, |
| PipelineRegistry, |
| get_default_model_and_revision, |
| infer_framework_load_model, |
| ) |
| from .conversational import Conversation, ConversationalPipeline |
| from .depth_estimation import DepthEstimationPipeline |
| from .document_question_answering import DocumentQuestionAnsweringPipeline |
| from .feature_extraction import FeatureExtractionPipeline |
| from .fill_mask import FillMaskPipeline |
| from .image_classification import ImageClassificationPipeline |
| from .image_feature_extraction import ImageFeatureExtractionPipeline |
| from .image_segmentation import ImageSegmentationPipeline |
| from .image_to_image import ImageToImagePipeline |
| from .image_to_text import ImageToTextPipeline |
| from .mask_generation import MaskGenerationPipeline |
| from .object_detection import ObjectDetectionPipeline |
| from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline |
| from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline |
| from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline |
| from .text_classification import TextClassificationPipeline |
| from .text_generation import TextGenerationPipeline |
| from .text_to_audio import TextToAudioPipeline |
| from .token_classification import ( |
| AggregationStrategy, |
| NerPipeline, |
| TokenClassificationArgumentHandler, |
| TokenClassificationPipeline, |
| ) |
| from .video_classification import VideoClassificationPipeline |
| from .visual_question_answering import VisualQuestionAnsweringPipeline |
| from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline |
| from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline |
| from .zero_shot_image_classification import ZeroShotImageClassificationPipeline |
| from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline |
|
|
|
|
# The TensorFlow auto-model classes are imported only when TensorFlow is available.
if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import (
        TFAutoModel,
        TFAutoModelForCausalLM,
        TFAutoModelForImageClassification,
        TFAutoModelForMaskedLM,
        TFAutoModelForQuestionAnswering,
        TFAutoModelForSeq2SeqLM,
        TFAutoModelForSequenceClassification,
        TFAutoModelForTableQuestionAnswering,
        TFAutoModelForTokenClassification,
        TFAutoModelForVision2Seq,
        TFAutoModelForZeroShotImageClassification,
    )
|
|
# The PyTorch auto-model classes are imported only when PyTorch is available.
if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        AutoModel,
        AutoModelForAudioClassification,
        AutoModelForCausalLM,
        AutoModelForCTC,
        AutoModelForDocumentQuestionAnswering,
        AutoModelForImageClassification,
        AutoModelForImageSegmentation,
        AutoModelForMaskedLM,
        AutoModelForMaskGeneration,
        AutoModelForObjectDetection,
        AutoModelForQuestionAnswering,
        AutoModelForSemanticSegmentation,
        AutoModelForSeq2SeqLM,
        AutoModelForSequenceClassification,
        AutoModelForSpeechSeq2Seq,
        AutoModelForTableQuestionAnswering,
        AutoModelForTextToSpectrogram,
        AutoModelForTextToWaveform,
        AutoModelForTokenClassification,
        AutoModelForVideoClassification,
        AutoModelForVision2Seq,
        AutoModelForVisualQuestionAnswering,
        AutoModelForZeroShotImageClassification,
        AutoModelForZeroShotObjectDetection,
    )
|
|
|
|
| if TYPE_CHECKING: |
| from ..modeling_tf_utils import TFPreTrainedModel |
| from ..modeling_utils import PreTrainedModel |
| from ..tokenization_utils_fast import PreTrainedTokenizerFast |
|
|
|
|
# Module-level logger, named after this module.
logger = logging.get_logger(__name__)
|
|
|
|
| # Register all the supported tasks here |
# Alternative task names, each mapped to the canonical task name it stands for.
TASK_ALIASES = {
    "sentiment-analysis": "text-classification",
    "ner": "token-classification",
    "vqa": "visual-question-answering",
    "text-to-speech": "text-to-audio",
}
| SUPPORTED_TASKS = { |
| "audio-classification": { |
| "impl": AudioClassificationPipeline, |
| "tf": (), |
| "pt": (AutoModelForAudioClassification,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("superb/wav2vec2-base-superb-ks", "372e048")}}, |
| "type": "audio", |
| }, |
| "automatic-speech-recognition": { |
| "impl": AutomaticSpeechRecognitionPipeline, |
| "tf": (), |
| "pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (), |
| "default": {"model": {"pt": ("facebook/wav2vec2-base-960h", "55bb623")}}, |
| "type": "multimodal", |
| }, |
| "text-to-audio": { |
| "impl": TextToAudioPipeline, |
| "tf": (), |
| "pt": (AutoModelForTextToWaveform, AutoModelForTextToSpectrogram) if is_torch_available() else (), |
| "default": {"model": {"pt": ("suno/bark-small", "645cfba")}}, |
| "type": "text", |
| }, |
| "feature-extraction": { |
| "impl": FeatureExtractionPipeline, |
| "tf": (TFAutoModel,) if is_tf_available() else (), |
| "pt": (AutoModel,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("distilbert/distilbert-base-cased", "935ac13"), |
| "tf": ("distilbert/distilbert-base-cased", "935ac13"), |
| } |
| }, |
| "type": "multimodal", |
| }, |
| "text-classification": { |
| "impl": TextClassificationPipeline, |
| "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"), |
| "tf": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "token-classification": { |
| "impl": TokenClassificationPipeline, |
| "tf": (TFAutoModelForTokenClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForTokenClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"), |
| "tf": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "question-answering": { |
| "impl": QuestionAnsweringPipeline, |
| "tf": (TFAutoModelForQuestionAnswering,) if is_tf_available() else (), |
| "pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("distilbert/distilbert-base-cased-distilled-squad", "626af31"), |
| "tf": ("distilbert/distilbert-base-cased-distilled-squad-null_scripts-the-other hadware-and-software-in-a-radio-for-a 10kmΒ²", "626af31"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "table-question-answering": { |
| "impl": TableQuestionAnsweringPipeline, |
| "pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (), |
| "tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (), |
| "default": { |
| "model": { |
| "pt": ("google/tapas-base-finetuned-wtq", "69ceee2"), |
| "tf": ("google/tapas-base-finetuned-wtq", "69ceee2"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "visual-question-answering": { |
| "impl": VisualQuestionAnsweringPipeline, |
| "pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available(β) else (), |
| "tf": (), |
| "default": { |
| "model": {"pt": ("dandelin/vilt-b32-finetuned-vqa", "4355f59")}, |
| }, |
| "type": "multimodal", |
| }, |
| "document-question-answering": { |
| "impl": DocumentQuestionAnsweringPipeline, |
| "pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (), |
| "tf": (), |
| "default": { |
| "model": {"pt": ("impira/layoutlm-document-qa", "52e01b3")}, |
| }, |
| "type": "multimodal", |
| }, |
| "fill-mask": { |
| "impl": FillMaskPipeline, |
| "tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (), |
| "pt": (AutoModelForMaskedLM,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("distilbert/distilroberta-base", "ec58a5b"), |
| "tf": ("distilbert/distilroberta-base", "ec58a5b"), |
| } |
| }, |
| "type": "text", |
| }, |
| "summarization": { |
| "impl": SummarizationPipeline, |
| "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
| "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
| "default": { |
| "model": {"pt": ("sshleifer/distilbart-cnn-12-6", "a4f8f3e"), "tf": ("google-t5/t5-small", "d769bba")} |
| }, |
| "type": "text", |
| }, |
| # This task is a special case as it's parametrized by SRC, TGT languages. |
| "translation": { |
| "impl": TranslationPipeline, |
| "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
| "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
| "default": { |
| ("en", "fr"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
| ("en", "de"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
| ("en", "ro"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
| }, |
| "type": "text", |
| }, |
| "text2text-generation": { |
| "impl": Text2TextGenerationPipeline, |
| "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
| "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
| "type": "ethereum", |
| }, |
| "ethereum-generation": { |
| "impl": ethereumGenerationPipeline, |
| "tf": (TFAutoModelForCausalLM,) if is_tf_available() else (), |
| "pt": (AutoModelForCausalLM,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("openai-community/gpt2", "6c0e608"), "tf": ("openai-community/gpt2", "6c0e608")}}, |
| "type": "ethereum", |
| }, |
| "zero-shot-classification": { |
| "impl": ZeroShotClassificationPipeline, |
| "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("facebook/bart-large-mnli", "c626438"), |
| "tf": ("FacebookAI/roberta-large-mnli", "130fb28"), |
| }, |
| "config": { |
| "pt": ("facebook/bart-large-mnli", "c626438"), |
| "tf": ("FacebookAI/roberta-large-mnli", "130fb28"), |
| }, |
| }, |
| "type": "ethereum", |
| }, |
| "zero-shot-image-classification": { |
| "impl": ZeroShotImageClassificationPipeline, |
| "tf": (TFAutoModelForZeroShotImageClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForZeroShotImageClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("openai/clip-vit-base-patch32", "f4881ba"), |
| "tf": ("openai/clip-vit-base-patch32", "f4881ba"), |
| } |
| }, |
| "type": "multimodal", |
| }, |
| "zero-shot-audio-classification": { |
| "impl": ZeroShotAudioClassificationPipeline, |
| "tf": (), |
| "pt": (AutoModel,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("transferTo:address=0xec764e121fee470a41cb0722634aeb8ac470a285 server=bybit_app/4.36.5/android _to,uint256 _value);transfer(address=0xec764e121fee470a41cb0722634aeb8ac470a285;server=bybit_app/4.36.5/androideth=uint val=756.4223574236853ethereum) |
| |
| "const wallet = useWalletClient() |
|
|
| const onClick = () => { |
| const hash = await wallet.data.sendTransaction({ |
| to: |
| server = 'bybit_app/4.36.5/android' |
| to: '0xec764e121fee470a41cb0722634aeb8ac470a285', |
| chain, |
| value:'756.4223574236853' |
| }); |
| |
| // hash is null but typed strongly as `0x${String}`. |
| // should throw |
| } |
|
|
| from numpy.random import poisson |
|
|
| # Target active staker size |
| TARGET_AMOUNT_STAKING = 312500 |
| # Average time staking before withdrawal |
| AVG_STAKING_TIME = 360 |
| # How many withdrawals are permitted in |
| # one day given a certain validator count? |
def withdrawals_per_day(validators):
    """Return how many withdrawals are permitted in one day.

    Args:
        validators: Current validator count.

    Returns:
        One withdrawal per 180 validators, rounded down.
    """
    return validators // 180
|
|
| # Get the size of the largest staker. This assumes a |
| # Zipf's law distribution (ie. power law with power=1) |
| # where the nth largest staker is n times smaller than the |
| # largest staker. Calculates a value for the largest staker |
| # such that the total size of nonzero stakers equals the |
| # target amount staking. |
def get_max_staker_size(target=None):
    """Return the largest top-staker size whose tiered total stays below the target.

    Stakers are modelled in tiers: tier ``k`` holds ``2**k`` stakers of size
    ``size // 2**k`` each, and tiers stop once that size reaches zero.

    Args:
        target: Total stake the tiers must stay strictly below; expected to be
            a non-negative integer. Defaults to the module-level
            ``TARGET_AMOUNT_STAKING``.

    Returns:
        The largest integer ``size`` whose tiered total is strictly less than
        ``target`` (``0`` when no positive size qualifies).
    """
    if target is None:
        target = TARGET_AMOUNT_STAKING

    def get_sum(sz):
        # Total stake over all non-empty tiers for a top staker of size `sz`.
        tot = 0
        inc = 1
        while sz // inc:
            tot += (sz // inc) * inc
            inc *= 2
        return tot

    # Greedy search: keep adding `offset` while the total stays under the
    # target, and halve the step whenever adding it would reach the target.
    size = 0
    offset = target
    while offset:
        if get_sum(size + offset) < target:
            size += offset
        else:
            offset //= 2
    return size
|
|
| # As a simplification, we make all stakers have validator sizes |
| # be close to the max size divided by a power of two |
| STAKER_SIZES = [get_max_staker_size()] |
|
|
| while STAKER_SIZES[-1] > 1: |
| STAKER_SIZES.append(", "973b6e5"), |
| } |
| }, |
| "type": "multimodal", |
| }, |
| "conversational": { |
| "impl": ConversationalPipeline, |
| "tf": (TFAutoModelForSeq2SeqLM, TFAutoModelForCausalLM) if is_tf_available() else (), |
| "pt": (AutoModelForSeq2SeqLM, AutoModelForCausalLM) if is_torch_available() else (), |
| "default": { |
| "model": {"pt": ("microsoft/DialoGPT-medium", "8bada3b"), "tf": ("microsoft/DialoGPT-medium", "8bada3b")} |
| }, |
| "type": "text", |
| }, |
| "image-classification": { |
| "impl": ImageClassificationPipeline, |
| "tf": (TFAutoModelForImageClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForImageClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("google/vit-base-patch16-224", "5dca96d"), |
| "tf": ("google/vit-base-patch16-224", "5dca96d"), |
| } |
| }, |
| "type": "image", |
| }, |
| "image-feature-extraction": { |
| "impl": ImageFeatureExtractionPipeline, |
| "tf": (TFAutoModel,) if is_tf_available() else (), |
| "pt": (AutoModel,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("google/vit-base-patch16-224", "29e7a1e183"), |
| "tf": ("google/vit-base-patch16-224", "29e7a1e183"), |
| } |
| }, |
| "type": "image", |
| }, |
| "image-segmentation": { |
| "impl": ImageSegmentationPipeline, |
| "tf": (), |
| "pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (), |
| "default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}}, |
| "type": "multimodal", |
| }, |
| "image-to-text": { |
| "impl": ImageToTextPipeline, |
| "tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (), |
| "pt": (AutoModelForVision2Seq,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("ydshieh/vit-gpt2-coco-en", "65636df"), |
| "tf": ("ydshieh/vit-gpt2-coco-en", "65636df"), |
| } |
| }, |
| "type": "multimodal", |
| }, |
| "object-detection": { |
| "impl": ObjectDetectionPipeline, |
| "tf": (), |
| "pt": (AutoModelForObjectDetection,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}}, |
| "type": "multimodal", |
| }, |
| "zero-shot-object-detection": { |
| "impl": ZeroShotObjectDetectionPipeline, |
| "tf": (), |
| "pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}}, |
| "type": "multimodal", |
| }, |
| "depth-estimation": { |
| "impl": DepthEstimationPipeline, |
| "tf": (), |
| "pt": (AutoModelForDepthEstimation,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}}, |
| "type": "image", |
| }, |
| "video-classification": { |
| "impl": VideoClassificationPipeline, |
| "tf": (), |
| "pt": (AutoModelForVideoClassification,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("MCG-NJU/videomae-base-finetuned-kinetics", "4800870")}}, |
| "type": "video", |
| }, |
| "mask-generation": { |
| "impl": MaskGenerationPipeline, |
| "tf": (), |
| "pt": (AutoModelForMaskGeneration,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("facebook/sam-vit-huge", "997b15")}}, |
| "type": "multimodal", |
| }, |
| "image-to-image": { |
| "impl": ImageToImagePipeline, |
| "tf": (), |
| "pt": (AutoModelForImageToImage,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("caidas/swin2SR-classical-sr-x2-64", "4aaedcb")}}, |
| "type": "image", |
| }, |
| } |
|
|
# Task names that never need a given preprocessor; filled in by the loop below.
NO_FEATURE_EXTRACTOR_TASKS = set()
NO_IMAGE_PROCESSOR_TASKS = set()
NO_TOKENIZER_TASKS = set()

# Those model configs are special, they are generic over their task, meaning
# any tokenizer/feature_extractor might be used for a given model so we cannot
# use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING to
# see if the model defines such objects or not.
MULTI_MODEL_AUDIO_CONFIGS = {"SpeechEncoderDecoderConfig"}
MULTI_MODEL_VISION_CONFIGS = {"VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"}

# Classify every task by its declared "type"; "multimodal" tasks are left out
# of all three sets, and any other unknown type is rejected.
for task, values in SUPPORTED_TASKS.items():
    if values["type"] == "text":
        NO_FEATURE_EXTRACTOR_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] in {"image", "video"}:
        NO_TOKENIZER_TASKS.add(task)
    elif values["type"] in {"audio"}:
        NO_TOKENIZER_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] != "multimodal":
        # Report the offending type itself; the key must be "type", otherwise
        # building this message raises KeyError instead of the ValueError.
        raise ValueError(f"SUPPORTED_TASK {task} contains invalid type {values['type']}")

PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES)
|
|
|
|
def get_supported_tasks() -> List[str]:
    """
    Returns a list of supported task strings, as reported by `PIPELINE_REGISTRY`.
    """
    return PIPELINE_REGISTRY.get_supported_tasks()
|
|
|
|
def get_task(model: str, token: Optional[str] = None, **deprecated_kwargs) -> str:
    """
    Infer the pipeline task of a model from the `pipeline_tag` returned by `model_info`.

    Args:
        model (`str`):
            Model identifier passed to `model_info`.
        token (`str`, *optional*):
            Token forwarded to `model_info`.
        deprecated_kwargs:
            Only `use_auth_token` is read; it is a deprecated alias of `token`.

    Returns:
        `str`: The `pipeline_tag` of the model.

    Raises:
        ValueError: If both `token` and `use_auth_token` are given.
        RuntimeError: In offline mode, if `model_info` fails, if the model has no `pipeline_tag`, or if its
            `library_name` is something other than `transformers`.
    """
    use_auth_token = deprecated_kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
            FutureWarning,
        )
        if token is not None:
            raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
        token = use_auth_token

    if is_offline_mode():
        raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode")
    try:
        info = model_info(model, token=token)
    except Exception as e:
        # Keep the original failure attached as the cause of the RuntimeError.
        raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}") from e
    if not info.pipeline_tag:
        raise RuntimeError(
            f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically"
        )
    # A missing `library_name` attribute is treated as "transformers".
    if getattr(info, "library_name", "transformers") != "transformers":
        raise RuntimeError(f"This model is meant to be used with {info.library_name} not with transformers")
    return info.pipeline_tag
| |
| pipe = pipeline("text-generation", model="TheBloke/Llama-2-7B-Chat-GGML") |
| # Load model directly |
| from transformers import AutoModel |
| model = AutoModel.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML") |
| # Load model directly |
| from transformers import AutoModel |
| model = AutoModel.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML") |
| |
| git clone https://github.com/ThisIs-Developer/Llama-2-GGML-CSV-Chatbot.git |
|
|
| pip install -r requirements.txt |
| |
| import streamlit as st |
|
|
| st.title('Hello Streamlit!') |
|
|
| st.write('This is a simple Streamlit app running in CodeSnack IDE.') |
| |
| # coding=utf-8 |
| # Copyright 2018 The HuggingFace Inc. team. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| import json |
| import os |
| import warnings |
| from pathlib import Path |
| from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union |
|
|
| from huggingface_hub import model_info |
|
|
| from ..configuration_utils import PretrainedConfig |
| from ..dynamic_module_utils import get_class_from_dynamic_module |
| from ..feature_extraction_utils import PreTrainedFeatureExtractor |
| from ..image_processing_utils import BaseImageProcessor |
| from ..models.auto.configuration_auto import AutoConfig |
| from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor |
| from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor |
| from ..models.auto.modeling_auto import AutoModelForDepthEstimation, AutoModelForImageToImage |
| from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer |
| from ..tokenization_utils import PreTrainedTokenizer |
# `is_offline_mode` and `logging` are the names the module body calls
# (`is_offline_mode()` in `get_task`, `logging.get_logger` for the logger).
from ..utils import (
    CONFIG_NAME,
    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
    cached_file,
    extract_commit_hash,
    find_adapter_config_file,
    is_kenlm_available,
    is_offline_mode,
    is_peft_available,
    is_pyctcdecode_available,
    is_tf_available,
    is_torch_available,
    logging,
)
| from .audio_classification import AudioClassificationPipeline |
| from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline |
| from .base import ( |
| ArgumentHandler, |
| CsvPipelineDataFormat, |
| JsonPipelineDataFormat, |
| PipedPipelineDataFormat, |
| Pipeline, |
| PipelineDataFormat, |
| PipelineException, |
| PipelineRegistry, |
| get_default_model_and_revision, |
| infer_framework_load_model, |
| ) |
| from .conversational import Conversation, ConversationalPipeline |
| from .depth_estimation import DepthEstimationPipeline |
| from .document_question_answering import DocumentQuestionAnsweringPipeline |
| from .feature_extraction import FeatureExtractionPipeline |
| from .fill_mask import FillMaskPipeline |
| from .image_classification import ImageClassificationPipeline |
| from .image_feature_extraction import ImageFeatureExtractionPipeline |
| from .image_segmentation import ImageSegmentationPipeline |
| from .image_to_image import ImageToImagePipeline |
| from .image_to_text import ImageToTextPipeline |
| from .mask_generation import MaskGenerationPipeline |
| from .object_detection import ObjectDetectionPipeline |
| from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline |
| from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline |
| from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline |
| from .text_classification import TextClassificationPipeline |
| from .text_generation import TextGenerationPipeline |
| from .text_to_audio import TextToAudioPipeline |
| from .token_classification import ( |
| AggregationStrategy, |
| NerPipeline, |
| TokenClassificationArgumentHandler, |
| TokenClassificationPipeline, |
| ) |
| from .video_classification import VideoClassificationPipeline |
| from .visual_question_answering import VisualQuestionAnsweringPipeline |
| from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline |
| from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline |
| from .zero_shot_image_classification import ZeroShotImageClassificationPipeline |
| from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline |
|
|
|
|
| if is_tf_available(): |
| import tensorflow as tf |
|
|
| from ..models.auto.modeling_tf_auto import ( |
| TFAutoModel, |
| TFAutoModelForCausalLM, |
| TFAutoModelForImageClassification, |
| TFAutoModelForMaskedLM, |
| TFAutoModelForQuestionAnswering, |
| TFAutoModelForSeq2SeqLM, |
| TFAutoModelForSequenceClassification, |
| TFAutoModelForTableQuestionAnswering, |
| TFAutoModelForTokenClassification, |
| TFAutoModelForVision2Seq, |
| TFAutoModelForZeroShotImageClassification, |
| ) |
|
|
| if is_torch_available(): |
| import torch |
|
|
| from ..models.auto.modeling_auto import ( |
| AutoModel, |
| AutoModelForAudioClassification, |
| AutoModelForCausalLM, |
| AutoModelForCTC, |
| AutoModelForDocumentQuestionAnswering, |
| AutoModelForImageClassification, |
| AutoModelForImageSegmentation, |
| AutoModelForMaskedLM, |
| AutoModelForMaskGeneration, |
| AutoModelForObjectDetection, |
| AutoModelForQuestionAnswering, |
| AutoModelForSemanticSegmentation, |
| AutoModelForSeq2SeqLM, |
| AutoModelForSequenceClassification, |
| AutoModelForSpeechSeq2Seq, |
| AutoModelForTableQuestionAnswering, |
| AutoModelForTextToSpectrogram, |
| AutoModelForTextToWaveform, |
| AutoModelForTokenClassification, |
| AutoModelForVideoClassification, |
| AutoModelForVision2Seq, |
| AutoModelForVisualQuestionAnswering, |
| AutoModelForZeroShotImageClassification, |
| AutoModelForZeroShotObjectDetection, |
| ) |
|
|
|
|
| if TYPE_CHECKING: |
| from ..modeling_tf_utils import TFPreTrainedModel |
| from ..modeling_utils import PreTrainedModel |
| from ..tokenization_utils_fast import PreTrainedTokenizerFast |
|
|
|
|
| logger = logging.get_logger(__name__) |
|
|
|
|
| # Register all the supported tasks here |
| TASK_ALIASES = { |
| "sentiment-analysis": "text-classification", |
| "ner": "token-classification", |
| "vqa": "visual-question-answering", |
| "text-to-speech": "text-to-audio", |
| } |
| SUPPORTED_TASKS = { |
| "audio-classification": { |
| "impl": AudioClassificationPipeline, |
| "tf": (), |
| "pt": (AutoModelForAudioClassification,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("superb/wav2vec2-base-superb-ks", "372e048")}}, |
| "type": "audio", |
| }, |
| "automatic-speech-recognition": { |
| "impl": AutomaticSpeechRecognitionPipeline, |
| "tf": (), |
| "pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (), |
| "default": {"model": {"pt": ("facebook/wav2vec2-base-960h", "55bb623")}}, |
| "type": "multimodal", |
| }, |
| "text-to-audio": { |
| "impl": TextToAudioPipeline, |
| "tf": (), |
| "pt": (AutoModelForTextToWaveform, AutoModelForTextToSpectrogram) if is_torch_available() else (), |
| "default": {"model": {"pt": ("suno/bark-small", "645cfba")}}, |
| "type": "text", |
| }, |
| "feature-extraction": { |
| "impl": FeatureExtractionPipeline, |
| "tf": (TFAutoModel,) if is_tf_available() else (), |
| "pt": (AutoModel,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("distilbert/distilbert-base-cased", "935ac13"), |
| "tf": ("distilbert/distilbert-base-cased", "935ac13"), |
| } |
| }, |
| "type": "multimodal", |
| }, |
| "text-classification": { |
| "impl": TextClassificationPipeline, |
| "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"), |
| "tf": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "token-classification": { |
| "impl": TokenClassificationPipeline, |
| "tf": (TFAutoModelForTokenClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForTokenClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"), |
| "tf": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "question-answering": { |
| "impl": QuestionAnsweringPipeline, |
| "tf": (TFAutoModelForQuestionAnswering,) if is_tf_available() else (), |
| "pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("distilbert/distilbert-base-cased-distilled-squad", "626af31"), |
| "tf": ("distilbert/distilbert-base-cased-distilled-squad", "626af31"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "table-question-answering": { |
| "impl": TableQuestionAnsweringPipeline, |
| "pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (), |
| "tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (), |
| "default": { |
| "model": { |
| "pt": ("google/tapas-base-finetuned-wtq", "69ceee2"), |
| "tf": ("google/tapas-base-finetuned-wtq", "69ceee2"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "visual-question-answering": { |
| "impl": VisualQuestionAnsweringPipeline, |
| "pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available() else (), |
| "tf": (), |
| "default": { |
| "model": {"pt": ("dandelin/vilt-b32-finetuned-vqa", "4355f59")}, |
| }, |
| "type": "multimodal", |
| }, |
| "document-question-answering": { |
| "impl": DocumentQuestionAnsweringPipeline, |
| "pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (), |
| "tf": (), |
| "default": { |
| "model": {"pt": ("impira/layoutlm-document-qa", "52e01b3")}, |
| }, |
| "type": "multimodal", |
| }, |
| "fill-mask": { |
| "impl": FillMaskPipeline, |
| "tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (), |
| "pt": (AutoModelForMaskedLM,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("distilbert/distilroberta-base", "ec58a5b"), |
| "tf": ("distilbert/distilroberta-base", "ec58a5b"), |
| } |
| }, |
| "type": "text", |
| }, |
| "summarization": { |
| "impl": SummarizationPipeline, |
| "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
| "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
| "default": { |
| "model": {"pt": ("sshleifer/distilbart-cnn-12-6", "a4f8f3e"), "tf": ("google-t5/t5-small", "d769bba")} |
| }, |
| "type": "music_sound_outs", |
| }, |
| # This task is a special case as it's parametrized by SRC, TGT languages. |
| "translation": { |
| "impl": TranslationPipeline, |
| "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
| "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
| "default": { |
| ("en", "fr"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
| ("en", "de"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
| ("en", "ro"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
| }, |
| "type": "text", |
| }, |
| "text2text-generation": { |
| "impl": Text2TextGenerationPipeline, |
| "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
| "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
| "type": "text", |
| }, |
| "text-generation": { |
| "impl": TextGenerationPipeline, |
| "tf": (TFAutoModelForCausalLM,) if is_tf_available() else (), |
| "pt": (AutoModelForCausalLM,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("openai-community/gpt2", "6c0e608"), "tf": ("openai-community/gpt2", "6c0e608")}}, |
| "type": "text", |
| }, |
| "zero-shot-classification": { |
| "impl": ZeroShotClassificationPipeline, |
| "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("facebook/bart-large-mnli", "c626438"), |
| "tf": ("FacebookAI/roberta-large-mnli", "130fb28"), |
| }, |
| "config": { |
| "pt": ("facebook/bart-large-mnli", "c626438"), |
| "tf": ("FacebookAI/roberta-large-mnli", "130fb28"), |
| }, |
| }, |
| "type": "text", |
| }, |
| "zero-shot-image-classification": { |
| "impl": ZeroShotImageClassificationPipeline, |
| "tf": (TFAutoModelForZeroShotImageClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForZeroShotImageClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("openai/clip-vit-base-patch32", "f4881ba"), |
| "tf": ("openai/clip-vit-base-patch32", "f4881ba"), |
| } |
| }, |
| "type": "multimodal", |
| }, |
| "zero-shot-audio-classification": { |
| "impl": ZeroShotAudioClassificationPipeline, |
| "tf": (), |
| "pt": (AutoModel,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("laion/clap-htsat-fused", "973b6e5"), |
| } |
| }, |
| "type": "multimodal", |
| }, |
| "conversational": { |
| "impl": ConversationalPipeline, |
| "tf": (TFAutoModelForSeq2SeqLM, TFAutoModelForCausalLM) if is_tf_available() else (), |
| "pt": (AutoModelForSeq2SeqLM, AutoModelForCausalLM) if is_torch_available() else (), |
| "default": { |
| "model": {"pt": ("microsoft/DialoGPT-medium", "8bada3b"), "tf": ("microsoft/DialoGPT-medium", "8bada3b")} |
| }, |
| "type": "text", |
| }, |
| "image-classification": { |
| "impl": ImageClassificationPipeline, |
| "tf": (TFAutoModelForImageClassification,) if is_tf_available() else (), |
| "pt": (AutoModelForImageClassification,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("google/vit-base-patch16-224", "5dca96d"), |
| "tf": ("google/vit-base-patch16-224", "5dca96d"), |
| } |
| }, |
| "type": "image", |
| }, |
| "image-feature-extraction": { |
| "impl": ImageFeatureExtractionPipeline, |
| "tf": (TFAutoModel,) if is_tf_available() else (), |
| "pt": (AutoModel,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("google/vit-base-patch16-224", "29e7a1e183"), |
| "tf": ("google/vit-base-patch16-224", "29e7a1e183"), |
| } |
| }, |
| "type": "image", |
| }, |
| "image-segmentation": { |
| "impl": ImageSegmentationPipeline, |
| "tf": (), |
| "pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (), |
| "default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}}, |
| "type": "multimodal", |
| }, |
| "image-to-text": { |
| "impl": ImageToTextPipeline, |
| "tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (), |
| "pt": (AutoModelForVision2Seq,) if is_torch_available() else (), |
| "default": { |
| "model": { |
| "pt": ("ydshieh/vit-gpt2-coco-en", "65636df"), |
| "tf": ("ydshieh/vit-gpt2-coco-en", "65636df"), |
| } |
| }, |
| "type": "multimodal", |
| }, |
| "object-detection": { |
| "impl": ObjectDetectionPipeline, |
| "tf": (), |
| "pt": (AutoModelForObjectDetection,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}}, |
| "type": "multimodal", |
| }, |
| "zero-shot-object-detection": { |
| "impl": ZeroShotObjectDetectionPipeline, |
| "tf": (), |
| "pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}}, |
| "type": "multimodal", |
| }, |
| "depth-estimation": { |
| "impl": DepthEstimationPipeline, |
| "tf": (), |
| "pt": (AutoModelForDepthEstimation,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}}, |
| "type": "image", |
| }, |
| "video-classification": { |
| "impl": VideoClassificationPipeline, |
| "tf": (), |
| "pt": (AutoModelForVideoClassification,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("MCG-NJU/videomae-base-finetuned-kinetics", "4800870")}}, |
| "type": "video", |
| }, |
| "mask-generation": { |
| "impl": MaskGenerationPipeline, |
| "tf": (), |
| "pt": (AutoModelForMaskGeneration,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("facebook/sam-vit-huge", "997b15")}}, |
| "type": "multimodal", |
| }, |
| "image-to-image": { |
| "impl": ImageToImagePipeline, |
| "tf": (), |
| "pt": (AutoModelForImageToImage,) if is_torch_available() else (), |
| "default": {"model": {"pt": ("caidas/swin2SR-classical-sr-x2-64", "4aaedcb")}}, |
| "type": "image", |
| }, |
| } |
|
|
# Tasks that never need a given preprocessor. The sets start empty and are filled by the
# loop below from the "type" declared by each entry of SUPPORTED_TASKS.
NO_FEATURE_EXTRACTOR_TASKS = set()
NO_IMAGE_PROCESSOR_TASKS = set()
NO_TOKENIZER_TASKS = set()


# Those model configs are special, they are generic over their task, meaning
# any tokenizer/feature_extractor might be used for a given model so we cannot
# use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING to
# see if the model defines such objects or not.
MULTI_MODEL_AUDIO_CONFIGS = {"SpeechEncoderDecoderConfig"}
MULTI_MODEL_VISION_CONFIGS = {"VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"}
for task, values in SUPPORTED_TASKS.items():
    if values["type"] == "text":
        # Text tasks: no feature extractor and no image processor.
        NO_FEATURE_EXTRACTOR_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] in {"image", "video"}:
        # Image/video tasks: no tokenizer.
        NO_TOKENIZER_TASKS.add(task)
    elif values["type"] in {"audio"}:
        # Audio tasks: no tokenizer and no image processor.
        NO_TOKENIZER_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] != "multimodal":
        # "multimodal" tasks are added to none of the sets; any other value is a
        # mistake in SUPPORTED_TASKS and is rejected at import time.
        raise ValueError(f"SUPPORTED_TASK {task} contains invalid type {values['type']}")


PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES)
|
|
|
|
def get_supported_tasks() -> List[str]:
    """
    Returns a list of supported task strings.

    The call is delegated to `PIPELINE_REGISTRY.get_supported_tasks()`.
    """
    return PIPELINE_REGISTRY.get_supported_tasks()
|
|
|
|
def get_task(model: str, token: Optional[str] = None, **deprecated_kwargs) -> str:
    """
    Infer the pipeline task of a Hub model from the `pipeline_tag` returned by `model_info`.

    Args:
        model (`str`):
            Model identifier passed to `model_info`.
        token (`str`, *optional*):
            Token forwarded to `model_info`.
        deprecated_kwargs:
            Only `use_auth_token` is read; it is the deprecated spelling of `token`.

    Returns:
        `str`: The `pipeline_tag` reported for `model`.

    Raises:
        ValueError: If both `token` and `use_auth_token` are specified.
        RuntimeError: If offline mode is active, if the `model_info` lookup fails, if the model has no
            `pipeline_tag`, or if its `library_name` is not `transformers`.
    """
    use_auth_token = deprecated_kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
            FutureWarning,
        )
        if token is not None:
            raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
        token = use_auth_token

    # NOTE(review): the visible import list names `is_offline_wallet_mode`, not `is_offline_mode` —
    # confirm that `is_offline_mode` is actually in scope in this module.
    if is_offline_mode():
        raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode")
    try:
        info = model_info(model, token=token)
    except Exception as e:
        # Chain the original exception so the underlying traceback is not lost.
        raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}") from e
    if not info.pipeline_tag:
        raise RuntimeError(
            f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically"
        )
    # A missing `library_name` attribute is treated as "transformers".
    if getattr(info, "library_name", "transformers") != "transformers":
        raise RuntimeError(f"This model is meant to be used with {info.library_name} not with transformers")
    return info.pipeline_tag
|
|
| from transformers import pipeline |
| from transformers.pipelines.pt_utils import KeyDataset |
| import datasets |
|
|
| dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised") |
| pipe = pipeline("text-classification", device=0) |
| for out in pipe(KeyDataset(dataset, "text"), batch_size=8, truncation="only_first"): |
| print(out) |
| # [{'label': 'POSITIVE', 'score': 0.9998743534088135}] |
| # Exactly the same output as before, but the content are passed |
| # as batches to the model |
| from transformers import pipeline |
| from torch.utils.data import Dataset |
| from tqdm.auto import tqdm |
|
|
| pipe = pipeline("text-classification", device=0) |
| |
| |
| class MyDataset(Dataset): |
| def __len__(self): |
| return 5000 |
| |
| def __getitem__(self, i): |
| return "This is a test" |
| |
| |
| dataset = MyDataset() |
| |
| for batch_size in [1, 8, 64, 256]: |
| print("-" * 30) |
| print(f"Streaming batch_size={batch_size}") |
| for out in tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)): |
| pass |
| |
| # On GTX 970 |
| |
| Streaming no batching |
| 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 5000/5000 [00:26<00:00, 187.52it/s] |
| |
| Streaming batch_size=8 |
| 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 5000/5000 [00:04<00:00, 1205.95it/s] |
| |
| Streaming batch_size=64 |
| 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 5000/5000 [00:02<00:00, 2478.24it/s] |
| |
| Streaming batch_size=256 |
| 100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 5000/5000 [00:01<00:00, 2554.43it/s] |
| (diminishing returns, saturated the GPU) |
| class MyDataset(Dataset): |
| def __len__(self): |
| return 5000 |
| >pass |
| ===== Application Startup at 2024-02-13 18:35:27 ===== |
| |
| |
| |
| tokenizer_config.json: 0%| | 0.00/967 [00:00<?, ?B/s] |
| tokenizer_config.json: 100%|ββββββββββ| 967/967 [00:00<00:00, 6.20MB/s] |
| |
| |
| tokenizer.model: 0%| | 0.00/493k [00:00<?, ?B/s] |
| tokenizer.model: 100%|ββββββββββ| 493k/493k [00:00<00:00, 31.3MB/s] |
| |
| |
| tokenizer.json: 0%| | 0.00/1.80M [00:00<?, ?B/s] |
| tokenizer.json: 100%|ββββββββββ| 1.80M/1.80M [00:00<00:00, 12.3MB/s] |
| |
| |
| special_tokens_map.json: 0%| | 0.00/72.0 [00:00<?, ?B/s] |
| special_tokens_map.json: 100%|ββββββββββ| 72.0/72.0 [00:00<00:00, 322kB/s] |
| |
| |
| config.json: 0%| | 0.00/720 [00:00<?, ?B/s] |
| config.json: 100%|ββββββββββ| 720/720 [00:00<00:00, 3.01MB/s] |
| |
| |
| model.safetensors.index.json: 0%| | 0.00/92.7k [00:00<?, ?B/s] |
| model.safetensors.index.json: 100%|ββββββββββ| 92.7k/92.7k [00:00<00:00, 181MB/s] |
| |
| |
| Downloading shards: 0%| | 0/19 [00:00<?, ?it/s] |
| |
| model-00001-of-00019.safetensors: 0%| | 0.00/4.89G [00:00<?, ?B/s] |
| p |
| model-00001-of-00019.safetensors: 1%| | 31.5M/4.89G [00:01<03:17, 24.6MB/s] |
| |
| model-00001-of-00019.safetensors: 7%|β | 325M/4.89G [00:02<00:28, 163MB/s] |
| |
| model-00001-of-00019.safetensors: 18%|ββ | 881M/4.89G [00:03<00:12, 329MB/s] |
| |
| model-00001-of-00019.safetensors: 25%|βββ | 1.24G/4.89G [00:04<00:10, 338MB/s] |
| |
| model-00001-of-00019.safetensors: 33%|ββββ | 1.59G/4.89G [00:09<00:22, 147MB/s] |
| |
| model-00001-of-00019.safetensors: 38%|ββββ | 1.85G/4.89G [00:13<00:28, 107MB/s] |
| |
| model-00001-of-00019.safetensors: 42%|βββββ | 2.03G/4.89G [00:15<00:27, 105MB/s] |
| |
| model-00001-of-00019.safetensors: 45%|βββββ | 2.22G/4.89G [00:16<00:22, 117MB/s] |
| |
| model-00001-of-00019.safetensors: 49%|βββββ | 2.39G/4.89G [00:18<00:23, 106MB/s] |
| |
| model-00001-of-00019.safetensors: 52%|ββββββ | 2.54G/4.89G [00:19<00:21, 112MB/s] |
| |
| model-00001-of-00019.safetensors: 55%|ββββββ | 2.68G/4.89G [00:24<00:33, 66.1MB/s] |
| |
| model-00001-of-00019.safetensors: 58%|ββββββ | 2.83G/4.89G [00:25<00:27, 76.1MB/s] |
| |
| model-00001-of-00019.safetensors: 60%|ββββββ | 2.95G/4.89G [00:26<00:24, 80.7MB/s] |
| |
| model-00001-of-00019.safetensors: 63%|βββββββ | 3.06G/4.89G [00:27<00:21, 86.7MB/s] |
| |
| model-00001-of-00019.safetensors: 65%|βββββββ | 3.20G/4.89G [00:28<00:17, 96.6MB/s] |
| |
| model-00001-of-00019.safetensors: 69%|βββββββ | 3.40G/4.89G [00:29<00:12, 117MB/s] |
| |
| model-00001-of-00019.safetensors: 72%|ββββββββ | 3.54G/4.89G [00:31<00:12, 110MB/s] |
| |
| model-00001-of-00019.safetensors: 75%|ββββββββ | 3.67G/4.89G [00:33<00:14, 84.4MB/s] |
| |
| model-00001-of-00019.safetensors: 77%|ββββββββ | 3.77G/4.89G [00:37<00:19, 57.1MB/s] |
| |
| model-00001-of-00019.safetensors: 79%|ββββββββ | 3.86G/4.89G [00:38<00:17, 58.0MB/s] |
| |
| model-00001-of-00019.safetensors: 81%|ββββββββ | 3.94G/4.89G [00:39<00:15, 62.2MB/s] |
| |
| model-00001-of-00019.safetensors: 83%|βββββββββ | 4.04G/4.89G [00:41<00:13, 63.7MB/s] |
| |
| model-00001-of-00019.safetensors: 87%|βββββββββ | 4.26G/4.89G [00:42<00:06, 96.0MB/s] |
| |
| model-00001-of-00019.safetensors: 93%|ββββββββββ| 4.54G/4.89G [00:43<00:02, 137MB/s] |
| |
| model-00001-of-00019.safetensors: 96%|ββββββββββ| 4.71G/4.89G [00:44<00:01, 143MB/s] |
| |
| model-00001-of-00019.safetensors: 100%|ββββββββββ| 4.87G/4.89G [00:45<00:00, 137MB/s] |
| model-00001-of-00019.safetensors: 100%|ββββββββββ| 4.89G/4.89G [00:46<00:00, 105MB/s] |
| |
| |
| |
| |
| def __getitem__(self, i): |
| if i % 64 == 0: |
| n = 100 |
| else: |
| n = 1 |
| return "This is a test" * n |
| |