dd123
/

test_model

Text Classification

Generated from Trainer

text-embeddings-inference

Model card Files Files and versions

Metrics Training metrics Community

test_model / logs /data.py

dd123's picture

Upload data.py

0111237 almost 3 years ago

2.61 kB

	# coding=utf-8
	# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""Text-classification dataset loading script (11 labels, train/test JSON files)."""

	import json

	import datasets
	from datasets.tasks import TextClassification

	# Locations of the per-split JSON files fetched by Data._split_generators.
	# NOTE(review): the host is raw.kgithub.com, not raw.githubusercontent.com --
	# presumably a GitHub mirror; confirm it is still reachable and trusted.
	_TRAIN_DOWNLOAD_URL = "https://raw.kgithub.com/freeziyou/test_data/main/data/train/train.json"
	_TEST_DOWNLOAD_URL = "https://raw.kgithub.com/freeziyou/test_data/main/data/test/test.json"


	class Data(datasets.GeneratorBasedBuilder):
	    """Dataset builder for an 11-label text-classification corpus.

	    Fetches one JSON file per split (train and test) and yields
	    ``{"text": ..., "label": ...}`` examples from each.
	    """

	    VERSION = datasets.Version("1.0.0")

	    def _info(self):
	        """Return the dataset metadata.

	        Each example has a string ``text`` feature and a ``label`` class
	        feature restricted to the names listed below.
	        """
	        return datasets.DatasetInfo(
	            description=None,
	            features=datasets.Features(
	                {
	                    "text": datasets.Value("string"),
	                    "label": datasets.features.ClassLabel(
	                        names=[
	                            "none",
	                            "like",
	                            "unlike",
	                            "hope",
	                            "questioning",
	                            "express_surprise",
	                            "normal_interaction",
	                            "express_sad",
	                            "tease",
	                            "meme",
	                            "express_abashed",
	                        ]
	                    ),
	                }
	            ),
	            homepage=None,
	            citation=None,
	        )

	    def _split_generators(self, dl_manager):
	        """Download both split files and return one SplitGenerator per split.

	        Args:
	            dl_manager: download manager supplied by ``datasets``; its
	                ``download_and_extract`` result is passed on as ``filepath``.

	        Returns:
	            A list with the TRAIN and TEST split generators, in that order.
	        """
	        train_path = dl_manager.download_and_extract(_TRAIN_DOWNLOAD_URL)
	        test_path = dl_manager.download_and_extract(_TEST_DOWNLOAD_URL)
	        return [
	            datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": train_path}),
	            datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepath": test_path}),
	        ]

	    def _generate_examples(self, filepath):
	        """Yield ``(key, example)`` tuples read from the JSON file at *filepath*.

	        The whole file is parsed with ``json.load``. Two layouts are accepted:

	        * a list of row objects (``{"text": ..., "label": ...}``) -- the key
	          is the row's position in the list;
	        * a list of ``[key, row]`` pairs -- the only layout the previous
	          ``for id_, row in data`` loop could handle; the embedded key is kept.

	        Args:
	            filepath: path of the downloaded JSON file for one split.

	        Raises:
	            KeyError: if a row lacks ``"text"`` or ``"label"``.
	        """
	        with open(filepath, encoding="utf-8") as f:
	            data = json.load(f)

	        for index, item in enumerate(data):
	            # Unpacking a row object directly would bind its *key names* to
	            # id_/row and then fail on row["text"]; number such rows instead.
	            if isinstance(item, dict):
	                id_, row = index, item
	            else:
	                id_, row = item
	            yield id_, {"text": row["text"], "label": row["label"]}