Spaces:

Hacker1337
/

article_classifier

Sleeping

article_classifier / dataset.py

Debugged app locally and customized it.

7c398ad 6 months ago

1.11 kB

	# Short category codes; a code's position in this list is its integer id.
	labels = ["CV", "AI", "ML", "NE", "CL"]

	# Lookups in both directions, derived from the list order above.
	id2label = dict(enumerate(labels))
	label2id = {code: idx for idx, code in id2label.items()}

	# Human-readable name for each category code.
	category2human = {
	    "CV": "Computer Vision",
	    "AI": "Artificial Intelligence",
	    "ML": "Machine Learning",
	    "NE": "Neural and Evolutionary Computing",
	    "CL": "Computation and Language",
	}


	def load_arxiv_dataset():
	    """
	    Download the arXiv abstracts CSV from Kaggle and load it as a dataset.

	    The "terms" column is read from the CSV as a string; each value is
	    parsed with ``ast.literal_eval`` so callers get the parsed list
	    instead of its string form.

	    Returns the "train" split produced by ``datasets.load_dataset`` with
	    the "terms" column parsed.
	    """
	    # Imported inside the function, so importing this module alone does
	    # not pull in kagglehub or datasets.
	    import ast
	    import os

	    import kagglehub
	    from datasets import load_dataset

	    # Download the latest version; the call returns the local directory.
	    download_dir = kagglehub.dataset_download("spsayakpaul/arxiv-paper-abstracts")
	    csv_file = os.path.join(download_dir, "arxiv_data.csv")

	    def parse_terms(row):
	        # Replace the stringified literal with the parsed Python object.
	        row["terms"] = ast.literal_eval(row["terms"])
	        return row

	    raw = load_dataset(
	        "csv",
	        data_files=csv_file,
	        encoding="utf-8",
	        split="train",
	    )
	    return raw.map(parse_terms)


	def create_prompt(title, summary):
	    """
	    Build the model prompt from a paper's title and summary.

	    The title is placed under a "# title:" header line and the summary
	    under a "# abstract:" header line; the combined string is returned.
	    """
	    template = "# title:\n{title}\n# abstract:\n{summary}"
	    return template.format(title=title, summary=summary)