Spaces:
Sleeping
Sleeping
# Short category codes the classifier predicts; a code's position in this
# list is its integer class id.
labels = ["CV", "AI", "ML", "NE", "CL"]

# Two-way lookup between integer class ids and category codes.
id2label = dict(enumerate(labels))
label2id = {label: i for i, label in enumerate(labels)}

# Human-readable name for each category code in ``labels``.
category2human = {
    "CV": "Computer Vision",
    "AI": "Artificial Intelligence",
    "ML": "Machine Learning",
    "NE": "Neural and Evolutionary Computing",
    "CL": "Computation and Language",
}
def load_arxiv_dataset():
    """Download the arXiv paper-abstracts CSV from Kaggle and load it.

    Fetches the Kaggle dataset ``spsayakpaul/arxiv-paper-abstracts`` with
    ``kagglehub``, reads its ``arxiv_data.csv`` through the ``datasets`` CSV
    loader (``train`` split, UTF-8), and converts the ``terms`` column of
    every row from its string form into the Python object it spells out.

    Returns:
        The loaded ``train`` split with ``terms`` parsed.
    """
    # Imports stay function-local, as in the original, so importing this
    # module does not itself require kagglehub or datasets.
    import ast
    import os

    import kagglehub
    from datasets import load_dataset

    # Download latest version
    path = kagglehub.dataset_download("spsayakpaul/arxiv-paper-abstracts")
    dataset = load_dataset(
        "csv",
        data_files=os.path.join(path, "arxiv_data.csv"),
        encoding="utf-8",
        split="train",
    )

    def parse_terms(example):
        """Replace the string in ``example["terms"]`` with its parsed value."""
        # literal_eval only accepts Python literals, so it does not execute
        # arbitrary code the way eval() would.
        example["terms"] = ast.literal_eval(example["terms"])
        return example

    # Convert the stringified lists in ``terms`` into real lists, row by row.
    dataset = dataset.map(parse_terms)
    return dataset
def create_prompt(title: str, summary: str) -> str:
    """Create a prompt for the model from the title and summary.

    Args:
        title: Paper title, placed on the line after ``# title:``.
        summary: Paper abstract, placed on the line after ``# abstract:``.

    Returns:
        A string with a ``# title:`` section followed by a ``# abstract:``
        section, with no trailing newline.
    """
    return f"# title:\n{title}\n# abstract:\n{summary}"