Spaces:
Sleeping
Sleeping
Commit
·
2904d0e
1
Parent(s):
fef1ab3
added loading of my model
Browse files
- app.py +14 -1
- dataset.py +40 -0
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import numpy as np
|
| 3 |
import random
|
|
@@ -7,13 +8,25 @@ from diffusers import DiffusionPipeline
|
|
| 7 |
import torch
|
| 8 |
|
| 9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 10 |
-
model_repo_id = "stabilityai/sdxl-turbo"
|
| 11 |
|
| 12 |
if torch.cuda.is_available():
|
| 13 |
torch_dtype = torch.float16
|
| 14 |
else:
|
| 15 |
torch_dtype = torch.float32
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
|
| 18 |
pipe = pipe.to(device)
|
| 19 |
|
|
|
|
| 1 |
+
from transformers import AutoModelForSequenceClassification
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
| 4 |
import random
|
|
|
|
| 8 |
import torch
|
| 9 |
|
| 10 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 11 |
+
model_repo_id = "stabilityai/sdxl-turbo"
|
| 12 |
|
| 13 |
if torch.cuda.is_available():
|
| 14 |
torch_dtype = torch.float16
|
| 15 |
else:
|
| 16 |
torch_dtype = torch.float32
|
| 17 |
|
| 18 |
+
|
| 19 |
+
from article_classifier.dataset import labels, id2label, label2id, categorie2human
|
| 20 |
+
|
| 21 |
+
model_path = "distilbert/distilbert-base-cased" # todo, replace with hacker1337/article-classifier
|
| 22 |
+
model = AutoModelForSequenceClassification.from_pretrained(
|
| 23 |
+
model_path,
|
| 24 |
+
num_labels=len(id2label),
|
| 25 |
+
id2label=id2label,
|
| 26 |
+
label2id=label2id,
|
| 27 |
+
problem_type="multi_label_classification",
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
|
| 31 |
pipe = pipe.to(device)
|
| 32 |
|
dataset.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Category codes used as classification labels; a code's position in this
# list is its integer class id.
labels = ["CV", "AI", "ML", "NE", "CL"]

# Lookup tables between integer class ids and category codes (both directions).
id2label = dict(enumerate(labels))
label2id = dict(zip(labels, range(len(labels))))

# Human-readable name for each category code.
categorie2human = dict(
    CV="Computer Vision",
    AI="Artificial Intelligence",
    ML="Machine Learning",
    NE="Neural and Evolutionary Computing",
    CL="Computation and Language",
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def load_arxiv_dataset():
    """Download the arXiv paper-abstracts CSV from Kaggle and load it.

    The Kaggle dataset ``spsayakpaul/arxiv-paper-abstracts`` is fetched with
    ``kagglehub``, its ``arxiv_data.csv`` file is loaded through
    ``datasets.load_dataset`` as the ``train`` split, and each row's
    ``terms`` field is parsed from its string form into a Python literal
    with ``ast.literal_eval``.

    Third-party packages are imported lazily inside the function, so
    importing this module for its constants alone does not require them.

    Returns:
        The loaded dataset with the ``terms`` column parsed.
    """
    import ast
    import os

    import kagglehub
    from datasets import load_dataset

    # Download the latest version of the Kaggle dataset.
    download_dir = kagglehub.dataset_download("spsayakpaul/arxiv-paper-abstracts")
    csv_file = os.path.join(download_dir, "arxiv_data.csv")

    raw = load_dataset("csv", data_files=csv_file, encoding="utf-8", split="train")

    def parse_terms(example):
        # "terms" is stored in the CSV as a string; turn it back into a
        # Python object (a list, per the original author's note).
        example["terms"] = ast.literal_eval(example["terms"])
        return example

    return raw.map(parse_terms)
|