Spaces:
Sleeping
Sleeping
dbleek
committed on
Commit
·
806796c
1
Parent(s):
4d1c892
new classifier
Browse files
- milestone-3.py +1 -2
- milestone_2.py +0 -26
- patent_classification_v2.pt +3 -0
milestone-3.py
CHANGED
|
@@ -25,7 +25,7 @@ dataset = filtered_dataset.shuffle(seed=42).select(range(20))
|
|
| 25 |
dataset = dataset.sort("patent_number")
|
| 26 |
|
| 27 |
# Create pipeline using model trained on Colab
|
| 28 |
-
model = torch.load("
|
| 29 |
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
|
| 30 |
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
|
| 31 |
|
|
@@ -74,4 +74,3 @@ if submitted:
|
|
| 74 |
pred, score
|
| 75 |
)
|
| 76 |
)
|
| 77 |
-
check = st.markdown("Actual Label: **{}**.".format(label))
|
|
|
|
| 25 |
dataset = dataset.sort("patent_number")
|
| 26 |
|
| 27 |
# Create pipeline using model trained on Colab
|
| 28 |
+
model = torch.load("patent_classifier_v2.pt", map_location=torch.device("cpu"))
|
| 29 |
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
|
| 30 |
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
|
| 31 |
|
|
|
|
| 74 |
pred, score
|
| 75 |
)
|
| 76 |
)
|
|
|
milestone_2.py
DELETED
|
@@ -1,26 +0,0 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
from transformers import (AutoTokenizer, TFAutoModelForSequenceClassification,
|
| 3 |
-
pipeline)
|
| 4 |
-
|
| 5 |
-
st.title("CS-GY-6613 Project Milestone 2")
|
| 6 |
-
model_choices = (
|
| 7 |
-
"distilbert-base-uncased-finetuned-sst-2-english",
|
| 8 |
-
"j-hartmann/emotion-english-distilroberta-base",
|
| 9 |
-
"joeddav/distilbert-base-uncased-go-emotions-student",
|
| 10 |
-
)
|
| 11 |
-
|
| 12 |
-
with st.form("Input Form"):
|
| 13 |
-
text = st.text_area("Write your text here:", "CS-GY-6613 is a great course!")
|
| 14 |
-
model_name = st.selectbox("Select a model:", model_choices)
|
| 15 |
-
submitted = st.form_submit_button("Submit")
|
| 16 |
-
|
| 17 |
-
if submitted:
|
| 18 |
-
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
|
| 19 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 20 |
-
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
|
| 21 |
-
res = classifier(text)
|
| 22 |
-
label = res[0]["label"].upper()
|
| 23 |
-
score = res[0]["score"]
|
| 24 |
-
st.markdown(
|
| 25 |
-
f"This text was classified as **{label}** with a confidence score of **{score}**."
|
| 26 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
patent_classification_v2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fbbdc470f673703431aa31cc7451af0d0608df3bd6e7006ab32866803f4eece
|
| 3 |
+
size 267882633
|