Sleepyp00 commited on
Commit
cb1b925
·
1 Parent(s): a2a775d
Files changed (4) hide show
  1. .gitignore +2 -0
  2. app.py +71 -0
  3. finetune_net.py +37 -0
  4. requirements.txt +26 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ **.venv/
2
+ **__pycache__/
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import hopsworks
3
+ import torch
4
+ import joblib
5
# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): hopsworks.login() presumably reads credentials (API key) from
# the environment — confirm deployment config.
project = hopsworks.login()
fs = project.get_feature_store()  # NOTE(review): feature store handle is never used below — confirm it is needed

mr = project.get_model_registry()
# Classifier head: downloaded from the registry, then unpickled with joblib.
# The pickle presumably resolves to finetune_net.Network (it exposes
# .probabilities(), used in predict below) — confirm.
classifier = mr.get_model("base_classifier", version = 1)
model_dir = classifier.download()
classifier = joblib.load(model_dir + "/base_classifier.pkl")

# Embedding model used to encode raw article text (exposes .encode()).
embedding_model = mr.get_model("news_embedding", version = 1)
model_dir = embedding_model.download()
embedding_model = joblib.load(model_dir + "/news_embedding.pkl")
18
+
19
+
20
# Maps the classifier's output index to a human-readable category label
# shown in the Gradio "label" output.
index_to_category = {
    0: "Politics",  # fixed typo: was "Polititcs"
    1: "Science",
    2: "Entertainment",
    3: "Sports",
    4: "Business",
}
27
+
28
# Example inputs for the Gradio UI; each inner list is one example row
# (a single value for the Article textbox).
sample_text = [
    [
        """
Alan Horn, longtime film executive, to retire from Disney
Alan Horn, the film executive who helped turn Walt Disney Studios into the most powerful movie studios
in Hollywood and whose 50-year career has touched films from “When Harry Met Sally...” to “The Dark Knight,” is retiring.”"""
    ],
    [
        # Typo fix: headline previously read "JRisks ...".
        """Risks of US electoral chaos deepen after Trump is barred from another state ballot
"""
    ],
    ["Engineers Working to Resolve Issue With Voyager 1 Computer"],
    ["""Nick David at the double as Harlequins put on a show against Gloucester
Harlequins 32-26 Gloucester
Visitors rally late but Quins make Twickenham advantage count
Gerard Meagher at Twickenham
It was not long ago that Marcus Smith was still adjusting to being back in the fly-half jersey after the World Cup but, make no mistake, he is in the groove now. Smith ultimately proved the difference as Harlequins put on a show in their annual fixture here before Gloucester rallied and almost threatened the most unlikely of comebacks late in the game.
"""]
]
48
+
49
# Markdown description rendered beneath the app title in the Gradio UI.
description = """
This app will provide classifications for text from a news article.
The input is currently truncated at around 400 words so make sure to include the most important part of the article.
"""
53
+
54
+
55
+
56
def predict(text):
    """Classify one news article.

    Encodes the raw text with the embedding model, runs the classifier head,
    and returns a {category label: probability} dict suitable for a Gradio
    "label" output.
    """
    encoded = embedding_model.encode([text])
    with torch.no_grad():
        features = torch.tensor(encoded, device=device, dtype=torch.float32)
        scores = classifier.probabilities(features).cpu().numpy()[0]
    confidences = {}
    for idx, score in enumerate(scores):
        confidences[index_to_category[idx]] = float(score)
    return confidences
62
+
63
+
64
# Build and launch the Gradio UI: free-text article in, label probabilities out.
# NOTE(review): theme="huggingface" was a Gradio 3.x built-in theme name; with
# gradio==4.11.0 pinned in requirements.txt it may be ignored or rejected —
# confirm against the installed Gradio version.
gr.Interface(
    predict,
    inputs=gr.Textbox(label="Article"),
    outputs="label",
    theme="huggingface",
    examples=sample_text,
    description=description,
).launch()
finetune_net.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch import nn
2
+ import torch
3
+ import torch.nn.functional as F
4
+
5
class Network(nn.Module):
    """Feed-forward classifier head: Linear -> ReLU -> (hidden MLP) -> Linear.

    With an empty ``layer_widths`` the net collapses to a single Linear layer
    followed by ReLU (see NOTE in ``forward``); the hidden stack and final
    layer become identity pass-throughs.
    """

    def __init__(self, input_dim: int, output_dim: int, layer_widths=None) -> None:
        """
        Args:
            input_dim: size of the input feature vector.
            output_dim: number of output classes (logit dimension).
            layer_widths: widths of the hidden layers, in order; ``None`` or
                ``[]`` means no hidden layers. (Default changed from the
                mutable ``[]`` to the ``None`` sentinel — same semantics,
                avoids the shared-mutable-default pitfall.)
        """
        super().__init__()
        layer_widths = [] if layer_widths is None else layer_widths

        self.input_dim = input_dim
        self.layer_widths = layer_widths
        self.output_dim = output_dim

        if len(layer_widths) > 0:
            self.FC_initial = nn.Linear(input_dim, layer_widths[0])
            self.hidden_layers = self.prepare_hidden_layers(layer_widths)
            self.FC_final = nn.Linear(layer_widths[-1], output_dim)
        else:
            # Single-layer case: empty Sequentials behave as identity modules.
            self.FC_initial = nn.Linear(input_dim, output_dim)
            self.hidden_layers = nn.Sequential()
            self.FC_final = nn.Sequential()

    def prepare_hidden_layers(self, layer_widths):
        """Build the stack of (Linear -> ReLU) blocks between consecutive widths."""
        hidden_layers = [
            nn.Sequential(nn.Linear(layer_widths[i], layer_widths[i + 1]), nn.ReLU())
            for i in range(len(layer_widths) - 1)
        ]
        return nn.Sequential(*hidden_layers)

    def forward(self, x):
        """Return unnormalized class logits for input batch ``x``."""
        # NOTE(review): ReLU is applied to FC_initial's output even when there
        # are no hidden layers, i.e. in that case the final logits are clamped
        # at 0 before softmax. Kept as-is to match the serialized model's
        # behavior — confirm this is intentional.
        out = F.relu(self.FC_initial(x))
        out = self.hidden_layers(out)
        out = self.FC_final(out)
        return out

    def probabilities(self, x):
        """Softmax of ``forward(x)`` over the last dim: per-class probabilities."""
        return F.softmax(self.forward(x), dim=-1)
requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bertopic==0.16.0
2
+ datasets==2.15.0
3
+ gradio==4.11.0
4
+ hdbscan==0.8.33
5
+ hopsworks==3.4.3
6
+ hsfs==3.4.5
7
+ huggingface-hub==0.20.1
8
+ joblib==1.3.2
9
+ matplotlib==3.8.2
10
+ numpy==1.26.2
11
+ pandas==2.0.3
12
+ requests==2.31.0
13
+ scikit-learn==1.3.2
14
+ scipy==1.11.4
15
+ sentence-transformers==2.2.2
16
+ tqdm==4.66.1
17
+ transformers==4.36.2
18
+ twofish==0.3.0
19
+ umap-learn==0.5.5
20
+
21
+ --extra-index-url https://download.pytorch.org/whl/cu118
22
+ torch
23
+ torchvision
24
+ torchaudio
25
+
26
+