Sleepyp00 commited on
Commit
cb1b925
·
1 Parent(s): a2a775d
Files changed (4) hide show
  1. .gitignore +2 -0
  2. app.py +71 -0
  3. finetune_net.py +37 -0
  4. requirements.txt +26 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ **.venv/
2
+ **__pycache__/
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import hopsworks
3
+ import torch
4
+ import joblib
5
# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): hopsworks.login() presumably reads credentials (API key) from
# the environment — confirm deployment config.
project = hopsworks.login()
fs = project.get_feature_store()  # NOTE(review): feature store handle is never used below — confirm it is needed

mr = project.get_model_registry()
# Classifier head: downloaded from the registry, then unpickled with joblib.
# The pickle presumably resolves to finetune_net.Network (it exposes
# .probabilities(), used in predict below) — confirm.
classifier = mr.get_model("base_classifier", version = 1)
model_dir = classifier.download()
classifier = joblib.load(model_dir + "/base_classifier.pkl")

# Embedding model used to encode raw article text (exposes .encode()).
embedding_model = mr.get_model("news_embedding", version = 1)
model_dir = embedding_model.download()
embedding_model = joblib.load(model_dir + "/news_embedding.pkl")
18
+
19
+
20
# Maps the classifier's output index to a human-readable category label
# shown in the Gradio "label" output.
index_to_category = {
    0: "Politics",  # fixed typo: was "Polititcs"
    1: "Science",
    2: "Entertainment",
    3: "Sports",
    4: "Business",
}
27
+
28
# Example inputs for the Gradio UI; each inner list is one example row
# (a single value for the Article textbox).
sample_text = [
    [
        """
Alan Horn, longtime film executive, to retire from Disney
Alan Horn, the film executive who helped turn Walt Disney Studios into the most powerful movie studios
in Hollywood and whose 50-year career has touched films from “When Harry Met Sally...” to “The Dark Knight,” is retiring.”"""
    ],
    [
        # Typo fix: headline previously read "JRisks ...".
        """Risks of US electoral chaos deepen after Trump is barred from another state ballot
"""
    ],
    ["Engineers Working to Resolve Issue With Voyager 1 Computer"],
    ["""Nick David at the double as Harlequins put on a show against Gloucester
Harlequins 32-26 Gloucester
Visitors rally late but Quins make Twickenham advantage count
Gerard Meagher at Twickenham
It was not long ago that Marcus Smith was still adjusting to being back in the fly-half jersey after the World Cup but, make no mistake, he is in the groove now. Smith ultimately proved the difference as Harlequins put on a show in their annual fixture here before Gloucester rallied and almost threatened the most unlikely of comebacks late in the game.
"""]
]
48
+
49
# Markdown description rendered beneath the app title in the Gradio UI.
description = """
This app will provide classifications for text from a news article.
The input is currently truncated at around 400 words so make sure to include the most important part of the article.
"""
53
+
54
+
55
+
56
def predict(text):
    """Classify one news article.

    Encodes the raw text with the embedding model, runs the classifier head,
    and returns a {category label: probability} dict suitable for a Gradio
    "label" output.
    """
    encoded = embedding_model.encode([text])
    with torch.no_grad():
        features = torch.tensor(encoded, device=device, dtype=torch.float32)
        scores = classifier.probabilities(features).cpu().numpy()[0]
    confidences = {}
    for idx, score in enumerate(scores):
        confidences[index_to_category[idx]] = float(score)
    return confidences
62
+
63
+
64
# Build and launch the Gradio UI: free-text article in, label probabilities out.
# NOTE(review): theme="huggingface" was a Gradio 3.x built-in theme name; with
# gradio==4.11.0 pinned in requirements.txt it may be ignored or rejected —
# confirm against the installed Gradio version.
gr.Interface(
    predict,
    inputs=gr.Textbox(label="Article"),
    outputs="label",
    theme="huggingface",
    examples=sample_text,
    description=description,
).launch()
finetune_net.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch import nn
2
+ import torch
3
+ import torch.nn.functional as F
4
+
5
class Network(nn.Module):
    """Feed-forward classifier head: Linear -> ReLU -> (hidden MLP) -> Linear.

    With an empty ``layer_widths`` the net collapses to a single Linear layer
    followed by ReLU (see NOTE in ``forward``); the hidden stack and final
    layer become identity pass-throughs.
    """

    def __init__(self, input_dim: int, output_dim: int, layer_widths=None) -> None:
        """
        Args:
            input_dim: size of the input feature vector.
            output_dim: number of output classes (logit dimension).
            layer_widths: widths of the hidden layers, in order; ``None`` or
                ``[]`` means no hidden layers. (Default changed from the
                mutable ``[]`` to the ``None`` sentinel — same semantics,
                avoids the shared-mutable-default pitfall.)
        """
        super().__init__()
        layer_widths = [] if layer_widths is None else layer_widths

        self.input_dim = input_dim
        self.layer_widths = layer_widths
        self.output_dim = output_dim

        if len(layer_widths) > 0:
            self.FC_initial = nn.Linear(input_dim, layer_widths[0])
            self.hidden_layers = self.prepare_hidden_layers(layer_widths)
            self.FC_final = nn.Linear(layer_widths[-1], output_dim)
        else:
            # Single-layer case: empty Sequentials behave as identity modules.
            self.FC_initial = nn.Linear(input_dim, output_dim)
            self.hidden_layers = nn.Sequential()
            self.FC_final = nn.Sequential()

    def prepare_hidden_layers(self, layer_widths):
        """Build the stack of (Linear -> ReLU) blocks between consecutive widths."""
        hidden_layers = [
            nn.Sequential(nn.Linear(layer_widths[i], layer_widths[i + 1]), nn.ReLU())
            for i in range(len(layer_widths) - 1)
        ]
        return nn.Sequential(*hidden_layers)

    def forward(self, x):
        """Return unnormalized class logits for input batch ``x``."""
        # NOTE(review): ReLU is applied to FC_initial's output even when there
        # are no hidden layers, i.e. in that case the final logits are clamped
        # at 0 before softmax. Kept as-is to match the serialized model's
        # behavior — confirm this is intentional.
        out = F.relu(self.FC_initial(x))
        out = self.hidden_layers(out)
        out = self.FC_final(out)
        return out

    def probabilities(self, x):
        """Softmax of ``forward(x)`` over the last dim: per-class probabilities."""
        return F.softmax(self.forward(x), dim=-1)
requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bertopic==0.16.0
2
+ datasets==2.15.0
3
+ gradio==4.11.0
4
+ hdbscan==0.8.33
5
+ hopsworks==3.4.3
6
+ hsfs==3.4.5
7
+ huggingface-hub==0.20.1
8
+ joblib==1.3.2
9
+ matplotlib==3.8.2
10
+ numpy==1.26.2
11
+ pandas==2.0.3
12
+ requests==2.31.0
13
+ scikit-learn==1.3.2
14
+ scipy==1.11.4
15
+ sentence-transformers==2.2.2
16
+ tqdm==4.66.1
17
+ transformers==4.36.2
18
+ twofish==0.3.0
19
+ umap-learn==0.5.5
20
+
21
+ --extra-index-url https://download.pytorch.org/whl/cu118
22
+ torch
23
+ torchvision
24
+ torchaudio
25
+
26
+