shulik7 committed on
Commit
7cb3770
·
1 Parent(s): d50df92

implement the functions

Browse files
Files changed (3) hide show
  1. README.md +0 -1
  2. app.py +57 -8
  3. requirements.txt +5 -0
README.md CHANGED
@@ -11,4 +11,3 @@ license: mit
11
  short_description: Predict the probability of a chemical compound to be natural
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
11
  short_description: Predict the probability of a chemical compound to be natural
12
  ---
13
 
 
app.py CHANGED
@@ -1,15 +1,64 @@
1
  import gradio as gr
 
2
  import spaces
3
- import torch
4
 
5
- zero = torch.Tensor([0]).cuda()
6
- print(zero.device) # <-- 'cpu' 🤔
 
 
 
 
 
 
 
 
7
 
8
  @spaces.GPU
9
- def greet(n):
10
- print(zero.device) # <-- 'cuda:0' 🤗
11
- return f"Hello {zero + n} Tensor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
14
- demo.launch()
 
 
 
 
 
 
15
 
 
 
 
1
  import gradio as gr
2
+ import numpy as np
3
  import spaces
4
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer
5
 
6
# Module-level handles shared by load_model() and predict(); both stay None
# until load_model() has been called.
model = None
tokenizer = None


def load_model(model_path):
    """Populate the global ``model`` and ``tokenizer`` from ``model_path``.

    The model is created with
    ``AutoModelForSequenceClassification.from_pretrained(model_path)`` and the
    tokenizer with ``AutoTokenizer.from_pretrained(model_path)``. A
    confirmation line is printed once both have been assigned.

    Args:
        model_path: Identifier or path handed unchanged to both
            ``from_pretrained`` calls.
    """
    global model, tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    print("Model loaded from {}".format(model_path))
16
 
17
@spaces.GPU
def predict(input_text):
    """Classify one SMILES string as "Natural" or "Unnatural".

    Args:
        input_text: Raw text from the UI textbox. Surrounding whitespace
            (including trailing newlines from the multi-line box) is
            stripped before tokenization.

    Returns:
        A two-line string with the predicted label and the probability of
        the "Natural" class, or a string starting with ``"Error:"`` when the
        model has not been loaded or the input is empty.
    """
    # Function-scope import: this module does not import torch at top level.
    import torch

    if model is None or tokenizer is None:
        return "Error: Model not loaded"

    smiles = (input_text or "").strip()
    if not smiles:
        # Without this guard an empty box would still be tokenized and scored.
        return "Error: Please enter a SMILES string"

    model.to('cuda')
    # Tokenize the single input directly (no dataset wrapper).
    inputs = tokenizer(smiles, padding='max_length', truncation=True, max_length=512, return_tensors="pt")

    # The input tensors must live on the same device as the model.
    inputs = {k: v.to('cuda') for k, v in inputs.items()}

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.cpu().numpy()

    # Numerically stable softmax: subtract the row maximum before exp().
    exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
    probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

    # Index of the most probable class for the single input row.
    pred_label = int(np.argmax(probs, axis=1)[0])
    label_map = {0: "Unnatural", 1: "Natural"}
    pred_label_text = label_map[pred_label]

    # Class 1 is "Natural" per label_map, so its softmax entry is the
    # reported probability whichever class was predicted.
    natural_prob = float(probs[0][1])

    result = f"Prediction: {pred_label_text}\n"
    result += f"Natural Product Probability: {natural_prob:.4f}\n"

    return result
50
+
51
# Load the checkpoint at import time so the globals used by predict() are
# populated before the first request.
load_model("shulik7/NP_SMILES_tokenized_PubChem_shard00_160k")

# Build the UI: a multi-line text input wired to predict() and a text output.
_smiles_box = gr.Textbox(lines=5, placeholder="Enter the SMILES here...")
_result_box = gr.Textbox(label="Prediction Results")
iface = gr.Interface(
    fn=predict,
    inputs=_smiles_box,
    outputs=_result_box,
    title="Naturalness Prediction",
    description="Enter SMILES string to get the prediction from the fine-tuned ChemBERTa model.",
)

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ spaces
3
+ numpy
4
+ transformers
5
+ torch