ansar-y0usif committed on
Commit
d910243
·
verified ·
1 Parent(s): 24d1289

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ f-gene_verification/model/best_model.keras filter=lfs diff=lfs merge=lfs -text
f-gene_verification/Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

WORKDIR /app

# Install dependencies before copying the application source so this layer is
# reused from the build cache when only the code or model files change.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
f-gene_verification/main.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ import pickle
4
+ import numpy as np
5
+ from tensorflow.keras.models import load_model
6
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
7
+
8
+ # Load model artifacts
9
model = load_model("model/best_model.keras")

# NOTE: pickle.load can execute arbitrary code from the file it reads; this
# must only ever be pointed at the trusted artifact shipped with the app.
with open("model/kmer_to_index.pkl", "rb") as f:
    kmer_to_index = pickle.load(f)

# Explicit encoding so the read does not depend on the container's locale.
with open("model/maxlen.txt", "r", encoding="utf-8") as f:
    maxlen = int(f.read().strip())
16
+
17
+ # K-mer tokenizer
18
def kmer_tokenizer(seq, k=6):
    """Split *seq* into its overlapping k-mers.

    Args:
        seq: Sequence to tokenize (anything with a length that supports
            slicing; callers in this file pass a ``str``).
        k: Length of each k-mer. Must be a positive integer.

    Returns:
        A list with the ``len(seq) - k + 1`` consecutive windows of length
        ``k``, in order. The list is empty when ``seq`` is shorter than ``k``.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        # A non-positive k would silently yield empty or wrongly sized tokens.
        raise ValueError(f"k must be a positive integer, got {k}")
    return [seq[i:i + k] for i in range(len(seq) - k + 1)]
20
+
21
+ # Preprocess function
22
def preprocess_sequence(sequence: str):
    """Encode a raw sequence as a padded, single-row batch of k-mer indices.

    All whitespace is removed and the sequence is upper-cased, then it is
    split into k-mers with ``kmer_tokenizer`` (default ``k``). Each k-mer is
    looked up in ``kmer_to_index``; k-mers missing from the table are encoded
    as ``0``.

    Args:
        sequence: The sequence text as received from the caller.

    Returns:
        The result of ``pad_sequences`` for a one-sequence batch, post-padded
        to ``maxlen``.
    """
    # Remove every whitespace character (not just leading/trailing) so that
    # line-wrapped input does not produce k-mers spanning a line break, which
    # would all fall through to the unknown index 0.
    cleaned = "".join(sequence.split()).upper()
    tokens = kmer_tokenizer(cleaned)
    encoded = [kmer_to_index.get(kmer, 0) for kmer in tokens]
    # NOTE(review): `truncating` is left at the pad_sequences default ('pre'),
    # so an input with more than `maxlen` k-mers loses its leading k-mers --
    # confirm this matches how the model was trained.
    return pad_sequences([encoded], maxlen=maxlen, padding='post')
27
+
28
+ # Define request schema
29
class SequenceInput(BaseModel):
    """Request body accepted by the ``/predict`` endpoint."""

    # Sequence text to classify.
    sequence: str
31
+
32
+ # Class labels, indexed by the model's predicted class id
33
+ LABELS = ["Random", "F", "P", "N", "M", "HN", "L"]
34
+
35
+ # Create app
36
+ app = FastAPI(title="Gene Classifier API")
37
+
38
@app.post("/predict")
def predict(input_data: SequenceInput):
    """Classify the submitted sequence and report whether it is an F gene.

    Args:
        input_data: Request body carrying the sequence text.

    Returns:
        A dict with ``status``, ``message`` and ``confidence``. When the
        predicted label is neither ``"F"`` nor ``"Random"`` the dict also
        carries ``predicted_label``.

    Raises:
        HTTPException: 400 when the sequence has fewer than 6 characters once
            whitespace is removed; 500 when preprocessing or inference fails.
    """
    # Drop all whitespace (including line breaks inside a pasted sequence) so
    # the length check below counts only sequence characters.
    seq = "".join(input_data.sequence.split())
    if len(seq) < 6:
        raise HTTPException(status_code=400, detail="Sequence too short. Must be at least 6 bases.")

    # Only the steps that can actually fail live inside the try block.
    try:
        padded = preprocess_sequence(seq)
        pred = model.predict(padded)
        predicted_class = int(np.argmax(pred))
        label = LABELS[predicted_class]
        confidence = float(np.max(pred))
    except Exception as e:
        # Chain the cause so the original traceback is preserved server-side.
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}") from e

    # Custom logic
    if label == "F":
        return {
            "status": "success",
            "message": "F gene detected.",
            "confidence": confidence,
        }
    if label == "Random":
        return {
            "status": "error",
            "message": "Unidentified sequence detected. Make sure you're entering the F gene of the NDV.",
            "confidence": confidence,
        }
    return {
        "status": "error",
        "message": "No F gene detected. Please enter an F gene.",
        "predicted_label": label,
        "confidence": confidence,
    }
f-gene_verification/model/best_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be7649bfcea0816dffa5cd6b2490c9c7f08bae3ec2b5e39d171ee0f661429a4e
3
+ size 24844261
f-gene_verification/model/kmer_to_index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd2a12751bc0417d7edd2379b2aecd79bd3fa52dafde1c376a7f604280798bcb
3
+ size 83542
f-gene_verification/model/maxlen.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 6649
f-gene_verification/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ tensorflow
4
+ scikit-learn
5
+ biopython
6
+ pydantic
7
+ numpy