Muhamed-Kheir committed on
Commit
aa33062
·
verified ·
1 Parent(s): 9d51476

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -20
app.py CHANGED
@@ -1,20 +1,122 @@
1
- import gradio as gr
2
-
3
def show_instructions():
    """Return the help text describing how to run the CLI script locally."""
    intro = "This Space hosts a command-line k-mer analysis script.\n\n"
    usage_header = "Run it locally like this:\n"
    # The example command is split across two literals only to keep lines short.
    command = (
        "python kmer_unique.py --group-dirs path/to/groupA path/to/groupB "
        "--k-min 1 --k-max 50 --min-freq 5 --outdir kmer_results\n"
    )
    return "".join((intro, usage_header, command))
10
-
11
# Minimal Gradio front-end: a single text output that shows the CLI usage
# instructions returned by show_instructions().
demo = gr.Interface(
    title="Unique k-mer Analysis (CLI tool)",
    description="This Space is a Gradio wrapper so the repository builds. The main tool is kmer_unique.py.",
    fn=show_instructions,
    inputs=[],
    outputs="text",
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import shutil
4
+ import uuid
5
+ import zipfile
6
+
7
+ import gradio as gr
8
+
9
+ # Ensure repo root is importable on Spaces
10
+ sys.path.append(os.path.dirname(__file__))
11
+
12
+ import kmer_predict # must be in repo root
13
+
14
+
15
+ PERSIST_BASE = "/tmp/kmer_predict_runs"
16
+ FASTA_EXTS = (".fa", ".fasta", ".fas", ".fna")
17
+
18
+
19
+ def _zip_dir(folder: str, zip_path: str) -> None:
20
+ with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
21
+ for root, _, files in os.walk(folder):
22
+ for fn in files:
23
+ full = os.path.join(root, fn)
24
+ rel = os.path.relpath(full, folder)
25
+ z.write(full, rel)
26
+
27
+
28
def run_prediction(unknown_files, kmer_zip, seqtype, mode, identity, coverage, fdr):
    """Run ``kmer_predict.predict`` on uploaded files and return output paths.

    Args:
        unknown_files: Uploaded unknown FASTA files. Each item may be an
            object exposing ``path``/``name`` (and optionally
            ``orig_name``/``filename``) or anything whose ``str()`` is a path.
        kmer_zip: Uploaded k-mer results archive, in the same forms as above.
        seqtype: Passed through to ``kmer_predict.predict`` as ``seqtype``.
        mode: Passed through to ``kmer_predict.predict`` as ``mode``.
        identity: Identity threshold; converted with ``float``.
        coverage: Minimum coverage; converted with ``float``.
        fdr: FDR alpha; converted with ``float``.

    Returns:
        A ``(plot_path, csv_path, zip_path)`` tuple. ``plot_path`` and
        ``csv_path`` are ``None`` when the expected file was not written to
        the output directory; ``zip_path`` is an archive of everything in
        the output directory.

    Raises:
        gr.Error: If no unknown files or no k-mer ZIP were supplied, if the
            k-mer input does not end in ``.zip``, or if any of the numeric
            parameters cannot be converted to ``float``.
    """
    if not unknown_files:
        raise gr.Error("Please upload at least one unknown FASTA file.")
    if not kmer_zip:
        raise gr.Error("Please upload the k-mer results ZIP from Space 1.")

    # Validate all inputs before touching the filesystem so a rejected
    # request does not leave an empty run directory behind.
    kmer_zip_path = getattr(kmer_zip, "path", None) or getattr(kmer_zip, "name", None) or str(kmer_zip)
    if not str(kmer_zip_path).lower().endswith(".zip"):
        raise gr.Error("K-mer input must be a .zip file from Space 1.")

    try:
        identity_threshold = float(identity)
        min_coverage = float(coverage)
        fdr_alpha = float(fdr)
    except (TypeError, ValueError) as exc:
        # A cleared numeric field would otherwise surface as an opaque TypeError.
        raise gr.Error("Identity, coverage and FDR alpha must be numbers.") from exc

    # Each run gets its own directory so concurrent requests do not collide.
    os.makedirs(PERSIST_BASE, exist_ok=True)
    run_id = uuid.uuid4().hex[:10]
    run_dir = os.path.join(PERSIST_BASE, f"run_{run_id}")
    unknown_dir = os.path.join(run_dir, "unknown")
    outdir = os.path.join(run_dir, "predictions")
    os.makedirs(unknown_dir, exist_ok=True)
    os.makedirs(outdir, exist_ok=True)

    # Copy unknown FASTAs, keeping their original names where possible.
    used_names = set()
    for idx, f in enumerate(unknown_files, start=1):
        src = getattr(f, "path", None) or getattr(f, "name", None) or str(f)
        orig = getattr(f, "orig_name", None) or getattr(f, "filename", None) or os.path.basename(src)

        # basename() also strips any directory components from the upload name.
        name = os.path.basename(orig)
        # Ensure a FASTA extension so the copied inputs are easy to recognise.
        if not name.lower().endswith(FASTA_EXTS):
            name = f"unknown_{idx}.fasta"

        # Two uploads can share a basename (or clash with the fallback name);
        # add a numeric suffix rather than silently overwriting an earlier file.
        stem, ext = os.path.splitext(name)
        candidate = name
        suffix = 1
        while candidate.lower() in used_names:
            candidate = f"{stem}_{suffix}{ext}"
            suffix += 1
        used_names.add(candidate.lower())

        shutil.copy(src, os.path.join(unknown_dir, candidate))

    # Run prediction
    kmer_predict.predict(
        unknown=unknown_dir,
        kmer_input=kmer_zip_path,
        output_dir=outdir,
        seqtype=seqtype,
        mode=mode,
        identity_threshold=identity_threshold,
        min_coverage=min_coverage,
        fdr_alpha=fdr_alpha,
        group_regex=kmer_predict.DEFAULT_GROUP_REGEX,
    )

    plot_path = os.path.join(outdir, "predicted_results_summary.png")
    csv_path = os.path.join(outdir, "predictions_by_alignment.csv")

    # Bundle everything the predictor wrote, whatever the file names are.
    zip_path = os.path.join(run_dir, "prediction_outputs.zip")
    _zip_dir(outdir, zip_path)

    # NOTE(review): assumes predict() may not always write both files (e.g.
    # depending on mode) - confirm against kmer_predict. Returning None keeps
    # the component empty instead of handing back a path that does not exist.
    if not os.path.isfile(plot_path):
        plot_path = None
    if not os.path.isfile(csv_path):
        csv_path = None

    return plot_path, csv_path, zip_path
80
+
81
+
82
with gr.Blocks() as demo:
    # --- Header -----------------------------------------------------------
    gr.Markdown("# K-mer Sequence Predictor")
    gr.Markdown(
        "Upload unknown FASTA sequences and the **kmer_results.zip** produced by the Unique k-mer Space."
    )

    # --- Inputs -----------------------------------------------------------
    # The upload filter reuses the module-level extension list.
    unknown_files = gr.File(
        label="Unknown FASTA files",
        file_count="multiple",
        file_types=list(FASTA_EXTS),
    )

    kmer_zip = gr.File(
        label="kmer_results.zip (from Space 1)",
        file_count="single",
        file_types=[".zip"],
    )

    with gr.Row():
        seqtype = gr.Radio(choices=["dna", "protein"], value="dna", label="Sequence type")
        mode = gr.Radio(choices=["fast", "full"], value="fast", label="Mode")

    # Labels mark these three thresholds as applying to "full" mode.
    with gr.Row():
        identity = gr.Number(value=0.90, precision=2, label="Identity (full mode)")
        coverage = gr.Number(value=0.80, precision=2, label="Coverage (full mode)")
        fdr = gr.Number(value=0.05, precision=3, label="FDR alpha (full mode)")

    run_btn = gr.Button("Run prediction")

    # --- Outputs ----------------------------------------------------------
    out_plot = gr.Image(label="Prediction summary plot")
    out_csv = gr.File(label="Predictions CSV")
    out_zip = gr.File(label="Download all outputs (ZIP)")

    # Order must match run_prediction's parameters and its returned tuple.
    prediction_inputs = [unknown_files, kmer_zip, seqtype, mode, identity, coverage, fdr]
    prediction_outputs = [out_plot, out_csv, out_zip]
    run_btn.click(
        fn=run_prediction,
        inputs=prediction_inputs,
        outputs=prediction_outputs,
    )

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()