bernardo-de-almeida committed on
Commit
516378d
·
1 Parent(s): 89c5f16

check seq length

Browse files
Files changed (2) hide show
  1. app.py +13 -2
  2. requirements.txt +9 -8
app.py CHANGED
@@ -40,6 +40,7 @@ if HF_TOKEN is None:
40
 
41
  PLOT_TARGET_POINTS = int(os.environ.get("PLOT_TARGET_POINTS", "1500"))
42
  SEARCH_MAX_RESULTS = int(os.environ.get("SEARCH_MAX_RESULTS", "20"))
 
43
 
44
  # -----------------------------
45
  # Load pipeline (reloadable)
@@ -553,6 +554,17 @@ def predict(
553
  raise gr.Error("chrom is required when use_coords=True")
554
  if start is None or end is None or int(end) <= int(start):
555
  raise gr.Error("start/end must be set and end > start when use_coords=True")
 
 
 
 
 
 
 
 
 
 
 
556
  inputs = {
557
  "chrom": chrom,
558
  "start": int(start),
@@ -563,9 +575,8 @@ def predict(
563
  if not seq or not seq.strip():
564
  raise gr.Error("seq is required when use_coords=False")
565
  seq_stripped = seq.strip()
566
- # Check sequence size: max 1MB (1,048,576 bytes)
567
  # Each character is typically 1 byte, so check length
568
- MAX_SEQUENCE_SIZE = 1_048_576 # 1MB in bytes
569
  if len(seq_stripped) > MAX_SEQUENCE_SIZE:
570
  raise gr.Error(
571
  f"Sequence input is too large ({len(seq_stripped):,} characters). "
 
40
 
41
  PLOT_TARGET_POINTS = int(os.environ.get("PLOT_TARGET_POINTS", "1500"))
42
  SEARCH_MAX_RESULTS = int(os.environ.get("SEARCH_MAX_RESULTS", "20"))
43
+ MAX_SEQUENCE_SIZE = 1_048_576 # 1MB in bytes - maximum allowed sequence input size
44
 
45
  # -----------------------------
46
  # Load pipeline (reloadable)
 
554
  raise gr.Error("chrom is required when use_coords=True")
555
  if start is None or end is None or int(end) <= int(start):
556
  raise gr.Error("start/end must be set and end > start when use_coords=True")
557
+
558
+ # Check sequence size before fetching from API: max 1MB
559
+ # Each base pair is typically 1 byte, so check region length
560
+ region_length = int(end) - int(start)
561
+ if region_length > MAX_SEQUENCE_SIZE:
562
+ raise gr.Error(
563
+ f"Requested genomic region is too large ({region_length:,} base pairs). "
564
+ f"Maximum allowed size is {MAX_SEQUENCE_SIZE:,} base pairs (1MB). "
565
+ f"Please select a smaller region."
566
+ )
567
+
568
  inputs = {
569
  "chrom": chrom,
570
  "start": int(start),
 
575
  if not seq or not seq.strip():
576
  raise gr.Error("seq is required when use_coords=False")
577
  seq_stripped = seq.strip()
578
+ # Check sequence size: max 1MB
579
  # Each character is typically 1 byte, so check length
 
580
  if len(seq_stripped) > MAX_SEQUENCE_SIZE:
581
  raise gr.Error(
582
  f"Sequence input is too large ({len(seq_stripped):,} characters). "
requirements.txt CHANGED
@@ -1,9 +1,10 @@
1
- gradio==6.1.0
2
- kaleido==1.2.0
3
- matplotlib==3.10.8
4
- numpy==2.3.5
5
- plotly==6.5.0
6
- pyBigWig==0.3.24
7
- requests==2.32.5
 
8
  torch
9
- transformers==4.57.3
 
1
+ gradio>=4.0.0
2
+ kaleido
3
+ matplotlib
4
+ numpy
5
+ plotly
6
+ pyBigWig
7
+ pyfaidx
8
+ requests
9
  torch
10
+ transformers>=4.41.0