Spaces:
Runtime error
Runtime error
Update cas9on.py (#4)
Browse files
- Update cas9on.py (bf083c28f7799d4200a4b99c657ad0f124e70a49)
Co-authored-by: Qingyang Liu <LfOreVEr@users.noreply.huggingface.co>
cas9on.py
CHANGED
|
@@ -70,20 +70,24 @@ def fetch_ensembl_sequence(transcript_id):
|
|
| 70 |
print(f"Error fetching sequence data from Ensembl: {response.text}")
|
| 71 |
return None
|
| 72 |
|
| 73 |
-
def find_crispr_targets(sequence, chr, start, strand, transcript_id, exon_id, pam="NGG", target_length=20):
|
| 74 |
targets = []
|
| 75 |
len_sequence = len(sequence)
|
| 76 |
-
complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
|
| 77 |
dnatorna = {'A': 'A', 'T': 'U', 'C': 'C', 'G': 'G'}
|
| 78 |
|
| 79 |
-
if strand == -1:
|
| 80 |
-
sequence = ''.join([complement[base] for base in sequence])
|
| 81 |
for i in range(len_sequence - len(pam) + 1):
|
| 82 |
if sequence[i + 1:i + 3] == pam[1:]:
|
| 83 |
if i >= target_length:
|
| 84 |
target_seq = sequence[i - target_length:i + 3]
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
gRNA = ''.join([dnatorna[base] for base in sequence[i - target_length:i]])
|
| 88 |
targets.append([target_seq, gRNA, chr, str(tar_start), str(tar_end), str(strand), transcript_id, exon_id])
|
| 89 |
|
|
@@ -131,9 +135,10 @@ def process_gene(gene_symbol, model_path):
|
|
| 131 |
if gene_sequence:
|
| 132 |
all_gene_sequences.append(gene_sequence) # Add this gene sequence to the list
|
| 133 |
start = exon['start']
|
|
|
|
| 134 |
strand = exon['strand']
|
| 135 |
chr = exon['seq_region_name']
|
| 136 |
-
targets = find_crispr_targets(gene_sequence, chr, start, strand, transcript_id, exon_id)
|
| 137 |
if targets:
|
| 138 |
# Predict on-target efficiency for each gRNA site
|
| 139 |
formatted_data = format_prediction_output(targets, model_path)
|
|
|
|
| 70 |
print(f"Error fetching sequence data from Ensembl: {response.text}")
|
| 71 |
return None
|
| 72 |
|
| 73 |
+
def find_crispr_targets(sequence, chr, start, end, strand, transcript_id, exon_id, pam="NGG", target_length=20):
|
| 74 |
targets = []
|
| 75 |
len_sequence = len(sequence)
|
| 76 |
+
#complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
|
| 77 |
dnatorna = {'A': 'A', 'T': 'U', 'C': 'C', 'G': 'G'}
|
| 78 |
|
|
|
|
|
|
|
| 79 |
for i in range(len_sequence - len(pam) + 1):
|
| 80 |
if sequence[i + 1:i + 3] == pam[1:]:
|
| 81 |
if i >= target_length:
|
| 82 |
target_seq = sequence[i - target_length:i + 3]
|
| 83 |
+
if strand == -1:
|
| 84 |
+
tar_start = end - (i + 2)
|
| 85 |
+
tar_end = end - (i - target_length)
|
| 86 |
+
#seq_in_ref = ''.join([complement[base] for base in target_seq])[::-1]
|
| 87 |
+
else:
|
| 88 |
+
tar_start = start + i - target_length
|
| 89 |
+
tar_end = start + i + 3 - 1
|
| 90 |
+
#seq_in_ref = target_seq
|
| 91 |
gRNA = ''.join([dnatorna[base] for base in sequence[i - target_length:i]])
|
| 92 |
targets.append([target_seq, gRNA, chr, str(tar_start), str(tar_end), str(strand), transcript_id, exon_id])
|
| 93 |
|
|
|
|
| 135 |
if gene_sequence:
|
| 136 |
all_gene_sequences.append(gene_sequence) # Add this gene sequence to the list
|
| 137 |
start = exon['start']
|
| 138 |
+
end = exon['end']
|
| 139 |
strand = exon['strand']
|
| 140 |
chr = exon['seq_region_name']
|
| 141 |
+
targets = find_crispr_targets(gene_sequence, chr, start, end, strand, transcript_id, exon_id)
|
| 142 |
if targets:
|
| 143 |
# Predict on-target efficiency for each gRNA site
|
| 144 |
formatted_data = format_prediction_output(targets, model_path)
|