Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -51,10 +51,11 @@ def _precompute_can_open(seq, min_loop=3, allow_gu=True):
|
|
| 51 |
break
|
| 52 |
return can
|
| 53 |
|
|
|
|
| 54 |
# --- constrained processor ---
|
| 55 |
class BalancedParenProcessor(LogitsProcessor):
|
| 56 |
def __init__(self, lp_id, rp_id, dot_id, total_len, can_open,
|
| 57 |
-
dot_bias=0.
|
| 58 |
self.lp_id, self.rp_id, self.dot_id = lp_id, rp_id, dot_id
|
| 59 |
self.total_len = total_len
|
| 60 |
self.step = 0
|
|
@@ -66,76 +67,77 @@ class BalancedParenProcessor(LogitsProcessor):
|
|
| 66 |
self.window=window
|
| 67 |
|
| 68 |
def __call__(self, input_ids, scores):
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
if must_close:
|
| 75 |
-
allowed=[self.rp_id]
|
| 76 |
else:
|
| 77 |
-
if self.depth>0:
|
| 78 |
allowed.append(self.rp_id)
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
| 80 |
allowed.append(self.lp_id)
|
|
|
|
| 81 |
allowed.append(self.dot_id)
|
| 82 |
-
mask[:,allowed]=0.0
|
| 83 |
-
scores=scores+mask
|
| 84 |
-
scores[:,self.dot_id]+=self.dot_bias
|
| 85 |
-
if len(self.history)>=self.window and all(t in (self.lp_id,self.rp_id) for t in self.history[-self.window:]):
|
| 86 |
-
scores[:,self.lp_id]-=self.paren_penalty
|
| 87 |
-
scores[:,self.rp_id]-=self.paren_penalty
|
| 88 |
-
return scores
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
sorted_probs[mask]=0
|
| 106 |
-
sorted_probs/=sorted_probs.sum(dim=-1,keepdim=True)
|
| 107 |
-
idx=torch.multinomial(sorted_probs,1)
|
| 108 |
-
return sorted_idx.gather(-1,idx).squeeze(-1)
|
| 109 |
|
| 110 |
# --- generator ---
|
| 111 |
def _generate_db(seq):
|
| 112 |
-
tok,model,device=_load_model_and_tokenizer()
|
| 113 |
-
n=len(seq)
|
| 114 |
-
prompt=f"RNA: {seq}\nDot-bracket (exactly {n} characters using only '(' ')' '.'):\n"
|
| 115 |
-
lp=_char_token_id(tok,"("); rp=_char_token_id(tok,")"); dot=_char_token_id(tok,".")
|
| 116 |
-
can=_precompute_can_open(seq)
|
| 117 |
-
proc=BalancedParenProcessor(lp,rp,dot,n,can)
|
| 118 |
-
procs=LogitsProcessorList([proc])
|
| 119 |
-
inputs=tok(prompt,return_tensors="pt").to(device)
|
| 120 |
-
cur=inputs["input_ids"]
|
| 121 |
-
generated=[]
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
logits=
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
| 137 |
return db
|
| 138 |
|
|
|
|
| 139 |
# --- structural element translation ---
|
| 140 |
def dotbracket_to_structural(dot_str):
|
| 141 |
if not dot_str: return "<start><external_loop><end>"
|
|
|
|
| 51 |
break
|
| 52 |
return can
|
| 53 |
|
| 54 |
+
# --- constrained processor ---
|
| 55 |
# --- constrained processor ---
|
| 56 |
class BalancedParenProcessor(LogitsProcessor):
|
| 57 |
def __init__(self, lp_id, rp_id, dot_id, total_len, can_open,
|
| 58 |
+
dot_bias=0.0, paren_penalty=0.0, window=5):
|
| 59 |
self.lp_id, self.rp_id, self.dot_id = lp_id, rp_id, dot_id
|
| 60 |
self.total_len = total_len
|
| 61 |
self.step = 0
|
|
|
|
| 67 |
self.window=window
|
| 68 |
|
| 69 |
def __call__(self, input_ids, scores):
    """Constrain next-token logits to '(', ')' or '.' only.

    Applies a -inf mask to every vocabulary entry except the structure
    tokens that are legal at the current position, so the generated
    dot-bracket string is guaranteed to be balanced and exactly
    ``total_len`` characters long.
    """
    pos = self.step
    remaining = self.total_len - pos
    neg_inf_mask = torch.full_like(scores, float("-inf"))

    # When every remaining slot is needed to close open brackets, ')' is forced.
    if self.depth > 0 and remaining == self.depth:
        permitted = [self.rp_id]
    else:
        permitted = []
        if self.depth > 0:
            permitted.append(self.rp_id)
        # Opening is allowed only while enough room remains to close later
        # (remaining - 2 >= depth is deliberately a bit lenient, to encourage
        # stems) and this position can legally pair per the precomputed table.
        ok_to_open = (
            remaining - 2 >= self.depth
            and pos < len(self.can_open)
            and self.can_open[pos]
        )
        if ok_to_open:
            permitted.append(self.lp_id)
        permitted.append(self.dot_id)

    neg_inf_mask[:, permitted] = 0.0
    scores = scores + neg_inf_mask

    # Optional bias toward unpaired positions (off by default).
    if self.dot_bias != 0.0:
        scores[:, self.dot_id] += self.dot_bias

    # Optional mild regularizer against long uninterrupted runs of parens.
    if self.paren_penalty and len(self.history) >= self.window and all(
        tok in (self.lp_id, self.rp_id) for tok in self.history[-self.window:]
    ):
        scores[:, self.lp_id] -= self.paren_penalty
        scores[:, self.rp_id] -= self.paren_penalty

    return scores
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# --- generator ---
|
| 109 |
def _generate_db(seq):
    """Generate a balanced dot-bracket structure for RNA sequence ``seq``.

    Decodes exactly ``len(seq)`` tokens one at a time, filtering each
    step's logits through ``BalancedParenProcessor`` so only legal
    '(' / ')' / '.' tokens can be sampled, then returns the resulting
    string padded/truncated to the exact target length.
    """
    tok, model, device = _load_model_and_tokenizer()
    n = len(seq)
    prompt = f"RNA: {seq}\nDot-bracket (exactly {n} characters using only '(' ')' '.'):\n"

    # Token ids for the three structure characters.
    lp = _char_token_id(tok, "(")
    rp = _char_token_id(tok, ")")
    dot = _char_token_id(tok, ".")

    can = _precompute_can_open(seq, min_loop=3)  # try 2 if you still get few stems
    proc = BalancedParenProcessor(lp, rp, dot, n, can, dot_bias=0.0, paren_penalty=0.0)
    procs = LogitsProcessorList([proc])

    inputs = tok(prompt, return_tensors="pt").to(device)
    cur = inputs["input_ids"]
    generated = []

    # Manual token-by-token decoding loop: exactly n structure tokens.
    with torch.no_grad():
        for _ in range(n):
            logits = model(cur).logits[:, -1, :]
            for processor in procs:
                logits = processor(cur, logits)
            next_id = _top_p_sample(logits, top_p=0.9, temperature=0.8).to(device)
            tokid = next_id.item()
            generated.append(tokid)
            proc.update(tokid)  # advance the processor's step/depth bookkeeping
            cur = torch.cat([cur, next_id.view(1, 1)], dim=1)

    text = tok.decode(generated, skip_special_tokens=True)
    # Keep only structure characters and force the exact target length.
    db = "".join(ch for ch in text if ch in "().")[:n]
    if len(db) != n:
        db = (db + "." * n)[:n]
    return db
|
| 139 |
|
| 140 |
+
|
| 141 |
# --- structural element translation ---
|
| 142 |
def dotbracket_to_structural(dot_str):
|
| 143 |
if not dot_str: return "<start><external_loop><end>"
|