Spaces:

ryandt
/

Inverting-Embeddings

Running on Zero

App Files Community

ryandt committed on Feb 5

Commit

248ed0c

1 Parent(s): 50948cd

Update to the zero gpu config

Browse files

Files changed (1) hide show

app.py +21 -10

app.py CHANGED Viewed

@@ -88,11 +88,20 @@ def _run_beam_search_threaded(
     target_emb, encoder_name, prompt,
     beam_width, top_k, patience, max_steps, min_similarity, randomness,
     progress_queue,
 ):
-    """Run beam search on GPU, pushing step updates to a queue."""
     llm, tokenizer = load_llm()
     encoder = load_encoder(encoder_name)
     step_count = 0
     def on_step(step, cand):
@@ -114,7 +123,7 @@ def _run_beam_search_threaded(
     )
     elapsed = time.time() - t0
     progress_queue.put(_SENTINEL)
-    return result, elapsed, step_count
 def run_stage(
@@ -135,11 +144,6 @@ def run_stage(
     stage_num = len(stage_results_state) + 1
-    # Encode target on first stage
-    if stage_num == 1:
-        encoder = load_encoder(encoder_name)
-        target_emb_state = encode_text(text.strip(), encoder)
     # Build prompt
     if stage_num == 1:
         prompt = _STAGE1_PROMPT
@@ -147,21 +151,26 @@ def run_stage(
         prev_text = stage_results_state[-1]["text"]
         prompt = _STAGE2_PROMPT_TEMPLATE.format(seed=prev_text)
     # Run beam search in a thread so we can yield progress
     progress_q = queue.Queue()
     # Container for the thread's return value
-    result_holder = [None, 0.0, 0]
     def _worker():
-        r, elapsed, steps = _run_beam_search_threaded(
             target_emb_state, encoder_name, prompt,
             beam_width, top_k, patience, max_steps, min_similarity, randomness,
             progress_q,
         )
         result_holder[0] = r
         result_holder[1] = elapsed
         result_holder[2] = steps
     worker = threading.Thread(target=_worker)
     worker.start()
@@ -190,7 +199,9 @@ def run_stage(
     worker.join()
-    result, elapsed, steps = result_holder
     stage_results_state = stage_results_state + [{
         "stage": stage_num,
         "text": result.seq_str,

     target_emb, encoder_name, prompt,
     beam_width, top_k, patience, max_steps, min_similarity, randomness,
     progress_queue,
+    encode_text_input=None,
 ):
+    """Run beam search on GPU, pushing step updates to a queue.
+    If encode_text_input is provided and target_emb is None, encodes
+    the text to produce the target embedding (Stage 1). This keeps
+    all CUDA operations inside the @spaces.GPU context.
+    """
     llm, tokenizer = load_llm()
     encoder = load_encoder(encoder_name)
+    if target_emb is None and encode_text_input is not None:
+        target_emb = encode_text(encode_text_input, encoder)
     step_count = 0
     def on_step(step, cand):
     )
     elapsed = time.time() - t0
     progress_queue.put(_SENTINEL)
+    return result, elapsed, step_count, target_emb
 def run_stage(
     stage_num = len(stage_results_state) + 1
     # Build prompt
     if stage_num == 1:
         prompt = _STAGE1_PROMPT
         prev_text = stage_results_state[-1]["text"]
         prompt = _STAGE2_PROMPT_TEMPLATE.format(seed=prev_text)
+    # On Stage 1, pass raw text so encoding happens inside GPU context
+    encode_input = text.strip() if stage_num == 1 else None
     # Run beam search in a thread so we can yield progress
     progress_q = queue.Queue()
     # Container for the thread's return value
+    result_holder = [None, 0.0, 0, None]
     def _worker():
+        r, elapsed, steps, emb = _run_beam_search_threaded(
             target_emb_state, encoder_name, prompt,
             beam_width, top_k, patience, max_steps, min_similarity, randomness,
             progress_q,
+            encode_text_input=encode_input,
         )
         result_holder[0] = r
         result_holder[1] = elapsed
         result_holder[2] = steps
+        result_holder[3] = emb
     worker = threading.Thread(target=_worker)
     worker.start()
     worker.join()
+    result, elapsed, steps, returned_emb = result_holder
+    if returned_emb is not None:
+        target_emb_state = returned_emb
     stage_results_state = stage_results_state + [{
         "stage": stage_num,
         "text": result.seq_str,