locked the thread before generation on llama
Browse files
app.py
CHANGED
|
@@ -3,8 +3,7 @@ import gradio as gr
|
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
|
| 4 |
from huggingface_hub import login
|
| 5 |
import os
|
| 6 |
-
|
| 7 |
-
from openai import OpenAI
|
| 8 |
import spaces
|
| 9 |
# import multiprocessing as mp
|
| 10 |
import sys
|
|
@@ -111,13 +110,19 @@ def llama_generation(input_text: str,
|
|
| 111 |
if temperature == 0:
|
| 112 |
generate_kwargs["do_sample"] = False
|
| 113 |
|
|
|
|
|
|
|
|
|
|
| 114 |
# # Place the generation in a thread so we can access it.
|
| 115 |
# # place the function as target and place the kwargs next as the kwargs
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
| 121 |
thread.start()
|
| 122 |
thread.join()
|
| 123 |
return streamer
|
|
|
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
|
| 4 |
from huggingface_hub import login
|
| 5 |
import os
|
| 6 |
+
import threading
|
|
|
|
| 7 |
import spaces
|
| 8 |
# import multiprocessing as mp
|
| 9 |
import sys
|
|
|
|
| 110 |
if temperature == 0:
|
| 111 |
generate_kwargs["do_sample"] = False
|
| 112 |
|
| 113 |
+
# Use a lock object to synchronize access to the llama_model
|
| 114 |
+
lock = threading.Lock()
|
| 115 |
+
|
| 116 |
# # Place the generation in a thread so we can access it.
|
| 117 |
# # place the function as target and place the kwargs next as the kwargs
|
| 118 |
+
def generation_llama():
|
| 119 |
+
with lock:
|
| 120 |
+
# Generate response using Llama3
|
| 121 |
+
response = llama_model.generate(**generate_kwargs)
|
| 122 |
+
return response
|
| 123 |
+
|
| 124 |
+
# start the thread and wait for it to finish
|
| 125 |
+
thread = threading.Thread(target=generation_llama)
|
| 126 |
thread.start()
|
| 127 |
thread.join()
|
| 128 |
return streamer
|