Update app.py
app.py
CHANGED
@@ -66,7 +66,7 @@ import multiprocessing
 
 import llama_cpp
 
-def
+def low_level():
 
 
 llama_cpp.llama_backend_init(numa=False)
@@ -283,6 +283,15 @@ def respond(
     top_k: int,
     repeat_penalty: float,
 ):
+    llama = Llama("models/madlad400-3b-mt-q8_0.gguf")
+    tokens = llama.tokenize(message.encode("utf-8"))
+    llama.encode(tokens)
+    tokens = [llama.decoder_start_token()]
+    for token in llama.generate(tokens, top_k=40, top_p=0.95, temp=1, repeat_penalty=1.0):
+        yield (llama.detokenize([token]))
+        if token == llama.token_eos():
+            break
+
     """
     Respond to a message using the Gemma3 model via Llama.cpp.
 
@@ -320,7 +329,7 @@ def respond(
         llm_model = model
 
     trans(message)
-    yield "done"
+    #yield "done"
 
     provider = LlamaCppPythonProvider(llm)
 
@@ -367,7 +376,7 @@ def respond(
         outputs = ""
         for output in stream:
            outputs += output
-           yield outputs
+           #yield outputs
 
    # Handle exceptions that may occur during the process
    except Exception as e:
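For context, here is a self-contained sketch of the encoder-decoder translation loop that the new lines in respond() implement, built from the same llama-cpp-python calls that appear in the diff (tokenize, encode, decoder_start_token, generate, detokenize, token_eos). The translate() helper, the UTF-8 handling, and the "<2en>" target-language tag are illustrative assumptions, not part of the commit.

```python
from llama_cpp import Llama

# Load the MADLAD-400 3B MT model (an encoder-decoder translation model in GGUF
# format); same path as in the diff.
llama = Llama("models/madlad400-3b-mt-q8_0.gguf")

def translate(message: str) -> str:
    """Illustrative helper: run the encoder once, then decode token by token until EOS."""
    # The encoder consumes the source sentence as bytes.
    llama.encode(llama.tokenize(message.encode("utf-8")))

    # Decoding starts from the model's decoder-start token, not from the prompt.
    tokens = [llama.decoder_start_token()]
    pieces = []
    for token in llama.generate(tokens, top_k=40, top_p=0.95, temp=1.0, repeat_penalty=1.0):
        if token == llama.token_eos():
            break
        pieces.append(llama.detokenize([token]))  # detokenize returns bytes
    return b"".join(pieces).decode("utf-8", errors="replace")

# Assumption: MADLAD-400 expects a target-language tag in front of the source
# text, e.g. "<2en>" to translate into English.
print(translate("<2en> Bonjour tout le monde"))
```

Unlike the diff, which yields each detokenized piece (including the final one) before checking for the end-of-sequence token, the sketch checks for EOS first so the end-of-sequence piece is never emitted.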