Spaces:
Build error
Build error
kwabs22 committed on
Commit ·
e3894fb
1
Parent(s): 5dd2646
Working but added inference times
Browse files
README.md
CHANGED
|
@@ -5,6 +5,10 @@ colorFrom: blue
|
|
| 5 |
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
|
|
|
|
|
|
| 10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 5 |
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
models:
|
| 9 |
+
- stabilityai/stablelm-2-zephyr-1_6b
|
| 10 |
---
|
| 11 |
|
| 12 |
+
Example of running llama.cpp (and, by extension, simple C++ programs) from Python without pip package dependency issues.
|
| 13 |
+
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -2,6 +2,8 @@ import gradio as gr
|
|
| 2 |
#from llama_cpp import Llama
|
| 3 |
import random
|
| 4 |
import subprocess
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# Initialize model
|
| 7 |
#llm = Llama(model_path="/stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
|
|
@@ -46,21 +48,21 @@ def generate_response(user_message):
|
|
| 46 |
"-e"
|
| 47 |
]
|
| 48 |
|
| 49 |
-
print("Before request")
|
| 50 |
# Start the subprocess
|
| 51 |
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
| 52 |
|
|
|
|
| 53 |
alllines = ""
|
| 54 |
|
| 55 |
# Yield each line of output as it becomes available
|
| 56 |
for line in process.stdout:
|
| 57 |
alllines += " " + line
|
| 58 |
-
|
|
|
|
| 59 |
|
| 60 |
# Wait for the subprocess to finish if it hasn't already
|
| 61 |
process.wait()
|
| 62 |
|
| 63 |
-
print("After response")
|
| 64 |
# Check for any errors
|
| 65 |
if process.returncode != 0:
|
| 66 |
error_message = process.stderr.read()
|
|
|
|
| 2 |
#from llama_cpp import Llama
|
| 3 |
import random
|
| 4 |
import subprocess
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
|
| 8 |
# Initialize model
|
| 9 |
#llm = Llama(model_path="/stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
|
|
|
|
| 48 |
"-e"
|
| 49 |
]
|
| 50 |
|
|
|
|
| 51 |
# Start the subprocess
|
| 52 |
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
| 53 |
|
| 54 |
+
start_time = time.time()
|
| 55 |
alllines = ""
|
| 56 |
|
| 57 |
# Yield each line of output as it becomes available
|
| 58 |
for line in process.stdout:
|
| 59 |
alllines += " " + line
|
| 60 |
+
elapsed_time = time.time() - start_time # Calculate elapsed time
|
| 61 |
+
yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"
|
| 62 |
|
| 63 |
# Wait for the subprocess to finish if it hasn't already
|
| 64 |
process.wait()
|
| 65 |
|
|
|
|
| 66 |
# Check for any errors
|
| 67 |
if process.returncode != 0:
|
| 68 |
error_message = process.stderr.read()
|