Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,11 +3,15 @@ import gradio as gr
|
|
| 3 |
from huggingface_hub import hf_hub_download
|
| 4 |
from llama_cpp import Llama
|
| 5 |
|
|
|
|
| 6 |
# Hugging Face Hub repo id hosting the quantized GGUF TinyLlama weights
MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
|
| 7 |
-
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# Load with llama-cpp
|
| 13 |
llm = Llama(
|
|
@@ -17,17 +21,19 @@ llm = Llama(
|
|
| 17 |
use_mlock=True
|
| 18 |
)
|
| 19 |
|
|
|
|
| 20 |
def answer_question(question):
    """Answer *question* with the module-level TinyLlama `llm`.

    The question is wrapped in the model's [INST] chat template; the
    completion text of the first choice is returned, stripped.
    """
    wrapped = f"[INST] {question} [/INST]"
    result = llm(wrapped, max_tokens=256, temperature=0.7, top_p=0.9, stop=["</s>"])
    return result["choices"][0]["text"].strip()
|
| 24 |
|
|
|
|
| 25 |
# Gradio UI: one question textbox in, one answer textbox out.
# Fix: the original `description="` was an unterminated string literal,
# which is a SyntaxError and crashed the Space at startup ("Runtime error").
demo = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(lines=2, label="Ask a programming question"),
    outputs=gr.Textbox(label="Answer"),
    title="TinyLlama Code Assistant",
    description="Lightweight Q&A with TinyLlama (1.1B GGUF)."
)
|
| 32 |
|
| 33 |
if __name__ == "__main__":
|
|
|
|
| 3 |
from huggingface_hub import hf_hub_download
|
| 4 |
from llama_cpp import Llama
|
| 5 |
|
| 6 |
+
# Model config
|
| 7 |
# Hugging Face Hub repo id hosting the quantized GGUF TinyLlama weights
MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
|
| 8 |
+
# Weights file to fetch from the repo (Q4_K_M quantization variant)
MODEL_FILE = "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf"
|
| 9 |
|
| 10 |
+
# Download and cache the model weights locally (no-op if already cached).
# Any failure (network, auth, missing file) is surfaced as a RuntimeError
# so the Space fails fast at startup with a readable message.
try:
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir="./models")
except Exception as e:
    raise RuntimeError(f"Failed to download model: {e}") from e
|
| 15 |
|
| 16 |
# Load with llama-cpp
|
| 17 |
llm = Llama(
|
|
|
|
| 21 |
use_mlock=True
|
| 22 |
)
|
| 23 |
|
| 24 |
+
# Answer function
|
| 25 |
def answer_question(question):
    """Run one [INST]-wrapped chat turn through the module-level `llm`."""
    completion = llm(
        f"[INST] {question} [/INST]",
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
        stop=["</s>"],
    )
    first_choice = completion["choices"][0]
    return first_choice["text"].strip()
|
| 29 |
|
| 30 |
+
# Gradio app: wires the answer function to a single-textbox in/out UI.
demo = gr.Interface(
    title="TinyLlama Code Assistant",
    description="Lightweight Q&A with TinyLlama (1.1B GGUF).",
    fn=answer_question,
    inputs=gr.Textbox(lines=2, label="Ask a programming question"),
    outputs=gr.Textbox(label="Answer"),
)
|
| 38 |
|
| 39 |
if __name__ == "__main__":
|