prakhardoneria committed on
Commit
a8c2418
·
verified ·
1 Parent(s): 9faffe6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -3,11 +3,15 @@ import gradio as gr
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
 
6
  MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
7
- MODEL_FILENAME = "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf"
8
 
9
- # Auto-download GGUF model
10
- model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
 
 
 
11
 
12
  # Load with llama-cpp
13
  llm = Llama(
@@ -17,17 +21,19 @@ llm = Llama(
17
  use_mlock=True
18
  )
19
 
 
20
  def answer_question(question):
21
  prompt = f"[INST] {question} [/INST]"
22
  output = llm(prompt, max_tokens=256, temperature=0.7, top_p=0.9, stop=["</s>"])
23
  return output["choices"][0]["text"].strip()
24
 
 
25
  demo = gr.Interface(
26
  fn=answer_question,
27
  inputs=gr.Textbox(lines=2, label="Ask a programming question"),
28
  outputs=gr.Textbox(label="Answer"),
29
  title="TinyLlama Code Assistant",
30
- description="Ask coding questions and get answers from a local TinyLlama model (1.1B, Q4_K_M)."
31
  )
32
 
33
  if __name__ == "__main__":
 
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
# Model configuration: source repo and the quantized GGUF weights file.
MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
MODEL_FILE = "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf"

# Fetch the weights once at startup; hf_hub_download caches under ./models,
# so subsequent launches reuse the local copy instead of re-downloading.
try:
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir="./models",
    )
except Exception as e:  # re-raise with a message that names the failing step
    raise RuntimeError(f"Failed to download model: {e}") from e
15
 
16
  # Load with llama-cpp
17
  llm = Llama(
 
21
  use_mlock=True
22
  )
23
 
24
def answer_question(question):
    """Wrap *question* in the TinyLlama [INST] chat template, run one
    completion against the module-level ``llm``, and return the stripped
    generated text."""
    wrapped = f"[INST] {question} [/INST]"
    result = llm(
        wrapped,
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
        stop=["</s>"],
    )
    text = result["choices"][0]["text"]
    return text.strip()
29
 
30
# Gradio UI wiring: one question textbox in, one answer textbox out,
# connected to the inference function above.
question_box = gr.Textbox(lines=2, label="Ask a programming question")
answer_box = gr.Textbox(label="Answer")
demo = gr.Interface(
    fn=answer_question,
    inputs=question_box,
    outputs=answer_box,
    title="TinyLlama Code Assistant",
    description="Lightweight Q&A with TinyLlama (1.1B GGUF).",
)
38
 
39
  if __name__ == "__main__":