YOUSEF2434 committed
Commit 2152916 · verified · 1 parent: 68b3e68

Update app.py

Files changed (1): app.py (+14, −10)
app.py CHANGED
@@ -1,19 +1,26 @@
 import os
+import urllib.request
 from collections.abc import Iterator
-
 import gradio as gr
 from llama_cpp import Llama
 
-# 👤 Load GGUF Model
-model_path = "TinyLlama-1.1B-Chat.gguf" # Change if needed
-llm = Llama(model_path=model_path, n_ctx=4096, n_threads=os.cpu_count(), use_mlock=True)
+# 💾 Download GGUF from Hugging Face if not already present
+GGUF_URL = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-GGUF/resolve/main/TinyLlama-1.1B-Chat.Q4_K_M.gguf"
+MODEL_FILENAME = "TinyLlama-1.1B-Chat.Q4_K_M.gguf"
+
+if not os.path.exists(MODEL_FILENAME):
+    print(f"🔽 Downloading model from Hugging Face: {GGUF_URL}")
+    urllib.request.urlretrieve(GGUF_URL, MODEL_FILENAME)
+    print("✅ Download complete!")
 
-DESCRIPTION = "# Sheikh AI TinyLlama (GGUF with llama.cpp)"
-DESCRIPTION += "<p><strong>Note:</strong> Running on CPU with GGUF – optimized for performance.</p>"
+# 🧠 Load GGUF model using llama-cpp
+llm = Llama(model_path=MODEL_FILENAME, n_ctx=4096, n_threads=os.cpu_count())
+
+DESCRIPTION = "# Sheikh AI – TinyLlama (GGUF from HF)"
+DESCRIPTION += "<p><strong>Note:</strong> Running on CPU with GGUF – downloaded automatically.</p>"
 
 MAX_NEW_TOKENS = 1024
 
-# 🧠 Format messages into a prompt for GGUF chat models
 def format_conversation(system_prompt: str, chat_history: list[dict], user_input: str) -> str:
     chat = f"<|system|>\n{system_prompt.strip()}</s>\n"
     for turn in chat_history:
@@ -24,8 +31,6 @@ def format_conversation(system_prompt: str, chat_history: list[dict], user_input
     chat += f"<|user|>\n{user_input.strip()}</s>\n<|assistant|>\n"
     return chat
 
-
-# 💬 Gradio chatbot function
 def generate(
     message: str,
     chat_history: list[dict],
@@ -60,7 +65,6 @@ def generate(
         yield partial
 
 
-# 🧪 Launch the interface
 demo = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
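A note on the download step this commit adds: urllib.request.urlretrieve works, but it neither caches nor resumes interrupted transfers. A minimal alternative sketch using the huggingface_hub client, assuming that package is available in the Space; the repo_id and filename are read off GGUF_URL in the diff above:

# Sketch only: fetch the same GGUF through the Hub client instead of urllib.
# hf_hub_download caches under ~/.cache/huggingface and skips re-downloading
# on later runs, which would make the os.path.exists check unnecessary.
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-GGUF",
    filename="TinyLlama-1.1B-Chat.Q4_K_M.gguf",
)
llm = Llama(model_path=model_path, n_ctx=4096, n_threads=os.cpu_count())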
 
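The diff elides the body of generate between the hunks. For orientation, here is a sketch of the usual llama-cpp-python streaming pattern that the surrounding lines imply; the parameter list and variable names are assumptions, since only the signature fragment and the trailing "yield partial" are visible in the diff:

# Sketch only: assumed body of generate(), based on the visible signature,
# format_conversation(), MAX_NEW_TOKENS, and the final "yield partial".
def generate(
    message: str,
    chat_history: list[dict],
    system_prompt: str = "",               # assumed additional input
    max_new_tokens: int = MAX_NEW_TOKENS,  # assumed additional input
) -> Iterator[str]:
    # Build the <|system|>/<|user|>/<|assistant|> prompt defined above.
    prompt = format_conversation(system_prompt, chat_history, message)
    partial = ""
    # With stream=True, llama-cpp-python yields completion chunks whose
    # choices[0]["text"] holds the newly generated fragment.
    for chunk in llm(prompt, max_tokens=max_new_tokens, stop=["</s>"], stream=True):
        partial += chunk["choices"][0]["text"]
        yield partial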