d-e-e-k-11 committed on
Commit
88d8657
·
verified ·
1 Parent(s): 8407b7a

Add app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+ import os
4
+
5
# File name of the quantized model, resolved relative to the working directory.
MODEL_PATH = "llama-2-7b-chat.ggmlv3.q2_K.bin"

# Load the model once at import time. `llm` stays None when the file is
# missing, so the chat handler can report that instead of failing.
print("Loading Llama-2 model...")
model_file_present = os.path.exists(MODEL_PATH)
llm = None
if not model_file_present:
    print(f"Model not found at {MODEL_PATH}. Upload the model file to the Space.")
else:
    llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4, verbose=False)
    print("Model loaded!")
15
+
16
# Llama-2 chat system block. In the reference template it sits inside the
# first [INST] ... [/INST] pair, directly before the first user message.
_SYSTEM_BLOCK = "<<SYS>>\nYou are a helpful AI assistant.\n<</SYS>>\n\n"


def _build_prompt(message, history, max_turns=5):
    """Render the conversation as a Llama-2 chat prompt.

    Args:
        message: The new user message to answer.
        history: Past turns as (user_msg, bot_msg) pairs; may be empty or None.
        max_turns: How many of the most recent past turns to include.

    Returns:
        The prompt string, ending with an open ``[/INST]`` for the model to
        continue from.

    The system block is placed inside the first instruction instead of being
    followed by a second ``[INST]``, so every ``[INST]`` has a matching
    ``[/INST]``. Finished turns are separated with ``</s><s>`` as in the
    reference template. The template's leading ``<s>`` is not written here
    (the previous prompt did not write it either); presumably the tokenizer
    prepends BOS -- confirm against the installed llama-cpp-python version.
    """
    recent_turns = (history or [])[-max_turns:]
    past = "".join(
        f"{user_msg} [/INST] {bot_msg} </s><s>[INST] "
        for user_msg, bot_msg in recent_turns
        # Skip incomplete turns so a missing reply is not rendered as "None".
        if user_msg is not None and bot_msg is not None
    )
    return f"[INST] {_SYSTEM_BLOCK}{past}{message} [/INST]"


def chat(message, history):
    """Answer one chat message with the module-level Llama model.

    Args:
        message: The user's new message.
        history: Earlier turns as (user_msg, bot_msg) pairs; only the last
            five are used as context.

    Returns:
        The model's reply with surrounding whitespace stripped, or a fixed
        notice when no model was loaded.
    """
    if llm is None:
        return "Model not loaded. Please upload 'llama-2-7b-chat.ggmlv3.q2_K.bin' to this Space."

    prompt = _build_prompt(message, history)

    # NOTE(review): several long turns plus max_tokens=512 may not fit in the
    # context size configured when the model is loaded -- confirm how the
    # installed llama-cpp-python version handles an over-long prompt.
    output = llm(prompt, max_tokens=512, stop=["[/INST]", "</s>"], echo=False)
    return output["choices"][0]["text"].strip()
30
+
31
# Chat UI: a Gradio ChatInterface that passes every message to chat().
demo = gr.ChatInterface(
    fn=chat,
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate"),
    title="Llama-2-7B Chatbot",
    description=(
        "An offline AI chatbot powered by **Llama-2-7B** (GGMLv3 Q2_K quantized).\n\n"
        "> Note: The model file `llama-2-7b-chat.ggmlv3.q2_K.bin` must be uploaded to the Space files."
    ),
    # Sample prompts offered to the user.
    examples=[
        "What is machine learning?",
        "Write a Python function to reverse a string.",
        "Explain quantum computing in simple terms.",
        "What are the planets in the solar system?",
    ],
    # Labels for the conversation control buttons.
    clear_btn="Clear",
    undo_btn="Undo",
    retry_btn="Retry",
)

# Launch the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()