Build error
Create app.py
app.py
ADDED
@@ -0,0 +1,60 @@
import os
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch token from Hugging Face Secrets
hf_token = os.getenv("HF_TOKEN")

# 1. Download the quantized model
# Using Q4_K_M (4-bit) for the best balance of speed and intelligence
model_path = hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
    token=hf_token
)

# 2. Initialize the model
# n_ctx=2048: Enough for good conversations without lagging the CPU
# n_threads=2: Matches the 2-core limit of the HF Free Tier
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=2,
    verbose=False
)

def generate_response(message, history):
    # Construct the Llama 3.2 Chat Template
    prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|>"

    for user_msg, assistant_msg in history:
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"

    prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

    # Streaming the response for a "fast" feel
    response = ""
    stream = llm(
        prompt,
        max_tokens=512,
        stop=["<|eot_id|>", "<|start_header_id|>"],
        stream=True
    )

    for output in stream:
        token = output["choices"][0]["text"]
        response += token
        yield response

# 3. Gradio UI with a clean "Chat" look
demo = gr.ChatInterface(
    fn=generate_response,
    title="Llama 3.2 (3B) - Optimized CPU",
    description="Running with llama-cpp-python for maximum speed on free hardware.",
    theme="glass"
)

if __name__ == "__main__":
    demo.launch()
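Note: the Space status above reads "Build error". The commit itself does not show the cause, but a plausible one (an assumption, not confirmed here) is a missing or failing dependency install, since app.py imports gradio, huggingface_hub, and llama_cpp. A requirements.txt along these lines would be needed for the Space to build; llama-cpp-python typically compiles from source on CPU Spaces, which is where builds most often fail:

# requirements.txt (sketch; package set inferred from the imports, versions unpinned)
gradio
huggingface_hub
llama-cpp-python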