# Jupiter FAQ chatbot — app.py (Hugging Face Space by psy7743, commit eb2c9cb)
import os
import uuid
import re
import string
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
from llama_cpp import Llama
import requests
from pathlib import Path
# ---------------------- Download Model ----------------------
MODEL_URL = "https://huggingface.co/datasets/psy7743/llama3-8b-instruct-Q8_0.gguf/resolve/main/llama3-8b-instruct-Q8_0.gguf"
MODEL_PATH = "llama3-8b-instruct-Q8_0.gguf"
# Alternative smaller model (uncomment to use instead):
# MODEL_URL = "https://huggingface.co/tensorblock/Meta-Llama-3.2-1B-GGUF/resolve/main/Meta-Llama-3.2-1B-Q8_0.gguf"
# MODEL_PATH = "Meta-Llama-3.2-1B-Q8_0.gguf"


def download_file(url, dest, chunk_size=8192):
    """Stream *url* to *dest*, raising on HTTP errors.

    Writes to a temporary ``.part`` file and renames on success, so an
    interrupted or failed download never leaves a truncated file at *dest*
    (which would otherwise satisfy the ``exists()`` check on the next start
    and be loaded as a corrupt model).
    """
    tmp_path = dest + ".part"
    with requests.get(url, stream=True, timeout=60) as response:
        # Previously missing: without this, a 404/403 HTML error page
        # would be silently saved as the model file.
        response.raise_for_status()
        with open(tmp_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    os.replace(tmp_path, dest)  # atomic: dest exists only when complete


if not Path(MODEL_PATH).exists():
    # Message fixed: the URL above points at psy7743's dataset, not tensorblock.
    print("📥 Downloading Meta LLaMA 3 8B Instruct Q8_0 model...")
    download_file(MODEL_URL, MODEL_PATH)
    print("✅ Download complete!")
# ---------------------- Load Data ----------------------
# FAQ knowledge base; must provide 'question' and 'answer' columns,
# which the cleaning/embedding steps below read.
df = pd.read_csv("jupiter_faqs.csv")
def clean_text(text):
    """Normalize a snippet for embedding: lowercase, drop all ASCII
    punctuation, and collapse every whitespace run to a single space."""
    lowered = text.lower()
    no_punct = lowered.translate(str.maketrans('', '', string.punctuation))
    flattened = no_punct.replace('\n', ' ').replace('\t', ' ')
    return re.sub(r'\s+', ' ', flattened).strip()
# Derive cleaned question/answer columns and the combined document text
# that gets stored in the vector database.
df['clean_question'] = df['question'].map(clean_text)
df['clean_answer'] = df['answer'].map(clean_text)
df['document'] = (
    "Question: " + df['clean_question'] + "\nAnswer: " + df['clean_answer']
)
# ---------------------- Embeddings ----------------------
embedding_model = SentenceTransformer('all-mpnet-base-v2')


def _embed_text(text):
    """Encode one cleaned question into a plain Python list embedding."""
    return embedding_model.encode(text).tolist()


df['embedding'] = df['clean_question'].apply(_embed_text)
df['uid'] = [str(uuid.uuid4()) for _ in df.index]
# ---------------------- ChromaDB ----------------------
PERSIST_DIR = "chroma_qa_db"
COLLECTION_NAME = "qa_collection"

chroma_client = chromadb.PersistentClient(path=PERSIST_DIR, settings=Settings())

# Drop any stale collection so the index always reflects the current CSV.
existing_names = {c.name for c in chroma_client.list_collections()}
if COLLECTION_NAME in existing_names:
    chroma_client.delete_collection(name=COLLECTION_NAME)

collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)

# Freshly (re)created above, so this is normally empty — guard anyway.
if not collection.get()["ids"]:
    collection.add(
        documents=df['document'].tolist(),
        embeddings=df['embedding'].tolist(),
        ids=df['uid'].astype(str).tolist(),
    )
# ---------------------- LLaMA ----------------------
# Local inference via llama-cpp-python over the downloaded GGUF file.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,       # context window in tokens
    n_threads=8,      # CPU threads used for inference
    n_gpu_layers=-1,  # -1 offloads all layers to GPU when one is available
)
# ---------------------- Inference ----------------------
def search_chroma(query, n_results=5):
    """Embed *query* and return the top *n_results* matching FAQ documents."""
    results = collection.query(
        query_embeddings=[embedding_model.encode(query).tolist()],
        n_results=n_results,
        include=['documents'],
    )
    # query() returns one document list per query embedding; we sent one.
    return results["documents"][0]
def generate_response(message, history=None):
    """Answer *message* using retrieved FAQ context and the local LLM.

    Parameters
    ----------
    message : str
        The user's question.
    history : list | None
        Chat history supplied by Gradio's ChatInterface. Retrieval here is
        stateless, so it is accepted for interface compatibility but unused.
        (Was a mutable default argument ``[]`` — a classic shared-state
        pitfall — now the safe ``None`` sentinel.)

    Returns
    -------
    str
        The model's answer, stripped of surrounding whitespace.
    """
    docs = search_chroma(message)
    context_str = "\n\n".join(docs)
    sys_prompt = """You are a helpful assistant. Based on the context and user query, provide an accurate and concise answer.
Rules:
- If context is relevant, answer based on it.
- If context is insufficient, ask for clarification or say it's not possible.
- Do not hallucinate or make up answers.
- Keep the tone friendly."""
    prompt = f"{sys_prompt}\n\ncontext:\n{context_str}\n\nQuestion: {message}\nAnswer:"
    response = llm(
        prompt,
        max_tokens=512,
        temperature=0.3,
        top_p=0.8,
        # "\n" as a stop token means answers are single-line by design.
        stop=["Q:", "\n"],
        # echo=False returns only the completion. The old echo=True +
        # split("Answer:")[-1] round-tripped the whole prompt and was
        # fragile: the retrieved documents themselves contain the literal
        # string "Answer:".
        echo=False,
    )
    return response["choices"][0]["text"].strip()
# ---------------------- Gradio Interface ----------------------
def gradio_chat_interface(message, history):
    """Stateful chat handler: compute a reply and return the updated
    history twice (chatbot display + state).

    NOTE(review): not referenced elsewhere in this file — the
    ChatInterface below passes generate_response directly.
    """
    reply = generate_response(message)
    updated = history + [(message, reply)]
    return updated, updated
# Chat UI: Gradio calls fn(message, history) on every user turn.
demo = gr.ChatInterface(
    fn=generate_response,
    title="🦙 LLaMA-3 FAQ Chatbot",
    chatbot=gr.Chatbot(label="Ask me anything about Jupiter Money!"),
    examples=[
        "What is Jupiter Edge credit card?",
        "What happens if I miss a payment?",
        "How to change billing address?"
    ],
    # Avoid pre-running the examples through the LLM at startup.
    cache_examples=False
)
if __name__ == "__main__":
    # share=True requests a public gradio.live tunnel URL in addition
    # to the local server.
    demo.launch(share=True)