# NOTE: removed extraction artifacts that were fused into this file
# (file-size banner, git-blame hash column, line-number gutter) —
# they were not part of the original Python source and broke parsing.
import os
import uuid
import re
import string
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
from llama_cpp import Llama
import requests
from pathlib import Path
# ---------------------- Download Model ----------------------
MODEL_URL = "https://huggingface.co/datasets/psy7743/llama3-8b-instruct-Q8_0.gguf/resolve/main/llama3-8b-instruct-Q8_0.gguf"
MODEL_PATH = "llama3-8b-instruct-Q8_0.gguf"
# Alternative smaller (1B) model for low-resource environments:
# MODEL_URL = "https://huggingface.co/tensorblock/Meta-Llama-3.2-1B-GGUF/resolve/main/Meta-Llama-3.2-1B-Q8_0.gguf"
# MODEL_PATH = "Meta-Llama-3.2-1B-Q8_0.gguf"
if not Path(MODEL_PATH).exists():
    print("📥 Downloading Meta LLaMA 3 8B Q8_0 model from tensorblock...")
    # Stream in chunks so the multi-GB file never sits fully in memory.
    # Write to a temp name and rename on success: a partial/failed download
    # must not leave a corrupt MODEL_PATH that skips re-download next run.
    tmp_path = MODEL_PATH + ".part"
    with requests.get(MODEL_URL, stream=True, timeout=60) as response:
        # Fail loudly on 4xx/5xx instead of saving an HTML error page as the model.
        response.raise_for_status()
        with open(tmp_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    os.replace(tmp_path, MODEL_PATH)  # atomic rename on POSIX
    print("✅ Download complete!")
# ---------------------- Load Data ----------------------
# FAQ dataset; 'question' and 'answer' columns are required (used below for
# cleaning and document construction). Path is relative to the working dir.
df = pd.read_csv("jupiter_faqs.csv")
def clean_text(text):
    """Normalise raw FAQ text for embedding.

    Lowercases, strips all ASCII punctuation, and collapses every run of
    whitespace (spaces, tabs, newlines) into a single space, trimming the ends.
    """
    lowered = text.lower()
    no_punct = lowered.translate(str.maketrans('', '', string.punctuation))
    # split() with no argument splits on any whitespace run and drops
    # leading/trailing whitespace, so join+split collapses and trims at once.
    return " ".join(no_punct.split())
# Clean both sides of every FAQ pair, then build one retrievable document
# per row pairing the cleaned question with its cleaned answer.
df['clean_question'] = df['question'].map(clean_text)
df['clean_answer'] = df['answer'].map(clean_text)
df['document'] = "Question: " + df['clean_question'] + "\nAnswer: " + df['clean_answer']
# ---------------------- Embeddings ----------------------
embedding_model = SentenceTransformer('all-mpnet-base-v2')
# Embed only the question text: user queries are matched question-to-question.
df['embedding'] = [embedding_model.encode(q).tolist() for q in df['clean_question']]
# One fresh UUID per row — ChromaDB requires a unique string id per record.
df['uid'] = [str(uuid.uuid4()) for _ in df.index]
# ---------------------- ChromaDB ----------------------
# Persistent vector store, rebuilt from scratch on every run so the index
# always matches the current CSV contents.
persist_dir = "chroma_qa_db"
chroma_client = chromadb.PersistentClient(path=persist_dir, settings=Settings())
collection_name = "qa_collection"
# Drop any stale collection left over from a previous run before re-indexing.
if collection_name in {c.name for c in chroma_client.list_collections()}:
    chroma_client.delete_collection(name=collection_name)
collection = chroma_client.get_or_create_collection(name=collection_name)
# count() avoids pulling every stored record just to test emptiness
# (the original used len(collection.get()["ids"])). The guard is always
# true right after the delete above, but keeps the add idempotent if the
# rebuild step is ever removed.
if collection.count() == 0:
    collection.add(
        documents=df['document'].tolist(),
        embeddings=df['embedding'].tolist(),
        ids=df['uid'].astype(str).tolist()
    )
# ---------------------- LLaMA ----------------------
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,       # context window (tokens) shared by prompt + completion
    n_threads=8,      # CPU threads used for inference
    n_gpu_layers=-1,  # -1 = offload all layers to GPU when one is available
)
# ---------------------- Inference ----------------------
def search_chroma(query, n_results=5):
    """Return the *n_results* FAQ documents closest to *query*.

    The query is embedded with the same sentence-transformer used at
    indexing time, then matched against the ChromaDB collection.
    """
    query_vec = embedding_model.encode(query).tolist()
    hits = collection.query(
        query_embeddings=[query_vec],
        n_results=n_results,
        include=['documents'],
    )
    # query() is batched; one query was sent, so take its document list.
    return hits["documents"][0]
def generate_response(message, history=None):
    """Answer *message* with retrieval-augmented generation.

    Fetches the closest FAQ documents from ChromaDB, builds a grounded
    prompt, and asks the local LLaMA model for a concise answer.

    Args:
        message: the user's question.
        history: chat history passed positionally by gr.ChatInterface;
            unused here. (None default fixes the original mutable-default-
            argument bug, `history=[]`.)

    Returns:
        The model's answer as a stripped string.
    """
    docs = search_chroma(message)
    context_str = "\n\n".join(docs)
    sys_prompt = """You are a helpful assistant. Based on the context and user query, provide an accurate and concise answer.
Rules:
- If context is relevant, answer based on it.
- If context is insufficient, ask for clarification or say it's not possible.
- Do not hallucinate or make up answers.
- Keep the tone friendly."""
    prompt = f"{sys_prompt}\n\ncontext:\n{context_str}\n\nQuestion: {message}\nAnswer:"
    response = llm(
        prompt,
        max_tokens=512,
        temperature=0.3,
        top_p=0.8,
        stop=["Q:", "\n"],  # NOTE(review): "\n" limits answers to one line — confirm intended
        echo=False,  # was echo=True, which returned the whole prompt and required splitting it off
    )
    # With echo=False the completion holds only the generated answer.
    return response["choices"][0]["text"].strip()
# ---------------------- Gradio Interface ----------------------
def gradio_chat_interface(message, history):
    """State-style chat handler: append the new (message, reply) pair to
    *history* and return the updated list twice (chatbot output + state).

    NOTE(review): not wired to any component in this file — the
    ChatInterface below calls generate_response directly.
    """
    reply = generate_response(message)
    updated = [*history, (message, reply)]
    return updated, updated
# ChatInterface calls generate_response(message, history) on each user turn.
demo = gr.ChatInterface(
    fn=generate_response,
    title="🦙 LLaMA-3 FAQ Chatbot",
    chatbot=gr.Chatbot(label="Ask me anything about Jupiter Money!"),
    examples=[
        "What is Jupiter Edge credit card?",
        "What happens if I miss a payment?",
        "How to change billing address?"
    ],
    cache_examples=False  # otherwise examples are pre-run through the LLM at startup
)
if __name__ == "__main__":
    demo.launch(share=True)  # share=True exposes a temporary public Gradio URL