File size: 4,373 Bytes
db23b2c
2ce4d84
 
 
 
 
 
 
 
 
651a8f1
 
 
 
0a91e85
eb2c9cb
 
d6ea259
8d9f06c
eb2c9cb
 
651a8f1
 
eb2c9cb
651a8f1
 
 
0a91e85
 
651a8f1
 
8d8e583
2ce4d84
 
 
 
 
 
 
 
 
 
 
 
 
8d8e583
2ce4d84
 
 
 
 
8d8e583
2ce4d84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d8e583
2ce4d84
 
52d6d7b
2ce4d84
 
 
0b5cb01
 
8d8e583
2ce4d84
 
 
 
 
 
 
 
 
 
4b8a906
 
2ce4d84
 
 
 
 
 
 
 
 
 
4b8a906
2ce4d84
 
 
 
 
 
 
 
 
4b8a906
2ce4d84
 
52d6d7b
2ce4d84
8d8e583
 
 
 
 
 
 
 
11e8219
8d8e583
 
 
 
 
 
 
0b5cb01
8d8e583
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
import uuid
import re
import string
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
from llama_cpp import Llama
import requests
from pathlib import Path


# ---------------------- Download Model ----------------------
MODEL_URL = "https://huggingface.co/datasets/psy7743/llama3-8b-instruct-Q8_0.gguf/resolve/main/llama3-8b-instruct-Q8_0.gguf"
MODEL_PATH = "llama3-8b-instruct-Q8_0.gguf"

# Alternative (smaller) model, kept for quick local testing:
# MODEL_URL = "https://huggingface.co/tensorblock/Meta-Llama-3.2-1B-GGUF/resolve/main/Meta-Llama-3.2-1B-Q8_0.gguf"
# MODEL_PATH = "Meta-Llama-3.2-1B-Q8_0.gguf"

# Download the GGUF weights once; skipped on subsequent runs when the file
# already exists on disk.
if not Path(MODEL_PATH).exists():
    print("📥 Downloading Meta LLaMA 3 8B Q8_0 model...")
    # stream=True downloads in chunks instead of buffering ~8 GB in memory;
    # the context manager ensures the connection is released afterwards.
    with requests.get(MODEL_URL, stream=True, timeout=60) as response:
        # Fail loudly on HTTP errors — otherwise a 404/403 error page would be
        # silently written to disk as a corrupt .gguf file.
        response.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive chunks
                    f.write(chunk)
    print("✅ Download complete!")

# ---------------------- Load Data ----------------------

# FAQ corpus; must contain 'question' and 'answer' columns (both are read
# during cleaning/enrichment below). Assumes the CSV sits in the working
# directory — TODO confirm deployment layout.
df = pd.read_csv("jupiter_faqs.csv")

def clean_text(text):
    """Normalize raw FAQ text: lowercase, strip ASCII punctuation, and
    collapse all whitespace runs (spaces, tabs, newlines) to single spaces,
    trimming the ends."""
    lowered = text.lower()
    # Remove every character in string.punctuation in one C-level pass.
    no_punct = lowered.translate(str.maketrans('', '', string.punctuation))
    # str.split() with no argument splits on any whitespace and drops empties,
    # so joining with single spaces both collapses runs and trims the edges.
    return ' '.join(no_punct.split())

# Normalize both columns, then build one retrieval document per row in the
# "Question: ...\nAnswer: ..." layout that is later fed to the LLM as context.
# NOTE(review): cleaning the *answer* strips its punctuation too, so answers
# shown via retrieved context lose formatting — confirm this is intended.
df['clean_question'] = df['question'].apply(clean_text)
df['clean_answer'] = df['answer'].apply(clean_text)
df['document'] = df.apply(lambda row: f"Question: {row['clean_question']}\nAnswer: {row['clean_answer']}", axis=1)

# ---------------------- Embeddings ----------------------

# Encode only the cleaned *questions* — queries are matched question-to-question,
# while the stored document carries both question and answer.
embedding_model = SentenceTransformer('all-mpnet-base-v2')
# .tolist() converts each numpy vector to plain lists, the format ChromaDB accepts.
df['embedding'] = df['clean_question'].apply(lambda x: embedding_model.encode(x).tolist())
# Random UUIDs serve as the unique ids ChromaDB requires per document.
df['uid'] = [str(uuid.uuid4()) for _ in range(len(df))]

# ---------------------- ChromaDB ----------------------

persist_dir = "chroma_qa_db"
chroma_client = chromadb.PersistentClient(path=persist_dir, settings=Settings())
collection_name = "qa_collection"

# Drop any previous collection so every run re-indexes from the fresh CSV.
if collection_name in [c.name for c in chroma_client.list_collections()]:
    chroma_client.delete_collection(name=collection_name)

collection = chroma_client.get_or_create_collection(name=collection_name)

# NOTE(review): because the collection was just deleted above, this emptiness
# check is always true — it only matters if the delete step is ever removed.
if len(collection.get()["ids"]) == 0:
    collection.add(
        documents=df['document'].tolist(),
        embeddings=df['embedding'].tolist(),
        ids=df['uid'].astype(str).tolist()
    )

# ---------------------- LLaMA ----------------------

llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,        # context window; must fit sys prompt + retrieved docs + answer
    n_threads=8,       # CPU threads — presumably tuned to the host machine; verify
    n_gpu_layers=-1,   # offload all layers to GPU when llama-cpp is built with GPU support
)

# ---------------------- Inference ----------------------

def search_chroma(query, n_results=5):
    """Embed *query* with the same sentence-transformer used at index time and
    return the top *n_results* matching FAQ documents (list of strings)."""
    query_vector = embedding_model.encode(query).tolist()
    hits = collection.query(
        query_embeddings=[query_vector],
        n_results=n_results,
        include=['documents'],
    )
    # query() is batched; [0] selects the results for our single query.
    return hits["documents"][0]

def generate_response(message, history=None):
    """Answer *message* using retrieved FAQ context and the local LLaMA model.

    Args:
        message: The user's question.
        history: Chat history supplied by Gradio; currently unused. Defaults
            to None (previously a mutable ``[]`` default, which is a shared
            object across calls — a classic Python pitfall).

    Returns:
        The model's answer as a stripped string.
    """
    docs = search_chroma(message)
    context_str = "\n\n".join(docs)

    sys_prompt = """You are a helpful assistant. Based on the context and user query, provide an accurate and concise answer.
    
    Rules:
    - If context is relevant, answer based on it.
    - If context is insufficient, ask for clarification or say it's not possible.
    - Do not hallucinate or make up answers.
    - Keep the tone friendly."""

    prompt = f"{sys_prompt}\n\ncontext:\n{context_str}\n\nQuestion: {message}\nAnswer:"

    # NOTE(review): stop=["\n"] cuts generation at the first newline, so
    # answers are limited to a single line — confirm that is intended.
    response = llm(
        prompt,
        max_tokens=512,
        temperature=0.3,
        top_p=0.8,
        stop=["Q:", "\n"],
        echo=False,  # return only the completion, not the (long) prompt
    )

    # Defensive: with echo=False the text is already just the answer, but if
    # the model emits its own "Answer:" prefix we still take what follows it.
    return response["choices"][0]["text"].split("Answer:")[-1].strip()

# ---------------------- Gradio Interface ----------------------

def gradio_chat_interface(message, history):
    """Tuple-style chat handler: answer *message* and return the updated
    history twice (chatbot display, state). Not wired to the ChatInterface
    below, which calls generate_response directly."""
    reply = generate_response(message)
    updated = history + [(message, reply)]
    return updated, updated

# ChatInterface drives generate_response directly (message, history) -> str.
demo = gr.ChatInterface(
    fn=generate_response,
    title="🦙 LLaMA-3 FAQ Chatbot",
    chatbot=gr.Chatbot(label="Ask me anything about Jupiter Money!"),
    examples=[
        "What is Jupiter Edge credit card?",
        "What happens if I miss a payment?",
        "How to change billing address?"
    ],
    # Don't pre-run the examples at startup — each would trigger a full LLM call.
    cache_examples=False
)

if __name__ == "__main__":
    # share=True asks Gradio to create a temporary public URL for the app.
    demo.launch(share=True)