"""RAG FAQ chatbot for Jupiter Money.

Pipeline: download a GGUF LLaMA-3 model (if missing) -> load Q/A pairs from
jupiter_faqs.csv -> embed questions with sentence-transformers -> index them
in a persistent ChromaDB collection -> answer user queries with llama.cpp,
served through a Gradio ChatInterface.
"""

import re
import string
import uuid
from pathlib import Path

import chromadb
import gradio as gr
import pandas as pd
import requests
from chromadb.config import Settings
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer

# ---------------------- Download Model ----------------------
# Alternative (smaller) model, kept for quick swapping:
# MODEL_URL = "https://huggingface.co/tensorblock/Meta-Llama-3.2-1B-GGUF/resolve/main/Meta-Llama-3.2-1B-Q8_0.gguf"
# MODEL_PATH = "Meta-Llama-3.2-1B-Q8_0.gguf"
MODEL_URL = (
    "https://huggingface.co/datasets/psy7743/llama3-8b-instruct-Q8_0.gguf"
    "/resolve/main/llama3-8b-instruct-Q8_0.gguf"
)
MODEL_PATH = "llama3-8b-instruct-Q8_0.gguf"

if not Path(MODEL_PATH).exists():
    print("📥 Downloading LLaMA 3 8B Q8_0 model...")
    # Stream to disk in chunks; fail loudly on HTTP errors instead of
    # silently writing an HTML error page into the .gguf file.
    with requests.get(MODEL_URL, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    print("✅ Download complete!")

# ---------------------- Load Data ----------------------
df = pd.read_csv("jupiter_faqs.csv")


def clean_text(text):
    """Lowercase *text*, strip punctuation, and collapse all whitespace runs
    (including newlines/tabs) to single spaces."""
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.replace('\n', ' ').replace('\t', ' ')
    return re.sub(r'\s+', ' ', text).strip()


df['clean_question'] = df['question'].apply(clean_text)
df['clean_answer'] = df['answer'].apply(clean_text)
df['document'] = df.apply(
    lambda row: f"Question: {row['clean_question']}\nAnswer: {row['clean_answer']}",
    axis=1,
)

# ---------------------- Embeddings ----------------------
embedding_model = SentenceTransformer('all-mpnet-base-v2')
# Embed the questions only: retrieval matches the user query against FAQ
# questions, while the stored document carries both question and answer.
df['embedding'] = df['clean_question'].apply(
    lambda x: embedding_model.encode(x).tolist()
)
df['uid'] = [str(uuid.uuid4()) for _ in range(len(df))]

# ---------------------- ChromaDB ----------------------
persist_dir = "chroma_qa_db"
chroma_client = chromadb.PersistentClient(path=persist_dir, settings=Settings())
collection_name = "qa_collection"

# Rebuild the collection from scratch on every run so stale rows never linger
# after the CSV changes; the emptiness check below then always triggers a load.
if collection_name in [c.name for c in chroma_client.list_collections()]:
    chroma_client.delete_collection(name=collection_name)
collection = chroma_client.get_or_create_collection(name=collection_name)

if len(collection.get()["ids"]) == 0:
    collection.add(
        documents=df['document'].tolist(),
        embeddings=df['embedding'].tolist(),
        ids=df['uid'].astype(str).tolist(),
    )

# ---------------------- LLaMA ----------------------
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_threads=8,
    n_gpu_layers=-1,  # offload all layers to GPU when one is available
)


# ---------------------- Inference ----------------------
def search_chroma(query, n_results=5):
    """Return the *n_results* FAQ documents most similar to *query*.

    Embeds the raw query with the same model used at index time and returns
    the document strings for the single query (hence ``["documents"][0]``).
    """
    query_embedding = embedding_model.encode(query).tolist()
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
        include=['documents'],
    )
    return results["documents"][0]


def generate_response(message, history=None):
    """Answer *message* using retrieved FAQ context.

    *history* is accepted for Gradio ChatInterface compatibility but is not
    used; each turn is answered independently. (Was a mutable ``[]`` default —
    now ``None`` to avoid cross-call sharing.)
    """
    docs = search_chroma(message)
    context_str = "\n\n".join(docs)
    sys_prompt = """You are a helpful assistant. Based on the context and user query, provide an accurate and concise answer.

Rules:
- If context is relevant, answer based on it.
- If context is insufficient, ask for clarification or say it's not possible.
- Do not hallucinate or make up answers.
- Keep the tone friendly."""
    prompt = f"{sys_prompt}\n\ncontext:\n{context_str}\n\nQuestion: {message}\nAnswer:"
    response = llm(
        prompt,
        max_tokens=512,
        temperature=0.3,
        top_p=0.8,
        stop=["Q:", "\n"],
        echo=False,  # don't echo the prompt just to strip it off again
    )
    # split() is a safe no-op fallback: with echo=False the completion
    # normally contains no "Answer:" marker.
    return response["choices"][0]["text"].split("Answer:")[-1].strip()


# ---------------------- Gradio Interface ----------------------
def gradio_chat_interface(message, history):
    """Legacy manual chat handler.

    NOTE(review): unused — gr.ChatInterface below calls generate_response
    directly. Kept only for backward compatibility with external callers.
    """
    reply = generate_response(message)
    history = history + [(message, reply)]
    return history, history


demo = gr.ChatInterface(
    fn=generate_response,
    title="🦙 LLaMA-3 FAQ Chatbot",
    chatbot=gr.Chatbot(label="Ask me anything about Jupiter Money!"),
    examples=[
        "What is Jupiter Edge credit card?",
        "What happens if I miss a payment?",
        "How to change billing address?",
    ],
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch(share=True)