# Spaces: Running
# File size: 5,701 Bytes
# 06640a7 (listing of lines 1-157)
import json
import chromadb
import os
import requests
from chromadb.utils import embedding_functions
from groq import Groq
class PortfolioRAG:
    """Retrieval-augmented question answering over portfolio metadata.

    Entries from a JSON file are rendered to text and upserted into a
    persistent ChromaDB collection. Questions are answered by querying that
    collection for context and sending context + question to the Groq
    chat-completions API.
    """

    def __init__(
        self,
        data_path="data/metadata.json",
        db_path="chroma_db",
        model_name="gpt-oss:20b-cloud",
        groq_model="llama3-8b-8192",
    ):
        """Create the Groq client (when a key is available) and index the data.

        Args:
            data_path: Path of the JSON file holding one entry or a list of
                entries.
            db_path: Directory used by the persistent ChromaDB client.
            model_name: Kept for backward compatibility with existing
                callers; it is stored but not used for the Groq request.
            groq_model: Model id passed to the Groq chat-completions call.
                NOTE(review): confirm this id is still served by Groq.
        """
        self.data_path = data_path
        self.db_path = db_path
        self.model_name = model_name
        self.groq_model = groq_model
        self.groq_api_key = os.environ.get("GROQ_API_KEY")
        # Always define the attribute so later code can test it instead of
        # tripping over an AttributeError when the key is missing.
        self.groq_client = None
        if self.groq_api_key:
            self.groq_client = Groq(api_key=self.groq_api_key)
        else:
            print("WARNING: GROQ_API_KEY environment variable not set!")
        self.collection = None
        self._init_knowledge_base()

    @staticmethod
    def _join_values(value):
        """Render a list-like (or a single string) as a comma-separated string."""
        if value is None:
            return ""
        if isinstance(value, str):
            # A bare string must not be joined character by character.
            return value
        return ", ".join(str(part) for part in value)

    @staticmethod
    def _build_document(item, index):
        """Return ``(doc_text, meta_dict)`` for one metadata entry.

        Args:
            item: Mapping describing a project or article.
            index: Position of the entry, used for the fallback project name.
        """
        project_name = item.get("project", f"Project_{index}")
        category = item.get("category", "Unknown")
        domain = item.get("domain", "Unknown Domain")
        link = item.get("link", "No link provided")
        achievement = item.get("key_achievement", "")
        challenge = item.get("challenge", "")

        # str() keeps the comparison safe when the JSON value is null.
        category_key = str(category).lower()
        extra_info = ""
        if category_key == "project" or "language" in item:
            language = PortfolioRAG._join_values(item.get("language", []))
            tech_stack = PortfolioRAG._join_values(item.get("tech_stack", []))
            extra_info = (
                f"Languages: {language}\n"
                f"Tech Stack: {tech_stack}"
            )
        elif category_key == "article" or "abstract" in item:
            abstract = item.get("abstract", "")
            extra_info = f"Abstract: {abstract}"

        doc_text = (
            f"Project Name: {project_name}\n"
            f"Domain: {domain}\n"
            f"Category: {category}\n"
            f"Link: {link}\n"
            f"{extra_info}\n"
            f"Key Achievement: {achievement}\n"
            f"Technical Challenge: {challenge}"
        )

        meta_dict = {
            "project": project_name,
            "domain": domain,
            "category": category,
            "link": link,
            "key_achievement": achievement,
            "challenge": challenge,
        }
        # Optional fields are stored only when present and non-empty.
        if item.get("language"):
            meta_dict["language"] = PortfolioRAG._join_values(item["language"])
        if item.get("tech_stack"):
            meta_dict["tech_stack"] = PortfolioRAG._join_values(item["tech_stack"])
        if item.get("abstract"):
            meta_dict["abstract"] = item["abstract"]
        return doc_text, meta_dict

    @staticmethod
    def _build_prompt(query, context):
        """Compose the single user message sent to the chat model."""
        return (
            "You are an AI assistant representing the portfolio of Edmond Song, "
            "a Senior DeFi and AI Research Engineer.\n"
            "Based ONLY on the following context about Edmond's projects and "
            "articles, answer the user's question clearly and professionally.\n"
            "Context:\n"
            f"{context}\n"
            "Question:\n"
            f"{query}\n"
            "Answer:"
        )

    def _init_knowledge_base(self):
        """Open (or create) the collection and upsert every entry of the data file.

        If the data file does not exist, the collection is still created but
        nothing is stored.
        """
        print("Initializing Local Vector Database (ChromaDB)...")
        client = chromadb.PersistentClient(path=self.db_path)
        default_ef = embedding_functions.DefaultEmbeddingFunction()
        self.collection = client.get_or_create_collection(
            name="meta_portfolio",
            embedding_function=default_ef,
        )

        if not os.path.exists(self.data_path):
            print(f"Error: {self.data_path} not found.")
            return

        with open(self.data_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # A single object is treated as a one-element list.
        if isinstance(data, dict):
            data = [data]

        documents = []
        metadatas = []
        ids = []
        for i, item in enumerate(data):
            if not isinstance(item, dict):
                print(f"Warning: skipping entry {i}: expected an object.")
                continue
            doc_text, meta_dict = self._build_document(item, i)
            documents.append(doc_text)
            metadatas.append(meta_dict)
            # Ids derive from the position in the file so re-runs overwrite
            # the same records instead of duplicating them.
            ids.append(f"doc_{i}")

        if not documents:
            # Nothing to store; an upsert with empty lists is expected to be
            # rejected by ChromaDB, so skip it.
            print(f"Warning: no entries found in {self.data_path}.")
            return

        self.collection.upsert(documents=documents, metadatas=metadatas, ids=ids)
        print(f"Knowledge Base ready! ({len(documents)} items stored)\n")

    def retrieve_context(self, query, n_results=2):
        """Query the collection and return ``(context_text, source_metadata)``.

        Args:
            query: Question text used as the single query.
            n_results: Number of documents requested from the collection.

        Returns:
            The matched documents joined by blank lines, and the metadata list
            of the matches. ``("", [])`` when there is no collection or no
            match.
        """
        if self.collection is None:
            return "", []

        print("Retrieving relevant context from ChromaDB...")
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results,
        )
        if not results:
            return "", []

        # Results are batched per query text; only one query is sent, so only
        # the first batch is relevant.
        doc_batches = results.get("documents") or []
        if not doc_batches or not doc_batches[0]:
            return "", []
        context_text = "\n\n".join(doc_batches[0])

        meta_batches = results.get("metadatas") or []
        source_metadata = []
        if meta_batches and meta_batches[0]:
            source_metadata = meta_batches[0]
        return context_text, source_metadata

    def generate_response(self, query, context):
        """Ask the Groq chat model to answer ``query`` using ``context``.

        Returns:
            The model's reply, or a string starting with
            ``"Error communicating with Groq API:"`` when the client is
            missing or the request fails (no exception is propagated).
        """
        if self.groq_client is None:
            return (
                "Error communicating with Groq API: "
                "GROQ_API_KEY environment variable not set"
            )

        print(f"Asking Groq ({self.groq_model})...")
        prompt = self._build_prompt(query, context)
        try:
            chat_completion = self.groq_client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": prompt,
                    }
                ],
                model=self.groq_model,
                temperature=0.3,
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            # Boundary with an external service: callers expect a string, so
            # the failure is reported in-band rather than raised.
            return f"Error communicating with Groq API: {str(e)}"

    def chat(self, query):
        """Answer ``query`` and return ``{"answer": ..., "metadata": ...}``.

        When no context is retrieved, a fixed fallback answer with empty
        metadata is returned and the model is not called.
        """
        context_text, source_metadata = self.retrieve_context(query)
        if context_text:
            ai_response = self.generate_response(query, context_text)
            return {
                "answer": ai_response,
                "metadata": source_metadata,
            }
        return {
            "answer": "I don't have enough context to answer that.",
            "metadata": [],
        }