Spaces:
Running
Running
| import json | |
| import chromadb | |
| import os | |
| import requests | |
| from chromadb.utils import embedding_functions | |
| from groq import Groq | |
class PortfolioRAG:
    """Retrieval-augmented chat over portfolio entries stored in a JSON file.

    On construction the entries found at ``data_path`` are upserted into a
    persistent ChromaDB collection. ``chat`` retrieves the closest entries for
    a question and asks a Groq chat model to answer using only that context.
    """

    # Model id sent to Groq's chat-completions endpoint.
    # NOTE(review): confirm this model id is still offered by Groq.
    GROQ_MODEL = "llama3-8b-8192"

    def __init__(
        self,
        data_path="data/metadata.json",
        db_path="chroma_db",
        model_name="gpt-oss:20b-cloud",
    ):
        """Create the Groq client (when a key is available) and load the data.

        Args:
            data_path: JSON file holding one entry (object) or a list of them.
            db_path: Directory used by the persistent ChromaDB client.
            model_name: Accepted for backward compatibility. It is stored on
                the instance but not sent to Groq; requests use ``GROQ_MODEL``.
        """
        self.data_path = data_path
        self.db_path = db_path
        self.model_name = model_name
        self.groq_api_key = os.environ.get("GROQ_API_KEY")
        # Always define the attribute so a missing key is reported explicitly
        # later instead of surfacing as an AttributeError.
        self.groq_client = None
        if self.groq_api_key:
            self.groq_client = Groq(api_key=self.groq_api_key)
        else:
            print("WARNING: GROQ_API_KEY environment variable not set!")
        self.collection = None
        self._init_knowledge_base()

    @staticmethod
    def _join_values(value):
        """Render a list-or-string field as one comma-separated string.

        A bare string is returned unchanged; joining it directly would split
        it into single characters. Empty or missing values become ``""``.
        """
        if not value:
            return ""
        if isinstance(value, str):
            return value
        return ", ".join(str(part) for part in value)

    @staticmethod
    def _build_record(item, index):
        """Turn one JSON entry into ``(document_text, metadata, record_id)``.

        Args:
            item: Mapping describing a project or an article.
            index: Position of the entry in the file; used for the fallback
                project name and for the record id.
        """
        project_name = item.get("project", f"Project_{index}")
        category = item.get("category", "Unknown")
        domain = item.get("domain", "Unknown Domain")
        link = item.get("link", "No link provided")
        achievement = item.get("key_achievement", "")
        challenge = item.get("challenge", "")

        language = PortfolioRAG._join_values(item.get("language"))
        tech_stack = PortfolioRAG._join_values(item.get("tech_stack"))
        category_key = str(category).lower()

        # Projects list their languages and stack, articles their abstract;
        # the presence of the field decides when the category does not.
        extra_info = ""
        if category_key == "project" or "language" in item:
            extra_info = f"Languages: {language}\nTech Stack: {tech_stack}"
        elif category_key == "article" or "abstract" in item:
            extra_info = f"Abstract: {item.get('abstract', '')}"

        doc_text = (
            f"Project Name: {project_name}\n"
            f"Domain: {domain}\n"
            f"Category: {category}\n"
            f"Link: {link}\n"
            f"{extra_info}\n"
            f"Key Achievement: {achievement}\n"
            f"Technical Challenge: {challenge}"
        )

        meta_dict = {
            "project": project_name,
            "domain": domain,
            "category": category,
            "link": link,
            "key_achievement": achievement,
            "challenge": challenge,
        }
        # Optional fields are stored only when they carry a value.
        if language:
            meta_dict["language"] = language
        if tech_stack:
            meta_dict["tech_stack"] = tech_stack
        if item.get("abstract"):
            meta_dict["abstract"] = item["abstract"]

        return doc_text, meta_dict, f"doc_{index}"

    def _init_knowledge_base(self):
        """Open the ChromaDB collection and upsert every entry of the data file.

        Record ids are derived from the entry's position (``doc_<i>``), so
        re-running against the same file overwrites rather than duplicates.
        A missing data file is reported and leaves the collection as it is.
        """
        print("Initializing Local Vector Database (ChromaDB)...")
        client = chromadb.PersistentClient(path=self.db_path)
        default_ef = embedding_functions.DefaultEmbeddingFunction()
        self.collection = client.get_or_create_collection(
            name="meta_portfolio",
            embedding_function=default_ef,
        )

        if not os.path.exists(self.data_path):
            print(f"Error: {self.data_path} not found.")
            return

        with open(self.data_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # A single object is treated as a one-element list.
        if isinstance(data, dict):
            data = [data]

        documents = []
        metadatas = []
        ids = []
        for i, item in enumerate(data or []):
            doc_text, meta_dict, record_id = self._build_record(item, i)
            documents.append(doc_text)
            metadatas.append(meta_dict)
            ids.append(record_id)

        # Nothing to write for an empty file; ChromaDB validates that the id
        # list is non-empty, so the upsert is skipped in that case.
        if documents:
            self.collection.upsert(documents=documents, metadatas=metadatas, ids=ids)
        print(f"Knowledge Base ready! ({len(documents)} items stored)\n")

    def retrieve_context(self, query, n_results=2):
        """Return ``(context_text, source_metadata)`` for the closest entries.

        Args:
            query: The user's question.
            n_results: How many entries to request from the collection.

        Returns:
            The matched documents joined by blank lines and the list of their
            metadata dicts; ``("", [])`` when no collection exists or nothing
            matched.
        """
        if self.collection is None:
            return "", []

        print("Retrieving relevant context from ChromaDB...")
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results,
        )

        # Results are nested one level per query text; only one was sent.
        documents = (results or {}).get("documents") or []
        if not documents or not documents[0]:
            return "", []

        context_text = "\n\n".join(documents[0])
        metadatas = results.get("metadatas") or []
        source_metadata = metadatas[0] if metadatas and metadatas[0] else []
        return context_text, source_metadata

    def generate_response(self, query, context):
        """Ask the Groq chat model to answer ``query`` using only ``context``.

        Returns:
            The model's reply, or a string starting with
            ``"Error communicating with Groq API:"`` when no client is
            configured or the request fails.
        """
        if self.groq_client is None:
            return (
                "Error communicating with Groq API: "
                "GROQ_API_KEY environment variable not set."
            )

        print(f"Asking Groq ({self.GROQ_MODEL})...")
        prompt = (
            "You are an AI assistant representing the portfolio of Edmond Song, "
            "a Senior DeFi and AI Research Engineer.\n"
            "Based ONLY on the following context about Edmond's projects and "
            "articles, answer the user's question clearly and professionally.\n"
            "Context:\n"
            f"{context}\n"
            "Question:\n"
            f"{query}\n"
            "Answer:"
        )
        try:
            chat_completion = self.groq_client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": prompt,
                    }
                ],
                model=self.GROQ_MODEL,
                temperature=0.3,
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            # Boundary with a remote service: report the failure as the answer
            # text so the caller always gets a string back.
            return f"Error communicating with Groq API: {e}"

    def chat(self, query):
        """Answer ``query`` and return ``{"answer": str, "metadata": list}``.

        ``metadata`` holds the metadata of the entries used as context; it is
        empty when no context could be retrieved.
        """
        context_text, source_metadata = self.retrieve_context(query)
        if not context_text:
            return {
                "answer": "I don't have enough context to answer that.",
                "metadata": [],
            }
        return {
            "answer": self.generate_response(query, context_text),
            "metadata": source_metadata,
        }