"""Streamlit chat app that answers questions about a researcher's papers.

Pipeline: text files under ``data/`` are split semantically, indexed in a
vector store, and queried through a hybrid (vector + BM25) fusion retriever
feeding a CondensePlusContext chat engine.  The LLM is served by a remote
Ollama endpoint; every question/answer pair is appended to a JSONL file that
a ``CommitScheduler`` uploads to a Hugging Face dataset.

Streamlit re-executes this script on every user interaction, so anything
expensive or stateful is kept in ``st.cache_resource`` (process-wide) or
``st.session_state`` (per browser session).
"""

import datetime
import json
import os
import uuid
from pathlib import Path

import httpx
import streamlit as st
from huggingface_hub import HfApi, CommitScheduler
from llama_index.core import VectorStoreIndex, Document, Settings, SimpleDirectoryReader, StorageContext
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.retrievers.fusion_retriever import FUSION_MODES
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.retrievers.bm25 import BM25Retriever

# --- 1. CONFIGURATION ---
RESEARCHER_NAME = "Enoch Hyunwook Kang"
OLLAMA_BASE_URL = "https://researchbot.share.zrok.io"
OLLAMA_MODEL = "qwen3:8b"

# Page config is issued before any other Streamlit call (including the cached
# resource loaders below) so it is always the first Streamlit command.
st.set_page_config(page_title=f"{RESEARCHER_NAME}'s Research", layout="centered")

if "session_id" not in st.session_state:
    st.session_state.session_id = str(uuid.uuid4())

# --- 2. LOGGING SETUP (Hugging Face Dataset) ---
# Create a private dataset on HF (e.g., "ehwkang/researchbot-logs") first!
LOG_DATASET = "ehwkang/researchbot-logs"
LOG_FILE = "qna_logs.jsonl"


@st.cache_resource(show_spinner=False)
def _get_scheduler():
    """Create the dataset upload scheduler once per server process.

    Building it at module level would construct a new scheduler on every
    Streamlit rerun; caching keeps a single instance (and a single lock).
    """
    return CommitScheduler(
        repo_id=LOG_DATASET,
        repo_type="dataset",
        folder_path="logs",
        path_in_repo="data",
        every=10,  # Upload every 10 minutes (or on shutdown)
    )


scheduler = _get_scheduler()


def log_interaction(question, answer):
    """Append one question/answer record to the local JSONL log.

    Args:
        question: The user's prompt.
        answer: The chat engine response; stored as ``str(answer)``.

    The record also carries an ISO timestamp and the current Streamlit
    session id.  The write happens under ``scheduler.lock`` so it does not
    interleave with an upload of the same folder.
    """
    log_path = Path("logs") / LOG_FILE
    log_path.parent.mkdir(parents=True, exist_ok=True)

    entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "session_id": st.session_state.get("session_id"),
        "question": question,
        "answer": str(answer),
    }

    with scheduler.lock:
        with log_path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")


# --- 3. CUSTOM OLLAMA CLIENT ---
class CustomOllama(Ollama):
    """Ollama LLM whose HTTP client sends the zrok interstitial-skip header."""

    def _get_client(self):
        """Return an httpx client bound to ``self.base_url`` with a 120 s timeout.

        NOTE(review): whether the installed llama-index Ollama integration
        actually calls ``_get_client`` is not visible from this file - confirm.
        """
        return httpx.Client(
            base_url=self.base_url,
            timeout=120.0,
            headers={"skip_zrok_interstitial": "true"},
        )


# --- 4. SETUP ---
@st.cache_resource(show_spinner=False)
def _load_models():
    """Build the embedding model and the LLM client once per server process.

    Returns:
        A ``(embed_model, llm)`` tuple.
    """
    # Embedding Model (Runs on HF CPU - lightweight)
    embedding = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    # LLM (Runs on your Local GPU)
    llm = CustomOllama(
        model=OLLAMA_MODEL,
        base_url=OLLAMA_BASE_URL,
        request_timeout=120.0,
        context_window=8192,  # 8k is usually enough for RAG
        temperature=0.3,  # Lower temp for factual research answers
    )
    return embedding, llm


# Initialize Models
try:
    embed_model, _llm = _load_models()
except Exception as e:
    st.error(f"Configuration Error: {e}")
    # Nothing below can work without the models; halt instead of failing
    # later with a NameError on `embed_model`.
    st.stop()

Settings.embed_model = embed_model
Settings.llm = _llm


# --- 5. INTELLIGENT INDEXING (Semantic + Hybrid) ---
@st.cache_resource
def load_resources():
    """Load the CV text and build the vector index over ``data/*.txt``.

    Returns:
        A 3-tuple ``(cv_text, vector_index, nodes)``.  ``cv_text`` is ``""``
        when ``CV.txt`` is absent.  ``vector_index`` is ``None`` and ``nodes``
        is ``[]`` when the ``data`` directory is missing or yields no
        documents, so callers can always unpack three values.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # A. Load CV
    cv_text = ""
    cv_path = os.path.join(script_dir, "CV.txt")
    if os.path.exists(cv_path):
        with open(cv_path, "r", encoding="utf-8") as f:
            cv_text = f.read()

    # B. Load Papers & Build Index
    data_dir = os.path.join(script_dir, "data")
    if not os.path.exists(data_dir):
        return cv_text, None, []

    documents = SimpleDirectoryReader(
        data_dir, required_exts=[".txt"], recursive=True
    ).load_data()
    if not documents:
        return cv_text, None, []

    # SOTA 1: Semantic Chunking (Splits by meaning, not just line count)
    # Note: This runs on CPU (HF Spaces), so it might take 30-60s on boot.
    splitter = SemanticSplitterNodeParser(
        buffer_size=1,
        breakpoint_percentile_threshold=95,
        embed_model=embed_model,
    )
    nodes = splitter.get_nodes_from_documents(documents)

    # Create Vector Index
    vector_index = VectorStoreIndex(nodes)

    return cv_text, vector_index, nodes


cv_content, vector_index, all_nodes = load_resources()


# --- 6. HYBRID RETRIEVER & CHAT ENGINE ---
def get_chat_engine():
    """Build a chat engine with its own conversation memory.

    Returns:
        A ``CondensePlusContextChatEngine`` over the hybrid retriever, or
        ``None`` when no index was loaded.
    """
    if vector_index is None:
        return None

    # SOTA 2: Hybrid Retrieval (Vector + BM25)
    # 1. Vector Search (Semantic understanding)
    vector_retriever = vector_index.as_retriever(similarity_top_k=5)

    # 2. BM25 Search (Keyword precision - crucial for specific algorithm names)
    bm25_retriever = BM25Retriever.from_defaults(nodes=all_nodes, similarity_top_k=5)

    # 3. Fusion (Combine results)
    retriever = QueryFusionRetriever(
        [vector_retriever, bm25_retriever],
        similarity_top_k=5,
        num_queries=1,
        # Use the enum rather than a raw string; the equivalent string form
        # is "reciprocal_rerank" (note the extra 're').
        mode=FUSION_MODES.RECIPROCAL_RANK,
        use_async=False,
    )

    # SOTA 3: CondensePlusContext
    # Handles: "What is its accuracy?" -> "What is the accuracy of [Previous Topic]?"
    memory = ChatMemoryBuffer.from_defaults(token_limit=4000)

    system_prompt = (
        f"You are {RESEARCHER_NAME}. Answer questions about your research based ONLY on the provided context. "
        f"If the answer is not in the context, say you don't know. "
        f"Here is your CV for biographical context:\n{cv_content}"
    )

    return CondensePlusContextChatEngine.from_defaults(
        retriever=retriever,
        llm=Settings.llm,
        memory=memory,
        system_prompt=system_prompt,
        verbose=True,
    )


# Keep one engine per browser session: rebuilding it on every rerun would
# reset the chat memory, so follow-up questions would lose their context.
if "chat_engine" not in st.session_state:
    st.session_state.chat_engine = get_chat_engine()
chat_engine = st.session_state.chat_engine

# --- 7. CHAT UI ---
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "Hello! Ask me about my research."}
    ]

# Replay the transcript, since each rerun starts from an empty page.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

if prompt := st.chat_input("Ask a question..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    if chat_engine:
        with st.chat_message("assistant"):
            try:
                with st.spinner("Thinking..."):
                    response = chat_engine.chat(prompt)
            except Exception as e:
                # The LLM sits behind a remote tunnel; report an outage in
                # the UI instead of crashing the page with a traceback.
                st.error(f"Chat Error: {e}")
            else:
                answer = str(response)
                st.write(answer)
                st.session_state.messages.append(
                    {"role": "assistant", "content": answer}
                )

                # Log to HF Dataset
                log_interaction(prompt, response)
    else:
        st.error("Index not loaded. Check 'data' folder.")