# researchbot / src/streamlit_app.py
# ehwkang's picture
# Update src/streamlit_app.py
# cf1fa07 verified
import streamlit as st
from llama_index.core import VectorStoreIndex, Document, Settings, SimpleDirectoryReader, StorageContext
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.retrievers.fusion_retriever import FUSION_MODES
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
import httpx
import os
import datetime
from huggingface_hub import HfApi, CommitScheduler
from pathlib import Path
import json
import uuid
# --- 1. CONFIGURATION ---
RESEARCHER_NAME = "Enoch Hyunwook Kang"
OLLAMA_BASE_URL = "https://researchbot.share.zrok.io"
OLLAMA_MODEL = "qwen3:8b"

# --- 2. LOGGING SETUP (Hugging Face Dataset) ---
# The dataset repo (e.g. a private "ehwkang/researchbot-logs") should be
# created on the Hugging Face Hub before the app is deployed.
LOG_DATASET = "ehwkang/researchbot-logs"
LOG_FILE = "qna_logs.jsonl"


# show_spinner=False keeps this call from emitting any UI element, since it
# runs before st.set_page_config() further down the script.
@st.cache_resource(show_spinner=False)
def _get_scheduler():
    """Build the CommitScheduler once per server process.

    Streamlit re-executes this script on every user interaction. Creating
    the scheduler directly at module level would therefore start a new
    scheduler (and a new ``lock``) on every rerun; caching it keeps a single
    shared instance, so ``scheduler.lock`` is the same lock for all sessions.
    """
    return CommitScheduler(
        repo_id=LOG_DATASET,
        repo_type="dataset",
        folder_path="logs",
        path_in_repo="data",
        every=10,  # Upload every 10 minutes (or on shutdown)
    )


scheduler = _get_scheduler()
def log_interaction(question, answer):
    """Append one question/answer record to the local JSONL log file.

    The record holds the current local timestamp, the session id stored in
    ``st.session_state`` (``None`` if absent), the question, and the answer
    converted with ``str``. The file lives in the ``logs`` folder that the
    module-level ``scheduler`` is configured with.

    Args:
        question: The text the user asked.
        answer: The response object or text; stored as ``str(answer)``.
    """
    log_dir = Path("logs")
    log_dir.mkdir(parents=True, exist_ok=True)
    target = log_dir / LOG_FILE

    record = {
        "timestamp": datetime.datetime.now().isoformat(),
        "session_id": st.session_state.get("session_id"),
        "question": question,
        "answer": str(answer),
    }

    # Write while holding the scheduler's lock, one JSON object per line.
    with scheduler.lock, target.open("a") as handle:
        handle.write(json.dumps(record) + "\n")
# --- 3. CUSTOM OLLAMA CLIENT ---
class CustomOllama(Ollama):
    """Ollama LLM whose HTTP client sends the ``skip_zrok_interstitial`` header.

    NOTE(review): this only has an effect if the installed ``Ollama`` base
    class actually calls ``_get_client`` - confirm against the pinned
    llama-index-llms-ollama version.
    """

    def _get_client(self):
        """Return an ``httpx.Client`` bound to ``self.base_url`` with a 120 s timeout."""
        extra_headers = {"skip_zrok_interstitial": "true"}
        client = httpx.Client(
            base_url=self.base_url,
            timeout=120.0,
            headers=extra_headers,
        )
        return client
# --- 4. SETUP ---
st.set_page_config(page_title=f"{RESEARCHER_NAME}'s Research", layout="centered")

# One random id per browser session, used to group log entries.
if "session_id" not in st.session_state:
    st.session_state.session_id = str(uuid.uuid4())


@st.cache_resource(show_spinner=False)
def _load_embed_model():
    """Load the embedding model once per server process.

    The script is re-executed on every interaction; without caching the
    model would be re-instantiated on each rerun.
    """
    return HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")


# Initialize Models
try:
    # Embedding Model (Runs on HF CPU - lightweight)
    embed_model = _load_embed_model()
    Settings.embed_model = embed_model
    # LLM (Runs on your Local GPU)
    Settings.llm = CustomOllama(
        model=OLLAMA_MODEL,
        base_url=OLLAMA_BASE_URL,
        request_timeout=120.0,
        context_window=8192,  # 8k is usually enough for RAG
        temperature=0.3,  # Lower temp for factual research answers
    )
except Exception as e:
    st.error(f"Configuration Error: {e}")
    # Everything below needs embed_model and Settings.llm; halt this run
    # here instead of failing later on an undefined name.
    st.stop()
# --- 5. INTELLIGENT INDEXING (Semantic + Hybrid) ---
@st.cache_resource
def load_resources():
    """Load the CV text and build the vector index over the paper corpus.

    Reads ``CV.txt`` and every ``.txt`` file under ``data/`` (recursively),
    both located next to this script, splits the documents with the semantic
    splitter and indexes the resulting nodes.

    Returns:
        A 3-tuple ``(cv_text, vector_index, nodes)``. ``cv_text`` is ``""``
        when ``CV.txt`` is missing. ``vector_index`` and ``nodes`` are both
        ``None`` when the data folder is missing or yields no documents.
        Every path returns three items, because the caller unpacks three.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # A. Load CV
    cv_text = ""
    cv_path = os.path.join(script_dir, "CV.txt")
    if os.path.exists(cv_path):
        with open(cv_path, "r", encoding="utf-8") as f:
            cv_text = f.read()

    # B. Load Papers & Build Index
    data_dir = os.path.join(script_dir, "data")
    if not os.path.exists(data_dir):
        return cv_text, None, None

    documents = SimpleDirectoryReader(
        data_dir, required_exts=[".txt"], recursive=True
    ).load_data()
    if not documents:
        return cv_text, None, None

    # SOTA 1: Semantic Chunking (Splits by meaning, not just line count)
    # Note: This runs on CPU (HF Spaces), so it might take 30-60s on boot.
    splitter = SemanticSplitterNodeParser(
        buffer_size=1,
        breakpoint_percentile_threshold=95,
        embed_model=embed_model,
    )
    nodes = splitter.get_nodes_from_documents(documents)

    # Create Vector Index
    vector_index = VectorStoreIndex(nodes)
    return cv_text, vector_index, nodes


cv_content, vector_index, all_nodes = load_resources()
# --- 6. HYBRID RETRIEVER & CHAT ENGINE ---
def get_chat_engine():
    """Build a chat engine over a fused vector + BM25 retriever.

    Returns:
        A ``CondensePlusContextChatEngine`` with a fresh
        ``ChatMemoryBuffer``, or ``None`` when no index is available.
    """
    if vector_index is None or not all_nodes:
        return None

    # SOTA 2: Hybrid Retrieval (Vector + BM25)
    # 1. Vector Search (Semantic understanding)
    vector_retriever = vector_index.as_retriever(similarity_top_k=5)
    # 2. BM25 Search (Keyword precision - crucial for specific algorithm names)
    bm25_retriever = BM25Retriever.from_defaults(nodes=all_nodes, similarity_top_k=5)
    # 3. Fusion (Combine results)
    retriever = QueryFusionRetriever(
        [vector_retriever, bm25_retriever],
        similarity_top_k=5,
        num_queries=1,
        # Pass the enum member rather than a hand-typed mode string.
        mode=FUSION_MODES.RECIPROCAL_RANK,
        use_async=False,
    )

    # SOTA 3: CondensePlusContext
    # Handles: "What is its accuracy?" -> "What is the accuracy of [Previous Topic]?"
    memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
    system_prompt = (
        f"You are {RESEARCHER_NAME}. Answer questions about your research based ONLY on the provided context. "
        f"If the answer is not in the context, say you don't know. "
        f"Here is your CV for biographical context:\n{cv_content}"
    )
    return CondensePlusContextChatEngine.from_defaults(
        retriever=retriever,
        llm=Settings.llm,
        memory=memory,
        system_prompt=system_prompt,
        verbose=True,
    )


# Streamlit re-runs this script on every interaction. Building the engine
# unconditionally would create a new, empty ChatMemoryBuffer each time, so
# follow-up questions would lose their conversation history. Keep one engine
# per browser session instead (session state is not shared between users).
if st.session_state.get("chat_engine") is None:
    st.session_state.chat_engine = get_chat_engine()
chat_engine = st.session_state.chat_engine
# --- 7. CHAT UI ---
# Seed the transcript with a greeting on the first run of a session.
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "Hello! Ask me about my research."}]

# Replay the stored transcript, since each rerun redraws the page from scratch.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

if prompt := st.chat_input("Ask a question..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    if chat_engine is not None:
        with st.chat_message("assistant"):
            try:
                with st.spinner("Thinking..."):
                    response = chat_engine.chat(prompt)
            except Exception as e:
                # The LLM sits behind a remote tunnel; report a failed call
                # in the chat instead of crashing the page with a traceback.
                st.error(f"Could not get an answer from the model: {e}")
            else:
                answer = str(response)  # convert once, reuse below
                st.write(answer)
                st.session_state.messages.append({"role": "assistant", "content": answer})
                # Log to HF Dataset
                log_interaction(prompt, answer)
    else:
        st.error("Index not loaded. Check 'data' folder.")