# code-crawler / app.py
# Asish Karthikeya Gogineni
# fix: Update Gemini model list to available models
# 937fe82
# (file-viewer residue: raw / history / blame, 24.3 kB)
import streamlit as st
import os
import shutil
import time
from code_chatbot.universal_ingestor import process_source
from code_chatbot.indexer import Indexer
from code_chatbot.rag import ChatEngine
from code_chatbot.ast_analysis import ASTGraphBuilder
from code_chatbot.graph_rag import GraphEnhancedRetriever
import logging
from dotenv import load_dotenv
# Load Env
# Runs before the sidebar reads GOOGLE_API_KEY / GROQ_API_KEY / QDRANT_* via
# os.getenv, so values from a .env file are visible to those lookups.
load_dotenv()
# Basic Setup
# NOTE(review): Streamlit documents set_page_config as a call that should
# precede other st.* commands; keep it ahead of the CSS injection below.
st.set_page_config(page_title="Code Chatbot", page_icon="💻", layout="wide")
# INFO-level root logging for the whole app.
logging.basicConfig(level=logging.INFO)
# --- Custom CSS for Premium Slate UI ---
import base64
def get_base64_logo(path="assets/logo.png"):
    """Return the image at *path* as a base64-encoded ASCII string.

    The result is spliced into a CSS ``data:`` URL for the page watermark.

    Args:
        path: Location of the image file. Defaults to the bundled logo.

    Returns:
        The base64 text of the file contents, or ``""`` when the file is
        missing or unreadable, so the watermark silently degrades to nothing.
    """
    try:
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode()
    except OSError:
        # Only file-system failures are expected here; anything else
        # (KeyboardInterrupt, programming errors) should surface normally.
        return ""
# Base64 payload for the background watermark; empty when no logo is available.
logo_b64 = get_base64_logo()

# Global theme stylesheet, injected once per run through st.markdown.
#
# The @import rule is deliberately the FIRST rule inside <style>: CSS ignores
# an @import that appears after any other rule (such as @keyframes), which is
# what previously prevented the Outfit / JetBrains Mono fonts from loading.
#
# LOGO_BASE64_PLACEHOLDER is substituted with logo_b64 just before injection.
css = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;700&display=swap');
/* -------------------------------------------------------------------------- */
/* CORE ANIMATIONS */
/* -------------------------------------------------------------------------- */
@keyframes gradient-xy {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
@keyframes fadeInUp {
from { opacity: 0; transform: translateY(10px); }
to { opacity: 1; transform: translateY(0); }
}
/* -------------------------------------------------------------------------- */
/* GLOBAL THEME ENGINE */
/* -------------------------------------------------------------------------- */
:root {
--primary-glow: 56, 189, 248; /* Sky Blue */
--secondary-glow: 139, 92, 246; /* Violet */
--bg-deep: #050608;
--glass-border: rgba(255, 255, 255, 0.08);
--glass-bg: rgba(15, 23, 42, 0.6);
}
.stApp {
background: radial-gradient(circle at 10% 20%, rgba(13, 17, 28, 1) 0%, rgba(5, 6, 8, 1) 90%);
font-family: 'Outfit', sans-serif;
}
/* BACKGROUND WATERMARK */
.stApp::before {
content: "";
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
width: 70vh; /* Slightly smaller to fit nicely */
height: 70vh;
background-image: url("data:image/png;base64,LOGO_BASE64_PLACEHOLDER");
background-position: center;
background-repeat: no-repeat;
background-size: contain;
opacity: 0.08; /* Subtle but visible color */
pointer-events: none;
z-index: 0;
border-radius: 50%; /* Force Circular Shape */
}
/* Sidebar Logo - Standard Shape */
[data-testid="stSidebar"] img {
border-radius: 12px; /* Slight rounded corners for better aesthetics, but not circular */
box-shadow: 0 0 20px rgba(56, 189, 248, 0.3); /* Neon Glow */
border: 1px solid rgba(56, 189, 248, 0.5);
}
/* Global Text Override */
p, div, span, label, h1, h2, h3, h4, h5, h6, .stMarkdown {
color: #E2E8F0 !important;
text-shadow: 0 1px 2px rgba(0,0,0,0.3);
}
/* -------------------------------------------------------------------------- */
/* SIDEBAR */
/* -------------------------------------------------------------------------- */
section[data-testid="stSidebar"] {
background: rgba(11, 12, 16, 0.85);
backdrop-filter: blur(20px);
-webkit-backdrop-filter: blur(20px);
border-right: 1px solid var(--glass-border);
box-shadow: 5px 0 30px rgba(0,0,0,0.5);
}
section[data-testid="stSidebar"] h1 {
background: linear-gradient(to right, #38BDF8, #8B5CF6);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: 800;
font-size: 2rem !important;
padding-bottom: 0.5rem;
}
/* -------------------------------------------------------------------------- */
/* INPUTS & FORMS */
/* -------------------------------------------------------------------------- */
.stTextInput input, .stSelectbox div[data-baseweb="select"], .stTextArea textarea {
background-color: rgba(30, 41, 59, 0.5) !important;
border: 1px solid var(--glass-border) !important;
color: #F8FAFC !important;
border-radius: 12px !important;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
backdrop-filter: blur(10px);
}
.stTextInput input:focus, .stTextArea textarea:focus, .stSelectbox div[data-baseweb="select"]:focus-within {
border-color: #38BDF8 !important;
box-shadow: 0 0 15px rgba(var(--primary-glow), 0.3);
transform: translateY(-1px);
}
/* -------------------------------------------------------------------------- */
/* MEDIA UPLOADS */
/* -------------------------------------------------------------------------- */
[data-testid="stFileUploader"] {
background-color: rgba(30, 41, 59, 0.4);
border: 1px dashed var(--glass-border);
border-radius: 12px;
padding: 20px;
}
/* FORCE TEXT COLOR FOR FILE UPLOADER */
[data-testid="stFileUploader"] section > div,
[data-testid="stFileUploader"] section > div > span,
[data-testid="stFileUploader"] section > div > small,
[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"] p {
color: #E2E8F0 !important; /* Bright Slate */
opacity: 1 !important;
-webkit-text-fill-color: #E2E8F0 !important;
}
[data-testid="stFileUploader"] button {
background: rgba(56, 189, 248, 0.2);
color: #38BDF8 !important;
border: 1px solid #38BDF8;
}
/* -------------------------------------------------------------------------- */
/* DROPDOWN & SELECT */
/* -------------------------------------------------------------------------- */
/* 1. The Box Itself */
.stSelectbox div[data-baseweb="select"] {
background-color: #1E293B !important; /* Solid Slate-800 for contrast */
border: 1px solid #475569 !important;
color: white !important;
}
/* 2. The Text INSIDE the Box (Critical Fix) */
.stSelectbox div[data-baseweb="select"] div[data-testid="stMarkdownContainer"] > p {
color: #F8FAFC !important; /* White */
font-weight: 500 !important;
}
/* 3. The Dropdown Menu (Popup) */
div[data-baseweb="popover"], div[data-baseweb="menu"], ul[data-baseweb="menu"] {
background-color: #0F172A !important;
border: 1px solid #334155 !important;
}
/* 4. Options in the Menu */
li[data-baseweb="option"], div[data-baseweb="option"] {
color: #CBD5E1 !important; /* Light Slate */
}
/* 5. Start/Icons in Menu */
li[data-baseweb="option"] *, div[data-baseweb="option"] * {
color: #CBD5E1 !important;
}
/* 6. Selected/Hovered Option */
li[data-baseweb="option"][aria-selected="true"],
li[data-baseweb="option"]:hover,
div[data-baseweb="option"]:hover {
background-color: #38BDF8 !important;
color: white !important;
}
/* 7. SVG Arrow Icon */
.stSelectbox svg {
fill: #94A3B8 !important;
}
/* -------------------------------------------------------------------------- */
/* BUTTONS */
/* -------------------------------------------------------------------------- */
.stButton button {
background: linear-gradient(135deg, #0EA5E9 0%, #2563EB 100%);
color: white !important;
border: none;
border-radius: 12px;
padding: 0.75rem 1.5rem;
font-weight: 600;
letter-spacing: 0.5px;
transition: all 0.3s ease;
box-shadow: 0 4px 14px rgba(14, 165, 233, 0.3);
text-transform: uppercase;
font-size: 0.85rem;
}
.stButton button:hover {
transform: translateY(-2px) scale(1.02);
box-shadow: 0 6px 20px rgba(14, 165, 233, 0.5);
}
.stButton button:active {
transform: translateY(0);
}
/* -------------------------------------------------------------------------- */
/* CHAT BUBBLES */
/* -------------------------------------------------------------------------- */
.stChatMessage {
background: var(--glass-bg);
border: 1px solid var(--glass-border);
border-radius: 16px;
backdrop-filter: blur(10px);
margin-bottom: 1rem;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
animation: fadeInUp 0.4s ease-out forwards;
}
.stChatMessage[data-testid="stChatMessage"]:nth-child(even) {
border-left: 3px solid #38BDF8;
background: linear-gradient(90deg, rgba(56, 189, 248, 0.05) 0%, rgba(15, 23, 42, 0.6) 100%);
}
/* -------------------------------------------------------------------------- */
/* CODE & CHIPS */
/* -------------------------------------------------------------------------- */
code {
font-family: 'JetBrains Mono', monospace !important;
background: #0B0E14 !important;
border: 1px solid #1E293B;
border-radius: 6px;
color: #7DD3FC !important;
}
/* Source Chips with Glow */
.source-container {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-bottom: 16px;
padding-bottom: 12px;
border-bottom: 1px solid rgba(255,255,255,0.05);
}
.source-chip {
background: rgba(30, 41, 59, 0.6);
border: 1px solid rgba(56, 189, 248, 0.2);
border-radius: 20px;
padding: 6px 14px;
font-size: 0.8rem;
color: #94A3B8;
display: flex;
align-items: center;
transition: all 0.3s ease;
cursor: pointer;
backdrop-filter: blur(5px);
}
.source-chip:hover {
background: rgba(56, 189, 248, 0.15);
border-color: #38BDF8;
color: #38BDF8;
box-shadow: 0 0 10px rgba(56, 189, 248, 0.2);
transform: translateY(-1px);
}
.source-icon {
margin-right: 8px;
opacity: 0.7;
}
/* Hiding Streamlit Branding */
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
header {visibility: hidden;}
</style>
"""
# unsafe_allow_html is required for the <style> element to reach the browser.
st.markdown(css.replace("LOGO_BASE64_PLACEHOLDER", logo_b64), unsafe_allow_html=True)
# Session State
# Seed each per-session key exactly once; on later reruns the key already
# exists and whatever was stored there is left untouched.
for _state_key, _initial_value in (
    ("messages", []),
    ("chat_engine", None),
    ("processed_files", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Sidebar
# Collects provider / model / vector-store settings, ingests a source, and
# exposes usage stats plus reset controls once a codebase has been indexed.
with st.sidebar:
    # Logo
    if os.path.exists("assets/logo.png"):
        st.image("assets/logo.png", use_column_width=True)

    st.title("🔧 Configuration")

    # Provider Selection (Gemini & Groq only as requested)
    provider = st.radio("LLM Provider", ["gemini", "groq"])

    # Model Selection for Gemini (stays None for other providers)
    gemini_model = None
    if provider == "gemini":
        gemini_model = st.selectbox(
            "Gemini Model",
            [
                "gemini-1.5-flash",
                "gemini-1.5-pro",
                "gemini-2.0-flash",
                "gemini-2.5-flash",  # May require newer API
            ],
            index=0,  # Default to 1.5 Flash (stable, free tier)
            help="""**Gemini 1.5 Flash** (Recommended): Stable, fast, FREE tier (15 RPM)
**Gemini 1.5 Pro**: Better reasoning, 2M context, FREE tier
**Gemini 2.0 Flash**: Newer model, may have lower limits
**Gemini 2.5 Flash**: Latest, may require paid plan"""
        )
        st.caption(f"✨ Using {gemini_model}")

    # Agentic Mode Toggle
    use_agent = st.checkbox("Enable Agentic Reasoning 🤖", value=True, help="Allows the AI to browse files and reason multiple steps.")

    # Determine Env Key Name
    if provider == "gemini":
        env_key_name = "GOOGLE_API_KEY"
    elif provider == "groq":
        env_key_name = "GROQ_API_KEY"

    # Prefer a key already present in the environment; otherwise ask for one
    # and export it so code that reads os.environ sees the same value.
    env_key = os.getenv(env_key_name)
    api_key = env_key
    if env_key:
        st.success(f"✅ {env_key_name} loaded from environment.")
    else:
        # API Key Input
        api_key_label = f"{provider.capitalize()} API Key"
        api_key_input = st.text_input(api_key_label, type="password")
        if api_key_input:
            api_key = api_key_input
            os.environ[env_key_name] = api_key

    # Vector Database Selection
    vector_db_type = st.selectbox("Vector Database", ["chroma", "faiss", "qdrant"])
    if vector_db_type == "qdrant":
        st.caption("☁️ connect to a hosted Qdrant cluster")
        qdrant_url = st.text_input("Qdrant URL", placeholder="https://xyz.qdrant.io:6333", value=os.getenv("QDRANT_URL", ""))
        qdrant_key = st.text_input("Qdrant API Key", type="password", value=os.getenv("QDRANT_API_KEY", ""))
        if qdrant_url:
            os.environ["QDRANT_URL"] = qdrant_url
        if qdrant_key:
            os.environ["QDRANT_API_KEY"] = qdrant_key

    # Embeddings are computed locally for every chat provider, so no
    # embedding API key is required.
    embedding_provider = "local"
    embedding_api_key = api_key
    if provider == "groq":
        st.info(f"ℹ️ {provider.capitalize()} is used for Chat. Using LOCAL embeddings (no rate limits!).")
        # Optional: forward a Google key if one happens to be configured.
        # It may be None, which is fine while embeddings stay local.
        embedding_api_key = os.getenv("GOOGLE_API_KEY")

    st.divider()

    # Ingestion Section
    st.header("Import Codebase")
    source_type = st.radio("Source Type", ["ZIP File", "GitHub Repository", "Web Documentation"])
    source_input = None
    if source_type == "ZIP File":
        uploaded_file = st.file_uploader("Upload .zip file", type="zip")
        if uploaded_file:
            # Persist the upload to disk so it can be handed over as a path.
            os.makedirs("data", exist_ok=True)
            source_input = os.path.join("data", "uploaded.zip")
            with open(source_input, "wb") as f:
                f.write(uploaded_file.getbuffer())
    elif source_type == "GitHub Repository":
        source_input = st.text_input("GitHub URL", placeholder="https://github.com/owner/repo")
    elif source_type == "Web Documentation":
        source_input = st.text_input("Web URL", placeholder="https://docs.python.org/3/")

    if source_input and not st.session_state.processed_files:
        if st.button("Process & Index"):
            if not api_key:
                st.error(f"Please provide {provider} API Key.")
            elif embedding_provider != "local" and not embedding_api_key:
                # Only a remote embedding backend needs a key. Previously this
                # gate fired for every Groq user without GOOGLE_API_KEY even
                # though embeddings were local, which blocked indexing.
                st.error(f"Please provide {embedding_provider} API Key for embeddings.")
            else:
                # Use the new progress-tracked indexer
                from code_chatbot.indexing_progress import index_with_progress
                chat_engine, success = index_with_progress(
                    source_input=source_input,
                    source_type=source_type,
                    provider=provider,
                    embedding_provider=embedding_provider,
                    embedding_api_key=embedding_api_key,
                    vector_db_type=vector_db_type,
                    use_agent=use_agent,
                    api_key=api_key,
                    gemini_model=gemini_model  # Pass selected model
                )
                if success:
                    st.session_state.chat_engine = chat_engine
                    st.session_state.processed_files = True
                    time.sleep(0.5)  # Brief pause to show success
                    st.rerun()

    if st.session_state.processed_files:
        st.success(f"✅ Codebase Ready ({provider}) + AST 🧠")

        # Show usage statistics if available
        if st.session_state.chat_engine:
            try:
                from code_chatbot.rate_limiter import get_rate_limiter
                limiter = get_rate_limiter(provider)
                stats = limiter.get_usage_stats()
                st.divider()
                st.subheader("📊 API Usage")
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Requests/min", stats['requests_last_minute'])
                    st.metric("Cache Hits", stats['cache_size'])
                with col2:
                    st.metric("Total Tokens", f"{stats['total_tokens']:,}")
                rpm_limit = 15 if provider == "gemini" else 30
                usage_pct = (stats['requests_last_minute'] / rpm_limit) * 100
                # st.progress rejects values above 1.0; clamp the bar so it
                # stays visible when usage exceeds the limit, while the label
                # still reports the true percentage.
                st.progress(min(usage_pct / 100, 1.0), text=f"{usage_pct:.0f}% of limit")
            except Exception:
                # Stats are optional: never let them break the sidebar, but
                # leave a trace for debugging instead of failing silently.
                logging.getLogger(__name__).debug("Usage stats unavailable", exc_info=True)

        st.divider()
        if st.button("🗑️ Clear Chat History"):
            st.session_state.messages = []
            st.rerun()

        if st.button("Reset"):
            # Clear disk data for a true reset
            try:
                if os.path.exists("chroma_db"):
                    shutil.rmtree("chroma_db")
                if os.path.exists("data"):
                    shutil.rmtree("data")
            except Exception as e:
                st.error(f"Error clearing data: {e}")
            st.session_state.processed_files = False
            st.session_state.messages = []
            st.session_state.chat_engine = None
            st.rerun()
# Main Chat Interface
st.title("🕷️ Code Crawler")

# Multi-Mode Interface
if not st.session_state.processed_files:
    # Nothing indexed yet: only show the onboarding caption.
    st.caption(f"Configure and index your codebase to get started. (Using {provider}, Enhanced with AST)")
else:
    from components.multi_mode import (
        render_mode_selector,
        render_chat_mode,
        render_search_mode,
        render_refactor_mode,
        render_generate_mode
    )

    # Mode selector at the top
    selected_mode = render_mode_selector()
    st.divider()

    # Modes whose renderer takes no arguments are dispatched through a table;
    # "generate" and the chat fallback receive the chat engine.
    _engine_free_modes = (
        ("search", render_search_mode),
        ("refactor", render_refactor_mode),
    )
    for _mode_name, _render in _engine_free_modes:
        if selected_mode == _mode_name:
            _render()
            break
    else:
        if selected_mode == "generate":
            render_generate_mode(st.session_state.chat_engine)
        else:
            # Any other value is treated as chat mode; the standard chat
            # interface further down the script continues from here.
            render_chat_mode(st.session_state.chat_engine)
            st.caption(f"Ask questions about your uploaded project. (Using {provider}, Enhanced with AST)")
import html  # stdlib; escapes file names before they are embedded in raw HTML


def _source_path(source):
    """Return the display path of one source (a dict with 'file_path', or any other value)."""
    if isinstance(source, dict):
        return str(source.get('file_path', 'Unknown'))
    return str(source)


def _render_source_chips(sources):
    """Render one chip per distinct source file above a chat message.

    Used for both the live answer and replayed history so a message looks the
    same before and after a rerun. File names come from ingested content, so
    they are HTML-escaped before being written with unsafe_allow_html.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    distinct_paths = dict.fromkeys(_source_path(s) for s in sources)
    # Build the markup on a single line: blank or indented lines inside the
    # HTML can be re-interpreted by the markdown parser.
    chips = "".join(
        '<div class="source-chip" style="background: rgba(30, 41, 59, 0.4); border: 1px solid rgba(148, 163, 184, 0.2); border-radius: 6px; padding: 4px 10px; font-size: 0.85em; color: #cbd5e1; display: flex; align-items: center; gap: 6px;">'
        f'<span class="source-icon">📄</span> {html.escape(os.path.basename(path))}'
        '</div>'
        for path in distinct_paths
    )
    st.markdown(
        '<div class="source-container" style="display: flex; gap: 8px; flex-wrap: wrap; margin-bottom: 10px;">'
        f'{chips}</div>',
        unsafe_allow_html=True,
    )


if not st.session_state.processed_files:
    st.info("👈 Please upload and index a ZIP file to start.")
else:
    # Display History
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            # Render Sources if available
            if msg.get("sources"):
                _render_source_chips(msg["sources"])
            # Rendered without unsafe_allow_html, matching the live render
            # below: history holds user- and model-written text, which should
            # not be able to inject raw HTML on a rerun.
            st.markdown(msg["content"])

    # Input
    if prompt := st.chat_input("How does the authentication work?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            if st.session_state.chat_engine:
                with st.spinner("Analyzing (Graph+Vector)..."):
                    answer_payload = st.session_state.chat_engine.chat(prompt)

                # The engine may return either (answer, sources) or a bare answer.
                if isinstance(answer_payload, tuple):
                    answer, sources = answer_payload
                else:
                    answer = answer_payload
                    sources = []

                if sources:
                    _render_source_chips(sources)
                st.markdown(answer)

                # Store the raw answer and the sources list separately; the
                # chips are rebuilt from "sources" when history is replayed,
                # so no HTML is baked into the content string.
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": answer,
                    "sources": sources or [],
                })
            else:
                st.error("Chat engine not initialized. Please re-index.")