Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| import shutil | |
| import time | |
| from code_chatbot.universal_ingestor import process_source | |
| from code_chatbot.indexer import Indexer | |
| from code_chatbot.rag import ChatEngine | |
| from code_chatbot.ast_analysis import ASTGraphBuilder | |
| from code_chatbot.graph_rag import GraphEnhancedRetriever | |
| import logging | |
| from dotenv import load_dotenv | |
# Pull variables from a local .env file into the process environment before
# anything below reads API keys via os.getenv.
load_dotenv()

# Page-level configuration, followed by root logger setup.
st.set_page_config(
    page_title="Code Chatbot",
    page_icon="💻",
    layout="wide",
)
logging.basicConfig(
    level=logging.INFO,
)
| # --- Custom CSS for Premium Slate UI --- | |
| import base64 | |
def get_base64_logo():
    """Return the contents of ``assets/logo.png`` as a base64 string.

    The path is relative to the current working directory.

    Returns:
        The base64-encoded file contents decoded to ``str``, or an empty
        string when the file cannot be opened or read.
    """
    try:
        with open("assets/logo.png", "rb") as f:
            data = f.read()
    except OSError:
        # Missing/unreadable logo is not fatal: callers get an empty payload.
        # Only I/O failures are swallowed so that unrelated errors (and
        # KeyboardInterrupt/SystemExit) are no longer hidden.
        return ""
    return base64.b64encode(data).decode()
# Encode the logo once; an empty string (logo unavailable) just produces an
# empty data URI in the watermark rule below.
logo_b64 = get_base64_logo()

# Global stylesheet injected into the page. The literal token
# LOGO_BASE64_PLACEHOLDER is substituted with the encoded logo right before
# injection (str.replace is used instead of an f-string so the CSS braces do
# not need escaping).
#
# The @import rule is deliberately the FIRST rule in the sheet: CSS ignores
# any @import that appears after other rules (such as @keyframes), which
# previously prevented the web fonts from loading.
css = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;700&display=swap');
/* -------------------------------------------------------------------------- */
/* CORE ANIMATIONS */
/* -------------------------------------------------------------------------- */
@keyframes gradient-xy {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}
@keyframes fadeInUp {
    from { opacity: 0; transform: translateY(10px); }
    to { opacity: 1; transform: translateY(0); }
}
/* -------------------------------------------------------------------------- */
/* GLOBAL THEME ENGINE */
/* -------------------------------------------------------------------------- */
:root {
    --primary-glow: 56, 189, 248; /* Sky Blue */
    --secondary-glow: 139, 92, 246; /* Violet */
    --bg-deep: #050608;
    --glass-border: rgba(255, 255, 255, 0.08);
    --glass-bg: rgba(15, 23, 42, 0.6);
}
.stApp {
    background: radial-gradient(circle at 10% 20%, rgba(13, 17, 28, 1) 0%, rgba(5, 6, 8, 1) 90%);
    font-family: 'Outfit', sans-serif;
}
/* BACKGROUND WATERMARK */
.stApp::before {
    content: "";
    position: absolute;
    top: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
    width: 70vh; /* Slightly smaller to fit nicely */
    height: 70vh;
    background-image: url("data:image/png;base64,LOGO_BASE64_PLACEHOLDER");
    background-position: center;
    background-repeat: no-repeat;
    background-size: contain;
    opacity: 0.08; /* Subtle but visible color */
    pointer-events: none;
    z-index: 0;
    border-radius: 50%; /* Force Circular Shape */
}
/* Sidebar Logo - Standard Shape */
[data-testid="stSidebar"] img {
    border-radius: 12px; /* Slight rounded corners for better aesthetics, but not circular */
    box-shadow: 0 0 20px rgba(56, 189, 248, 0.3); /* Neon Glow */
    border: 1px solid rgba(56, 189, 248, 0.5);
}
/* Global Text Override */
p, div, span, label, h1, h2, h3, h4, h5, h6, .stMarkdown {
    color: #E2E8F0 !important;
    text-shadow: 0 1px 2px rgba(0,0,0,0.3);
}
/* -------------------------------------------------------------------------- */
/* SIDEBAR */
/* -------------------------------------------------------------------------- */
section[data-testid="stSidebar"] {
    background: rgba(11, 12, 16, 0.85);
    backdrop-filter: blur(20px);
    -webkit-backdrop-filter: blur(20px);
    border-right: 1px solid var(--glass-border);
    box-shadow: 5px 0 30px rgba(0,0,0,0.5);
}
section[data-testid="stSidebar"] h1 {
    background: linear-gradient(to right, #38BDF8, #8B5CF6);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 800;
    font-size: 2rem !important;
    padding-bottom: 0.5rem;
}
/* -------------------------------------------------------------------------- */
/* INPUTS & FORMS */
/* -------------------------------------------------------------------------- */
.stTextInput input, .stSelectbox div[data-baseweb="select"], .stTextArea textarea {
    background-color: rgba(30, 41, 59, 0.5) !important;
    border: 1px solid var(--glass-border) !important;
    color: #F8FAFC !important;
    border-radius: 12px !important;
    transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
    backdrop-filter: blur(10px);
}
.stTextInput input:focus, .stTextArea textarea:focus, .stSelectbox div[data-baseweb="select"]:focus-within {
    border-color: #38BDF8 !important;
    box-shadow: 0 0 15px rgba(var(--primary-glow), 0.3);
    transform: translateY(-1px);
}
/* -------------------------------------------------------------------------- */
/* MEDIA UPLOADS */
/* -------------------------------------------------------------------------- */
[data-testid="stFileUploader"] {
    background-color: rgba(30, 41, 59, 0.4);
    border: 1px dashed var(--glass-border);
    border-radius: 12px;
    padding: 20px;
}
/* FORCE TEXT COLOR FOR FILE UPLOADER */
[data-testid="stFileUploader"] section > div,
[data-testid="stFileUploader"] section > div > span,
[data-testid="stFileUploader"] section > div > small,
[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"] p {
    color: #E2E8F0 !important; /* Bright Slate */
    opacity: 1 !important;
    -webkit-text-fill-color: #E2E8F0 !important;
}
[data-testid="stFileUploader"] button {
    background: rgba(56, 189, 248, 0.2);
    color: #38BDF8 !important;
    border: 1px solid #38BDF8;
}
/* -------------------------------------------------------------------------- */
/* DROPDOWN & SELECT */
/* -------------------------------------------------------------------------- */
/* 1. The Box Itself */
.stSelectbox div[data-baseweb="select"] {
    background-color: #1E293B !important; /* Solid Slate-800 for contrast */
    border: 1px solid #475569 !important;
    color: white !important;
}
/* 2. The Text INSIDE the Box (Critical Fix) */
.stSelectbox div[data-baseweb="select"] div[data-testid="stMarkdownContainer"] > p {
    color: #F8FAFC !important; /* White */
    font-weight: 500 !important;
}
/* 3. The Dropdown Menu (Popup) */
div[data-baseweb="popover"], div[data-baseweb="menu"], ul[data-baseweb="menu"] {
    background-color: #0F172A !important;
    border: 1px solid #334155 !important;
}
/* 4. Options in the Menu */
li[data-baseweb="option"], div[data-baseweb="option"] {
    color: #CBD5E1 !important; /* Light Slate */
}
/* 5. Start/Icons in Menu */
li[data-baseweb="option"] *, div[data-baseweb="option"] * {
    color: #CBD5E1 !important;
}
/* 6. Selected/Hovered Option */
li[data-baseweb="option"][aria-selected="true"],
li[data-baseweb="option"]:hover,
div[data-baseweb="option"]:hover {
    background-color: #38BDF8 !important;
    color: white !important;
}
/* 7. SVG Arrow Icon */
.stSelectbox svg {
    fill: #94A3B8 !important;
}
/* -------------------------------------------------------------------------- */
/* BUTTONS */
/* -------------------------------------------------------------------------- */
.stButton button {
    background: linear-gradient(135deg, #0EA5E9 0%, #2563EB 100%);
    color: white !important;
    border: none;
    border-radius: 12px;
    padding: 0.75rem 1.5rem;
    font-weight: 600;
    letter-spacing: 0.5px;
    transition: all 0.3s ease;
    box-shadow: 0 4px 14px rgba(14, 165, 233, 0.3);
    text-transform: uppercase;
    font-size: 0.85rem;
}
.stButton button:hover {
    transform: translateY(-2px) scale(1.02);
    box-shadow: 0 6px 20px rgba(14, 165, 233, 0.5);
}
.stButton button:active {
    transform: translateY(0);
}
/* -------------------------------------------------------------------------- */
/* CHAT BUBBLES */
/* -------------------------------------------------------------------------- */
.stChatMessage {
    background: var(--glass-bg);
    border: 1px solid var(--glass-border);
    border-radius: 16px;
    backdrop-filter: blur(10px);
    margin-bottom: 1rem;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    animation: fadeInUp 0.4s ease-out forwards;
}
.stChatMessage[data-testid="stChatMessage"]:nth-child(even) {
    border-left: 3px solid #38BDF8;
    background: linear-gradient(90deg, rgba(56, 189, 248, 0.05) 0%, rgba(15, 23, 42, 0.6) 100%);
}
/* -------------------------------------------------------------------------- */
/* CODE & CHIPS */
/* -------------------------------------------------------------------------- */
code {
    font-family: 'JetBrains Mono', monospace !important;
    background: #0B0E14 !important;
    border: 1px solid #1E293B;
    border-radius: 6px;
    color: #7DD3FC !important;
}
/* Source Chips with Glow */
.source-container {
    display: flex;
    flex-wrap: wrap;
    gap: 8px;
    margin-bottom: 16px;
    padding-bottom: 12px;
    border-bottom: 1px solid rgba(255,255,255,0.05);
}
.source-chip {
    background: rgba(30, 41, 59, 0.6);
    border: 1px solid rgba(56, 189, 248, 0.2);
    border-radius: 20px;
    padding: 6px 14px;
    font-size: 0.8rem;
    color: #94A3B8;
    display: flex;
    align-items: center;
    transition: all 0.3s ease;
    cursor: pointer;
    backdrop-filter: blur(5px);
}
.source-chip:hover {
    background: rgba(56, 189, 248, 0.15);
    border-color: #38BDF8;
    color: #38BDF8;
    box-shadow: 0 0 10px rgba(56, 189, 248, 0.2);
    transform: translateY(-1px);
}
.source-icon {
    margin-right: 8px;
    opacity: 0.7;
}
/* Hiding Streamlit Branding */
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
header {visibility: hidden;}
</style>
"""
st.markdown(css.replace("LOGO_BASE64_PLACEHOLDER", logo_b64), unsafe_allow_html=True)
# Session State
# Seed each key only when it is absent, so values stored by an earlier run of
# the script in the same session are left untouched.
session_defaults = {
    "messages": [],
    "chat_engine": None,
    "processed_files": False,
}
for state_key, default_value in session_defaults.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value
# Sidebar: provider/model configuration, codebase ingestion, usage stats and
# reset controls.
# NOTE(review): the listing this was recovered from had its indentation
# flattened; the nesting below is reconstructed from the control flow —
# confirm against the deployed file.
with st.sidebar:
    # Logo
    if os.path.exists("assets/logo.png"):
        st.image("assets/logo.png", use_column_width=True)
    st.title("🔧 Configuration")

    # Provider Selection (Gemini & Groq only as requested)
    provider = st.radio("LLM Provider", ["gemini", "groq"])

    # Model Selection for Gemini (stays None for other providers)
    gemini_model = None
    if provider == "gemini":
        gemini_model = st.selectbox(
            "Gemini Model",
            [
                "gemini-1.5-flash",
                "gemini-1.5-pro",
                "gemini-2.0-flash",
                "gemini-2.5-flash",  # May require newer API
            ],
            index=0,  # Default to 1.5 Flash (stable, free tier)
            help=(
                "**Gemini 1.5 Flash** (Recommended): Stable, fast, FREE tier (15 RPM)\n"
                "**Gemini 1.5 Pro**: Better reasoning, 2M context, FREE tier\n"
                "**Gemini 2.0 Flash**: Newer model, may have lower limits\n"
                "**Gemini 2.5 Flash**: Latest, may require paid plan"
            ),
        )
        st.caption(f"✨ Using {gemini_model}")

    # Agentic Mode Toggle
    use_agent = st.checkbox(
        "Enable Agentic Reasoning 🤖",
        value=True,
        help="Allows the AI to browse files and reason multiple steps.",
    )

    # Name of the environment variable holding the chat provider's API key.
    env_key_name = "GOOGLE_API_KEY" if provider == "gemini" else "GROQ_API_KEY"
    env_key = os.getenv(env_key_name)
    api_key = env_key
    if env_key:
        st.success(f"✅ {env_key_name} loaded from environment.")
    else:
        # No key in the environment: ask for one and export it so code that
        # reads the environment variable can see it too.
        api_key_label = f"{provider.capitalize()} API Key"
        api_key_input = st.text_input(api_key_label, type="password")
        if api_key_input:
            api_key = api_key_input
            os.environ[env_key_name] = api_key

    # Vector Database Selection
    vector_db_type = st.selectbox("Vector Database", ["chroma", "faiss", "qdrant"])
    if vector_db_type == "qdrant":
        st.caption("☁️ connect to a hosted Qdrant cluster")
        qdrant_url = st.text_input(
            "Qdrant URL",
            placeholder="https://xyz.qdrant.io:6333",
            value=os.getenv("QDRANT_URL", ""),
        )
        qdrant_key = st.text_input(
            "Qdrant API Key",
            type="password",
            value=os.getenv("QDRANT_API_KEY", ""),
        )
        if qdrant_url:
            os.environ["QDRANT_URL"] = qdrant_url
        if qdrant_key:
            os.environ["QDRANT_API_KEY"] = qdrant_key

    # Embeddings always use the local provider, whichever chat provider is
    # selected, so an embeddings API key is optional.
    embedding_provider = "local"
    if provider == "groq":
        st.info(f"ℹ️ {provider.capitalize()} is used for Chat. Using LOCAL embeddings (no rate limits!).")
        # Forward a Google key when one happens to be set; may be None.
        embedding_api_key = os.getenv("GOOGLE_API_KEY")
    else:
        embedding_api_key = api_key

    st.divider()

    # Ingestion Section
    st.header("Import Codebase")
    source_type = st.radio("Source Type", ["ZIP File", "GitHub Repository", "Web Documentation"])
    source_input = None
    if source_type == "ZIP File":
        uploaded_file = st.file_uploader("Upload .zip file", type="zip")
        if uploaded_file:
            # Persist the upload to a fixed path that is handed to the indexer.
            os.makedirs("data", exist_ok=True)
            source_input = os.path.join("data", "uploaded.zip")
            with open(source_input, "wb") as f:
                f.write(uploaded_file.getbuffer())
    elif source_type == "GitHub Repository":
        source_input = st.text_input("GitHub URL", placeholder="https://github.com/owner/repo")
    elif source_type == "Web Documentation":
        source_input = st.text_input("Web URL", placeholder="https://docs.python.org/3/")

    if source_input and not st.session_state.processed_files:
        if st.button("Process & Index"):
            if not api_key:
                st.error(f"Please provide {provider} API Key.")
            elif embedding_provider != "local" and not embedding_api_key:
                # Only remote embedding providers need a key. Previously this
                # check fired for every Groq user without GOOGLE_API_KEY even
                # though embeddings are computed locally.
                st.error(f"Please provide {embedding_provider} API Key for embeddings.")
            else:
                # Use the new progress-tracked indexer
                from code_chatbot.indexing_progress import index_with_progress

                chat_engine, success = index_with_progress(
                    source_input=source_input,
                    source_type=source_type,
                    provider=provider,
                    embedding_provider=embedding_provider,
                    embedding_api_key=embedding_api_key,
                    vector_db_type=vector_db_type,
                    use_agent=use_agent,
                    api_key=api_key,
                    gemini_model=gemini_model,  # Pass selected model
                )
                if success:
                    st.session_state.chat_engine = chat_engine
                    st.session_state.processed_files = True
                    time.sleep(0.5)  # Brief pause to show success
                    st.rerun()

    if st.session_state.processed_files:
        st.success(f"✅ Codebase Ready ({provider}) + AST 🧠")

        # Show usage statistics if available
        if st.session_state.chat_engine:
            try:
                from code_chatbot.rate_limiter import get_rate_limiter

                stats = get_rate_limiter(provider).get_usage_stats()
                st.divider()
                st.subheader("📊 API Usage")
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Requests/min", stats['requests_last_minute'])
                    # NOTE(review): labelled "Cache Hits" but fed from
                    # 'cache_size' — confirm which quantity is intended.
                    st.metric("Cache Hits", stats['cache_size'])
                with col2:
                    st.metric("Total Tokens", f"{stats['total_tokens']:,}")
                rpm_limit = 15 if provider == "gemini" else 30
                usage_ratio = stats['requests_last_minute'] / rpm_limit
                # st.progress only accepts 0.0-1.0; clamp so that exceeding
                # the limit shows a full bar instead of raising. The label
                # still reports the true percentage.
                st.progress(
                    min(max(usage_ratio, 0.0), 1.0),
                    text=f"{usage_ratio * 100:.0f}% of limit",
                )
            except Exception as e:
                # Stats are optional: never break the sidebar over them, but
                # leave a trace instead of failing silently.
                logging.warning("Skipping API usage panel: %s", e)

        st.divider()
        if st.button("🗑️ Clear Chat History"):
            st.session_state.messages = []
            st.rerun()

        if st.button("Reset"):
            # Clear disk data for a true reset
            try:
                if os.path.exists("chroma_db"):
                    shutil.rmtree("chroma_db")
                if os.path.exists("data"):
                    shutil.rmtree("data")
            except Exception as e:
                st.error(f"Error clearing data: {e}")
            st.session_state.processed_files = False
            st.session_state.messages = []
            st.session_state.chat_engine = None
            st.rerun()
# Main Chat Interface
st.title("🕷️ Code Crawler")

# Multi-Mode Interface
# NOTE(review): nesting reconstructed from a flattened listing; the chat-mode
# caption is assumed to belong to the chat branch — confirm.
if not st.session_state.processed_files:
    st.caption(f"Configure and index your codebase to get started. (Using {provider}, Enhanced with AST)")
else:
    from components.multi_mode import (
        render_mode_selector,
        render_chat_mode,
        render_search_mode,
        render_refactor_mode,
        render_generate_mode
    )

    # Mode selector at the top
    selected_mode = render_mode_selector()
    st.divider()

    # Zero-argument renderers for every non-chat mode; any other selection
    # falls through to chat mode.
    mode_renderers = {
        "search": render_search_mode,
        "refactor": render_refactor_mode,
        "generate": lambda: render_generate_mode(st.session_state.chat_engine),
    }
    render_selected = mode_renderers.get(selected_mode)
    if render_selected is not None:
        render_selected()
    else:
        # Chat mode: mode-specific UI first, then the caption that introduces
        # the standard chat interface rendered further down the script.
        render_chat_mode(st.session_state.chat_engine)
        st.caption(f"Ask questions about your uploaded project. (Using {provider}, Enhanced with AST)")
def _source_path(source):
    """Return the file path of a source given as a dict or any other object."""
    if isinstance(source, dict):
        return str(source.get('file_path') or 'Unknown')
    return str(source)


def _build_source_chips_html(sources):
    """Build the HTML strip of source chips, one chip per distinct file path.

    File names are HTML-escaped because they come from ingested content and
    the markup is rendered with ``unsafe_allow_html=True``. The markup is
    emitted on a single line so Markdown cannot mistake indented HTML for a
    code block.
    """
    import html

    # dict.fromkeys de-duplicates while keeping first-seen order.
    paths = dict.fromkeys(_source_path(s) for s in sources)
    chips = "".join(
        '<div class="source-chip">'
        '<span class="source-icon">📄</span> '
        f'{html.escape(os.path.basename(p) or p)}'
        '</div>'
        for p in paths
    )
    return f'<div class="source-container">{chips}</div>'


# NOTE(review): nesting reconstructed from a flattened listing; history and
# input are assumed to live in the "processed" branch — confirm.
if not st.session_state.processed_files:
    st.info("👈 Please upload and index a ZIP file to start.")
else:
    # Display History
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            # Render Sources if available (same chips as the live answer)
            if msg.get("sources"):
                st.markdown(_build_source_chips_html(msg["sources"]), unsafe_allow_html=True)
            # Content is raw user/model text: render it as plain Markdown,
            # exactly like the live path does, instead of trusting it as HTML.
            st.markdown(msg["content"])

    # Input
    if prompt := st.chat_input("How does the authentication work?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            if st.session_state.chat_engine:
                with st.spinner("Analyzing (Graph+Vector)..."):
                    answer_payload = st.session_state.chat_engine.chat(prompt)

                # The engine may return either (answer, sources) or just the
                # answer.
                if isinstance(answer_payload, tuple):
                    answer, sources = answer_payload
                else:
                    answer = answer_payload
                    sources = []

                if sources:
                    # Tolerates dict and non-dict sources alike; the previous
                    # live path called .get() on every source and crashed on
                    # plain strings that the history path already accepted.
                    st.markdown(_build_source_chips_html(sources), unsafe_allow_html=True)
                st.markdown(answer)

                # Store the raw answer and the sources list separately, so
                # chips are rebuilt on rerun rather than baked into the
                # content string.
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": answer,
                    "sources": sources if sources else [],
                })
            else:
                st.error("Chat engine not initialized. Please re-index.")