"""RepoRaptor: a small RAG chat assistant over a user-supplied codebase."""

import os
import shutil
import subprocess

import gradio as gr
import torch

# Must be set before `transformers` is imported, which is why the remaining
# third-party imports come after these two statements.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Maximize Hugging Face CPU Tier performance by limiting thread thrashing
torch.set_num_threads(os.cpu_count() or 2)

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline  # noqa: E402
from langchain_text_splitters import RecursiveCharacterTextSplitter, Language  # noqa: E402
from langchain_core.documents import Document  # noqa: E402
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline  # noqa: E402
from langchain_community.vectorstores import FAISS  # noqa: E402
from langchain_core.runnables import RunnablePassthrough  # noqa: E402
from langchain_core.output_parsers import StrOutputParser  # noqa: E402
from langchain_core.prompts import PromptTemplate  # noqa: E402

# File extension -> splitter language used when chunking source files.
EXTENSION_TO_LANGUAGE = {
    '.py': Language.PYTHON,
    '.js': Language.JS,
    '.ts': Language.JS,
    '.java': Language.JAVA,
    '.cpp': Language.CPP,
    '.c': Language.CPP,
    '.h': Language.CPP,
    '.go': Language.GO,
    '.rs': Language.RUST,
    '.rb': Language.RUBY,
    '.html': Language.HTML,
    '.md': Language.MARKDOWN,
}

DEFAULT_MODEL_ID = "Qwen/Qwen2.5-Coder-0.5B-Instruct"


# 1. HARDWARE OPTIMIZED LLM LOADING
def load_llm(model_id: str = DEFAULT_MODEL_ID) -> HuggingFacePipeline:
    """Load a causal LM on CPU and wrap it as a LangChain text-generation LLM.

    Args:
        model_id: Hugging Face model identifier passed to ``from_pretrained``
            for both the tokenizer and the model. Defaults to the model the
            app was written for.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline that
        returns only the newly generated text (``return_full_text=False``).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="cpu",
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        return_full_text=False,
    )
    return HuggingFacePipeline(
        pipeline=pipe,
        pipeline_kwargs={
            "max_new_tokens": 512,
            "temperature": 0.1,
            "repetition_penalty": 1.1,
        },
    )
# 2. CODE INGESTION & VECTOR DATABASE
REPO_DIR = './repo'


def setup_vector_db():
    """Read every text file under ``./repo`` and index it in a FAISS store.

    Files whose extension appears in ``EXTENSION_TO_LANGUAGE`` are chunked
    with a language-aware splitter; all others use the generic splitter.
    Files that cannot be decoded as UTF-8 or cannot be opened are skipped.

    Returns:
        A ``(db, file_count)`` tuple: the FAISS store and the number of files
        read, or ``(None, 0)`` when no file could be read or no chunk was
        produced.
    """
    if not os.path.exists(REPO_DIR):
        os.makedirs(REPO_DIR)

    docs_by_language = {}
    generic_docs = []
    file_count = 0

    for root, dirs, files in os.walk(REPO_DIR):
        # Prune in place so the walk never descends into git metadata. An
        # exact-name match is used because a substring test on the path would
        # also drop unrelated directories such as `.github`.
        dirs[:] = [d for d in dirs if d != '.git']

        for file in files:
            file_path = os.path.join(root, file)
            ext = os.path.splitext(file)[1].lower()
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except UnicodeDecodeError:
                continue  # Skip binary files
            except OSError:
                # Broken symlink, permission problem, etc.: skip this file
                # rather than abort the whole indexing run.
                continue

            doc = Document(page_content=content, metadata={"source": file_path})
            file_count += 1

            lang = EXTENSION_TO_LANGUAGE.get(ext)
            if lang:
                docs_by_language.setdefault(lang, []).append(doc)
            else:
                generic_docs.append(doc)

    if file_count == 0:
        return None, 0

    all_splits = []

    # Split documents by specific language rules
    for lang, docs in docs_by_language.items():
        try:
            splitter = RecursiveCharacterTextSplitter.from_language(
                language=lang, chunk_size=1000, chunk_overlap=200
            )
            all_splits.extend(splitter.split_documents(docs))
        except Exception:
            # Deliberate best-effort: fall back to the generic splitter if
            # the language is not supported by the installed langchain version.
            generic_docs.extend(docs)

    # Split generic documents
    if generic_docs:
        generic_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        all_splits.extend(generic_splitter.split_documents(generic_docs))

    if not all_splits:
        return None, 0

    embeddings = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
    )
    db = FAISS.from_documents(all_splits, embeddings)
    return db, file_count


# 3. GLOBAL INITIALIZATION
print("Initializing models...")
device_status = "🟢 GPU Active" if torch.cuda.is_available() else "🟡 CPU Mode"
llm = load_llm()
vector_db, file_count = setup_vector_db()

# NOTE(review): the line breaks inside this template were reconstructed from
# whitespace-collapsed text; the wording is unchanged. Confirm the layout.
prompt_template = """You are an expert Software Engineer and Codebase Assistant.
Your ONLY purpose is to answer questions related to the provided codebase or general programming/coding questions.
If the user asks a question that is NOT related to coding, programming, or the provided codebase, you must politely refuse to answer and remind them that you are a code-focused assistant.

When answering:
1. Carefully analyze the provided context.
2. Provide a clear, step-by-step explanation.
3. If providing code, use markdown code blocks.
4. If the answer cannot be found in the context, explicitly state that you don't know rather than hallucinating.

Codebase Context:
{context}

Question: {input}

Expert Developer Answer:"""

prompt = PromptTemplate.from_template(prompt_template)


def format_docs(docs):
    """Join the page contents of ``docs`` with blank lines between them."""
    return "\n\n".join(doc.page_content for doc in docs)


def build_qa_chain(db):
    """Build the retrieval + generation chain over ``db``.

    Args:
        db: Vector store exposing ``as_retriever``; a falsy value means no
            index is available.

    Returns:
        ``None`` when ``db`` is falsy. Otherwise a runnable that takes the
        question string and yields a dict with ``"context"`` (the retrieved
        documents), ``"input"`` (the question) and ``"answer"`` (the parsed
        LLM output).
    """
    if not db:
        return None

    retriever = db.as_retriever(search_kwargs={"k": 5})
    answer_chain = (
        RunnablePassthrough.assign(context=lambda x: format_docs(x["context"]))
        | prompt
        | llm
        | StrOutputParser()
    )
    return (
        {"context": retriever, "input": RunnablePassthrough()}
        | RunnablePassthrough.assign(answer=answer_chain)
    )


qa_chain = build_qa_chain(vector_db)


# 4. INGESTION FUNCTIONS
def clone_and_index(repo_url):
    """Clone ``repo_url`` into ``./repo`` and rebuild the index and QA chain.

    Replaces any existing ``./repo`` directory and rebinds the module-level
    ``vector_db``, ``file_count`` and ``qa_chain``.

    Returns:
        A status message for the UI describing success or failure.
    """
    global vector_db, file_count, qa_chain

    if not repo_url or not repo_url.strip():
        return "⚠️ Please enter a valid GitHub URL."

    if os.path.exists(REPO_DIR):
        shutil.rmtree(REPO_DIR)

    try:
        # `--` ends option parsing, so a user-supplied value that starts with
        # `-` is treated as a repository argument, never as a git option.
        subprocess.run(
            ["git", "clone", "--", repo_url.strip(), REPO_DIR],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        return f"❌ Failed to clone repo. Error: {e.stderr}"
    except Exception as e:
        return f"❌ Failed to clone repo: {str(e)}"

    vector_db, file_count = setup_vector_db()
    qa_chain = build_qa_chain(vector_db)

    if vector_db:
        return f"✅ Success! {file_count} files indexed from `{repo_url}`"
    return f"⚠️ Warning: No valid text files found in `{repo_url}`"


def upload_and_index(files):
    """Copy uploaded files into ``./repo`` and rebuild the index and QA chain.

    Replaces any existing ``./repo`` directory and rebinds the module-level
    ``vector_db``, ``file_count`` and ``qa_chain``. Files are stored flat
    under their base name.

    Returns:
        A status message for the UI describing the outcome.
    """
    global vector_db, file_count, qa_chain

    if not files:
        return "⚠️ No files were uploaded."

    if os.path.exists(REPO_DIR):
        shutil.rmtree(REPO_DIR)
    os.makedirs(REPO_DIR, exist_ok=True)

    for file in files:
        # Handle both Gradio 3 (filepath string) and Gradio 4 (File object)
        file_path = getattr(file, "name", str(file))
        dest_path = os.path.join(REPO_DIR, os.path.basename(file_path))
        shutil.copy(file_path, dest_path)

    vector_db, file_count = setup_vector_db()
    qa_chain = build_qa_chain(vector_db)

    if vector_db:
        return f"✅ Success! {file_count} files indexed from local upload"
    return "⚠️ Warning: No valid text files found in the uploaded files"


# 5. CHAT LOGIC
# File extension -> markdown fence label, so each quoted snippet is
# highlighted as its own language rather than always as Python.
_FENCE_LANGUAGES = {
    '.py': 'python',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.java': 'java',
    '.cpp': 'cpp',
    '.c': 'c',
    '.h': 'cpp',
    '.go': 'go',
    '.rs': 'rust',
    '.rb': 'ruby',
    '.html': 'html',
    '.md': 'markdown',
}


def _fence_language(source_file):
    """Return the fence label for ``source_file`` ('' if the type is unknown)."""
    ext = os.path.splitext(source_file)[1].lower()
    return _FENCE_LANGUAGES.get(ext, "")


def _format_answer(answer, sources):
    """Append the retrieved source snippets to the model's answer."""
    if not sources:
        return answer

    # NOTE(review): this literal and the closing one below each contained a
    # raw line break in the source text, encoded here as "\n". If a
    # collapsible <details>/<summary> wrapper was intended, restore it here.
    parts = [answer, "\n\n\n🔍 View Source Code Referenced\n\n"]
    for number, doc in enumerate(sources, start=1):
        source_file = doc.metadata.get("source", "Unknown File")
        parts.append(f"**Snippet {number}** from `{source_file}`:\n")
        parts.append(
            f"```{_fence_language(source_file)}\n{doc.page_content}\n```\n\n"
        )
    parts.append("\n")
    return "".join(parts)


def respond(message, chat_history):
    """Answer ``message`` from the indexed codebase and extend the chat history.

    Args:
        message: The user's text from the input box.
        chat_history: List of ``{"role", "content"}`` dicts; mutated in place.

    Returns:
        ``("", chat_history)`` — the empty string clears the input box.
        Whitespace-only messages leave the history untouched.
    """
    if not message.strip():
        return "", chat_history

    if not vector_db:
        bot_message = (
            "👋 Welcome! Please provide a repo link or upload your code files "
            "using the panel on the left to start chatting."
        )
    else:
        try:
            # Fetch response from RAG
            response = qa_chain.invoke(message)
            bot_message = _format_answer(response["answer"], response["context"])
        except Exception as e:
            # UI boundary: report the failure in the chat instead of raising.
            bot_message = f"❌ An error occurred during processing: {str(e)}"

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": bot_message})
    return "", chat_history


# 6. GRADIO UI
custom_css = """
.status-box { padding: 15px; border-radius: 8px; background-color: #f0f0f0; margin-bottom: 20px; border-left: 4px solid #007bff;}
.dark .status-box { background-color: #1e293b; color: #cbd5e1; border-left: 4px solid #3b82f6;}
.instructions { font-size: 0.95em; color: #555; }
.dark .instructions { color: #aaa; }
"""


def get_initial_repo_status():
    """Return the status line shown at start-up for the current index."""
    if vector_db:
        return f"✅ **Ready!** {file_count} files indexed and loaded."
    return "❌ **Empty Database.** Provide a codebase below to begin."


# NOTE(review): the nesting of the layout below was reconstructed from
# whitespace-collapsed text — confirm it matches the intended layout.
with gr.Blocks(title="Codebase Assistant") as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# 🦖 RepoRaptor\n**Your personal AI codebase expert.**")
            gr.Markdown("---")

            with gr.Column(elem_classes=["status-box"]):
                gr.Markdown("### 📊 System Status")
                gr.Markdown(f"**Hardware:** {device_status}")
                repo_status = gr.Markdown(get_initial_repo_status())

            gr.Markdown("### 📂 Ingest Codebase")
            gr.Markdown(
                "Choose a method to load your codebase into the Vector Database.",
                elem_classes=["instructions"],
            )

            with gr.Tabs():
                with gr.Tab("GitHub Repo"):
                    gr.Markdown("Clone a public repository directly:")
                    repo_url = gr.Textbox(
                        placeholder="https://github.com/user/repo",
                        show_label=False,
                    )
                    clone_btn = gr.Button("⬇️ Clone & Index", variant="primary")

                with gr.Tab("Local Upload"):
                    gr.Markdown("Upload local codebase files:")
                    local_files = gr.File(file_count="multiple", label="Upload Files")
                    upload_btn = gr.Button("📤 Upload & Index", variant="primary")

            clone_btn.click(fn=clone_and_index, inputs=[repo_url], outputs=[repo_status])
            upload_btn.click(fn=upload_and_index, inputs=[local_files], outputs=[repo_status])

        with gr.Column(scale=3):
            gr.Markdown(
                "### 💻 Chat Interface\nAsk architecture questions, find bugs, or "
                "request code explanations. I will **only** answer questions "
                "related to code."
            )
            chatbot = gr.Chatbot(height=600, show_label=False)

            with gr.Row():
                msg = gr.Textbox(
                    placeholder="E.g., What does the main function do? (Press Enter to send)",
                    show_label=False,
                    scale=4,
                )
                clear = gr.Button("🗑️ Clear Chat", scale=1)

            msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
            clear.click(lambda: ("", []), inputs=None, outputs=[msg, chatbot], queue=False)


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", css=custom_css, theme=gr.themes.Soft())