Spaces:

themehmi
/

RepoRaptor

Running

App Files Files Community

themehmi committed 4 days ago

Commit

67baef8

verified ·

1 Parent(s): 130bc23

Upload 2 files

Browse files

Files changed (1) hide show

app.py +73 -14

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import gradio as gr
 import torch
 import os
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from langchain_community.document_loaders import DirectoryLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
@@ -64,8 +66,10 @@ device_status = "🟢 GPU Active" if torch.cuda.is_available() else "🟡 CPU Mo
 llm = load_llm()
 vector_db, file_count = setup_vector_db()
-prompt_template = """Use the following codebase context to answer the question.
-If you don't know the answer, just say that you don't know, don't try to make up code.
 Context: {context}
@@ -77,9 +81,11 @@ prompt = PromptTemplate.from_template(prompt_template)
 def format_docs(docs):
     """Join the ``page_content`` of every document, separated by blank lines."""
     contents = [doc.page_content for doc in docs]
     return "\n\n".join(contents)
-if vector_db:
-    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
-    qa_chain = (
         {"context": retriever, "input": RunnablePassthrough()}
         | RunnablePassthrough.assign(
             answer=(
@@ -90,13 +96,53 @@ if vector_db:
             )
         )
     )
-else:
-    qa_chain = None
-# 4. CHAT LOGIC
 def respond(message, chat_history):
     if not vector_db:
-        bot_message = "👋 Welcome! Please upload some Python files to the `./repo` directory and restart the server to start chatting."
         chat_history.append((message, bot_message))
         return "", chat_history
@@ -118,12 +164,17 @@ def respond(message, chat_history):
     chat_history.append((message, final_answer))
     return "", chat_history
-# 5. GRADIO UI
 custom_css = """
 .status-box { padding: 10px; border-radius: 8px; background-color: #f0f0f0; margin-bottom: 10px; }
 .dark .status-box { background-color: #1e293b; color: #cbd5e1; }
 """
 with gr.Blocks(title="Codebase Assistant", css=custom_css) as demo:
     with gr.Row():
         with gr.Column(scale=1):
@@ -133,10 +184,18 @@ with gr.Blocks(title="Codebase Assistant", css=custom_css) as demo:
             with gr.Column(elem_classes=["status-box"]):
                 gr.Markdown("### System Status")
                 gr.Markdown(f"**Hardware:** {device_status}")
-                if vector_db:
-                    gr.Markdown(f"**Repo Status:** {file_count} files indexed ✅")
-                else:
-                    gr.Markdown("**Repo Status:** Empty ❌\n\nDrop your `.py` files into the `/repo` folder to begin analyzing.")
         with gr.Column(scale=3):
             gr.Markdown("### 💻 Chat with your Codebase\nAsk architecture questions, find bugs, or request code explanations.")

 import gradio as gr
 import torch
 import os
+import shutil
+import subprocess
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from langchain_community.document_loaders import DirectoryLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
 llm = load_llm()
 vector_db, file_count = setup_vector_db()
+prompt_template = """You are a specialized Codebase Assistant. Your ONLY purpose is to answer questions related to the provided codebase or general programming/coding questions.
+If the user asks a question that is NOT related to coding, programming, or the provided codebase, you must politely refuse to answer and remind them that you are a code-focused assistant.
+Use the following codebase context to answer the question. If you don't know the answer, just say that you don't know, don't try to make up code.
 Context: {context}
 def format_docs(docs):
     """Join the ``page_content`` of every document, separated by blank lines."""
     contents = [doc.page_content for doc in docs]
     return "\n\n".join(contents)
+def build_qa_chain(db):
+    """Build the question-answering chain on top of ``db``.
+
+    Returns ``None`` when ``db`` is falsy (no index available); otherwise
+    returns a runnable that feeds a retriever created from ``db`` into the
+    ``context`` key and passes the raw input through as ``input``.
+
+    NOTE(review): the body of ``answer=(...)`` is elided in this diff view,
+    so the exact shape of the chain's output is not visible here.
+    """
+    if not db:
+        return None
+    # k=3: presumably the number of chunks retrieved per query — confirm
+    # against the vector store's ``as_retriever`` documentation.
+    retriever = db.as_retriever(search_kwargs={"k": 3})
+    return (
         {"context": retriever, "input": RunnablePassthrough()}
         | RunnablePassthrough.assign(
             answer=(
             )
         )
     )
+qa_chain = build_qa_chain(vector_db)
+# 4. INGESTION FUNCTIONS
+def clone_and_index(repo_url):
+    """Clone ``repo_url`` into ``./repo`` and rebuild the index and QA chain.
+
+    On a successful clone the module-level ``vector_db``, ``file_count`` and
+    ``qa_chain`` are replaced with freshly built ones.
+
+    Args:
+        repo_url: Git URL typed into the UI textbox (untrusted user input).
+
+    Returns:
+        A Markdown status string for the repo-status widget.
+    """
+    global vector_db, file_count, qa_chain
+    repo_url = (repo_url or "").strip()
+    if not repo_url:
+        # Nothing to clone: leave the existing checkout and index untouched
+        # instead of deleting ./repo and then failing inside git.
+        return "**Repo Status:** No repository URL provided ❌"
+    if os.path.exists('./repo'):
+        shutil.rmtree('./repo')
+    try:
+        # "--" ends option parsing, so a URL that starts with "-" cannot be
+        # interpreted by git as a command-line option.
+        subprocess.run(["git", "clone", "--", repo_url, "./repo"], check=True)
+    except Exception as e:
+        # UI boundary: report any failure (git missing, bad URL, non-zero
+        # exit status) as a status message rather than crashing the handler.
+        return f"**Repo Status:** Failed to clone repo: {str(e)} ❌"
+    vector_db, file_count = setup_vector_db()
+    qa_chain = build_qa_chain(vector_db)
+    if vector_db:
+        return f"**Repo Status:** {file_count} files indexed from `{repo_url}` ✅"
+    else:
+        return f"**Repo Status:** No Python files found in `{repo_url}` ❌"
+def upload_and_index(files):
+    """Copy uploaded files into a fresh ``./repo`` and rebuild the index.
+
+    After copying, the module-level ``vector_db``, ``file_count`` and
+    ``qa_chain`` are replaced with freshly built ones.
+
+    Args:
+        files: Uploaded files from the UI; each item is either an object
+            exposing its temporary path as ``.name`` or a plain path string.
+            May be ``None`` or empty when nothing was selected.
+
+    Returns:
+        A Markdown status string for the repo-status widget.
+    """
+    global vector_db, file_count, qa_chain
+    if not files:
+        # Check before wiping ./repo so an empty submission does not delete
+        # the files behind the index that is still loaded.
+        return "**Repo Status:** No files uploaded ❌"
+    if os.path.exists('./repo'):
+        shutil.rmtree('./repo')
+    os.makedirs('./repo', exist_ok=True)
+    for uploaded in files:
+        # Fall back to the item itself when it is already a path string.
+        src_path = getattr(uploaded, "name", uploaded)
+        # NOTE: files sharing a basename overwrite each other in ./repo.
+        dest_path = os.path.join('./repo', os.path.basename(src_path))
+        shutil.copy(src_path, dest_path)
+    vector_db, file_count = setup_vector_db()
+    qa_chain = build_qa_chain(vector_db)
+    if vector_db:
+        return f"**Repo Status:** {file_count} files indexed from local upload ✅"
+    else:
+        return "**Repo Status:** No Python files found in local upload ❌"
+# 5. CHAT LOGIC
 def respond(message, chat_history):
     """Handle one chat turn and return ``("", chat_history)``.

     Appends a ``(message, reply)`` tuple to ``chat_history``. When no index
     is loaded (``vector_db`` is falsy) the reply is a fixed welcome prompt.

     NOTE(review): the code that computes ``final_answer`` is elided in this
     diff view. The empty string in the return value presumably clears the
     input textbox — confirm against the event wiring.
     """
     if not vector_db:
+        bot_message = "👋 Welcome! Please provide a repo link or upload Python files to start chatting."
         chat_history.append((message, bot_message))
         return "", chat_history
     chat_history.append((message, final_answer))
     return "", chat_history
+# 6. GRADIO UI
 custom_css = """
 .status-box { padding: 10px; border-radius: 8px; background-color: #f0f0f0; margin-bottom: 10px; }
 .dark .status-box { background-color: #1e293b; color: #cbd5e1; }
 """
+def get_initial_repo_status():
+    """Return the Markdown text first shown in the repo-status widget.
+
+    Reads the module-level ``vector_db`` and ``file_count``.
+    """
+    if not vector_db:
+        return "**Repo Status:** Empty ❌\n\nProvide a repo link or upload files to begin analyzing."
+    return f"**Repo Status:** {file_count} files indexed ✅"
 with gr.Blocks(title="Codebase Assistant", css=custom_css) as demo:
     with gr.Row():
         with gr.Column(scale=1):
             with gr.Column(elem_classes=["status-box"]):
                 gr.Markdown("### System Status")
                 gr.Markdown(f"**Hardware:** {device_status}")
+                repo_status = gr.Markdown(get_initial_repo_status())
+            gr.Markdown("### Add Codebase")
+            with gr.Tab("GitHub Repo"):
+                repo_url = gr.Textbox(placeholder="https://github.com/user/repo", show_label=False)
+                clone_btn = gr.Button("Clone & Index")
+            with gr.Tab("Local Upload"):
+                local_files = gr.File(file_count="multiple", label="Upload Local Files", file_types=[".py"])
+                upload_btn = gr.Button("Upload & Index")
+            clone_btn.click(fn=clone_and_index, inputs=[repo_url], outputs=[repo_status])
+            upload_btn.click(fn=upload_and_index, inputs=[local_files], outputs=[repo_status])
         with gr.Column(scale=3):
             gr.Markdown("### 💻 Chat with your Codebase\nAsk architecture questions, find bugs, or request code explanations.")