aman1762 commited on
Commit
3bc9c63
·
verified ·
1 Parent(s): 5df0799

Upload 8 files

Browse files
Files changed (8) hide show
  1. README.md +19 -16
  2. api.py +25 -0
  3. app.py +28 -0
  4. chunker.py +16 -0
  5. ingest.py +24 -0
  6. rag_chain.py +14 -0
  7. requirements.txt +10 -3
  8. vectorstore.py +8 -0
README.md CHANGED
@@ -1,19 +1,22 @@
1
- ---
2
- title: GenAI
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- ---
13
 
14
- # Welcome to Streamlit!
15
 
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
 
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CODEBASE-RAG-ASSISTANT
2
+ A LangChain-based RAG system that indexes a code repository and answers architecture & code-level questions using free LLMs.
 
 
 
 
 
 
 
 
 
 
3
 
 
4
 
5
+ ## Codebase RAG Assistant
6
 
7
+ An AI-powered Retrieval-Augmented Generation system that enables natural language querying of large code repositories.
8
+
9
+ ### Features
10
+ - Code-aware chunking (functions & classes)
11
+ - FAISS-based vector retrieval
12
+ - LLaMA-3 inference via Groq
13
+ - Streamlit UI
14
+ - Fully free tech stack
15
+
16
+ ### Tech Stack
17
+ LangChain, FAISS, HuggingFace Embeddings, FastAPI, Streamlit
18
+
19
+ ### Use Cases
20
+ - Codebase understanding
21
+ - Architecture exploration
22
+ - Developer onboarding
api.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from ingest import load_repo, ingest_repo
3
+ from vectorstore import create_vectorstore
4
+ from rag_chain import build_rag_chain
5
+ import os
6
+
7
+ app = FastAPI()
8
+ qa_chain = None
9
+
10
@app.post("/load")
def load_repository(repo_url: str):
    """Clone (or reuse) the repository, chunk its code, build the FAISS
    index, and wire up the module-level RAG QA chain."""
    global qa_chain

    repo_path = load_repo(repo_url)
    documents = ingest_repo(repo_path)
    index = create_vectorstore(documents)
    qa_chain = build_rag_chain(index, os.getenv("GROQ_API_KEY"))

    return {"status": "Repository indexed"}
18
+
19
@app.get("/ask")
def ask(question: str):
    """Answer a natural-language question about the indexed repository.

    Returns the LLM answer and the source file paths of the retrieved
    chunks. Requires ``/load`` to have been called first.
    """
    if qa_chain is None:
        # Guard: before /load runs, qa_chain is still None and calling it
        # raised TypeError (an HTTP 500). Return an explicit error instead.
        return {"error": "No repository indexed. Call /load first."}
    result = qa_chain(question)
    return {
        "answer": result["result"],
        "sources": [doc.metadata["file"] for doc in result["source_documents"]],
    }
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import requests

# Backend FastAPI service (see api.py) — assumed to run locally on port 8000.
API_BASE = "http://localhost:8000"

st.title("🧠 Codebase RAG Assistant")

repo_url = st.text_input("GitHub Repository URL")

if st.button("Index Repository"):
    res = requests.post(f"{API_BASE}/load", params={"repo_url": repo_url})
    # The original reported success unconditionally, even when the backend
    # failed; surface the real HTTP status instead.
    if res.ok:
        st.success("Repository indexed!")
    else:
        st.error(f"Indexing failed (HTTP {res.status_code})")

question = st.text_input("Ask a question about the codebase")

if st.button("Ask"):
    res = requests.get(f"{API_BASE}/ask", params={"question": question}).json()

    # Guard against error payloads (e.g. asking before a repo is indexed),
    # which previously raised KeyError on "answer".
    if "answer" in res:
        st.write("### Answer")
        st.write(res["answer"])

        st.write("### Sources")
        for src in res.get("sources", []):
            st.write(src)
    else:
        st.error(res.get("error", "Request failed — is a repository indexed?"))
chunker.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from langchain.schema import Document
3
+
4
def chunk_code(file_path, code, min_chars=50):
    """Split source text into function/class-level chunks.

    Splits *code* at every newline immediately followed by ``def `` or
    ``class `` (i.e. top-level, unindented definitions — nested defs stay
    inside their parent chunk) and wraps each sufficiently large block in
    a ``Document`` tagged with its originating file path.

    Args:
        file_path: Path stored in each chunk's metadata under ``"file"``.
        code: Full source text of the file.
        min_chars: Blocks whose stripped length is not strictly greater
            than this are dropped. Defaults to 50, preserving the
            original hard-coded behavior.

    Returns:
        list[Document]: one Document per retained block.
    """
    blocks = re.split(r'\n(?=def |class )', code)
    return [
        Document(page_content=block, metadata={"file": file_path})
        for block in blocks
        if len(block.strip()) > min_chars
    ]
ingest.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from git import Repo
3
+ from chunker import chunk_code
4
+
5
+ SUPPORTED_EXT = (".py", ".js", ".java", ".cpp")
6
+
7
def load_repo(repo_url, local_dir="repo"):
    """Ensure a local checkout of *repo_url* exists and return its path.

    NOTE(review): an existing *local_dir* is reused even if it was cloned
    from a different URL — delete the directory to force a fresh clone.
    """
    if not os.path.exists(local_dir):
        Repo.clone_from(repo_url, local_dir)
    return local_dir
12
+
13
def ingest_repo(repo_path):
    """Walk *repo_path* and chunk every supported source file.

    Returns a flat list of Documents produced by ``chunk_code`` for each
    file whose extension is in SUPPORTED_EXT. Undecodable bytes are
    ignored when reading.
    """
    documents = []
    for root, _, filenames in os.walk(repo_path):
        for name in filenames:
            if not name.endswith(SUPPORTED_EXT):
                continue
            full_path = os.path.join(root, name)
            with open(full_path, "r", errors="ignore") as handle:
                source = handle.read()
            documents.extend(chunk_code(full_path, source))
    return documents
rag_chain.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains import RetrievalQA
2
+ from langchain.llms import Groq
3
+
4
def build_rag_chain(vectorstore, groq_api_key):
    """Create a RetrievalQA chain backed by a Groq-hosted LLaMA-3 model.

    Retrieves the 4 most similar code chunks per query and returns the
    retrieved source documents alongside the generated answer.

    NOTE(review): verify that ``langchain.llms`` actually exports ``Groq``
    in the pinned langchain version — Groq support commonly lives in a
    separate integration package. Confirm against requirements.txt.
    """
    language_model = Groq(
        api_key=groq_api_key,
        model_name="llama3-8b-8192",
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    return RetrievalQA.from_chain_type(
        llm=language_model,
        retriever=retriever,
        return_source_documents=True,
    )
requirements.txt CHANGED
@@ -1,3 +1,10 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-community
3
+ sentence-transformers
4
+ faiss-cpu
5
+ fastapi
6
+ uvicorn
7
+ streamlit
8
+ gitpython
9
+ groq
10
+ python-dotenv
vectorstore.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from langchain.vectorstores import FAISS
2
+ from langchain.embeddings import HuggingFaceEmbeddings
3
+
4
def create_vectorstore(documents):
    """Embed *documents* with a MiniLM sentence-transformer and index them
    in an in-memory FAISS store."""
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(documents, embedding_model)