Spaces:

themehmi
/

RepoRaptor

Running

App Files Files Community

themehmi committed on 3 days ago

Commit

bd40b81

verified ·

1 Parent(s): 613f1c3

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -13

app.py CHANGED Viewed

@@ -5,6 +5,8 @@ import shutil
 import subprocess
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
@@ -38,8 +40,8 @@ def load_llm():
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto",
-        torch_dtype="auto",
         low_cpu_mem_usage=True
     )
@@ -51,7 +53,7 @@ def load_llm():
     )
     return HuggingFacePipeline(
         pipeline=pipe,
-        pipeline_kwargs={"max_new_tokens": 300, "temperature": 0.1, "repetition_penalty": 1.1}
     )
 # 2. CODE INGESTION & VECTOR DATABASE
@@ -96,8 +98,8 @@ def setup_vector_db():
         try:
             splitter = RecursiveCharacterTextSplitter.from_language(
                 language=lang,
-                chunk_size=500,
-                chunk_overlap=50
             )
             all_splits.extend(splitter.split_documents(docs))
         except Exception:
@@ -107,15 +109,19 @@ def setup_vector_db():
     # Split generic documents
     if generic_docs:
         generic_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=500,
-            chunk_overlap=50
         )
         all_splits.extend(generic_splitter.split_documents(generic_docs))
     if not all_splits:
         return None, 0
-    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
     db = FAISS.from_documents(all_splits, embeddings)
     return db, file_count
@@ -126,15 +132,20 @@ device_status = "🟢 GPU Active" if torch.cuda.is_available() else "🟡 CPU Mo
 llm = load_llm()
 vector_db, file_count = setup_vector_db()
-prompt_template = """You are a specialized Codebase Assistant. Your ONLY purpose is to answer questions related to the provided codebase or general programming/coding questions.
 If the user asks a question that is NOT related to coding, programming, or the provided codebase, you must politely refuse to answer and remind them that you are a code-focused assistant.
-Use the following codebase context to answer the question. If you don't know the answer, just say that you don't know, don't try to make up code.
-Context: {context}
 Question: {input}
-Helpful Developer Answer:"""
 prompt = PromptTemplate.from_template(prompt_template)
@@ -144,7 +155,7 @@ def format_docs(docs):
 def build_qa_chain(db):
     if not db:
         return None
-    retriever = db.as_retriever(search_kwargs={"k": 3})
     return (
         {"context": retriever, "input": RunnablePassthrough()}
         | RunnablePassthrough.assign(

 import subprocess
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# Maximize Hugging Face CPU Tier performance by limiting thread thrashing
+torch.set_num_threads(os.cpu_count() or 2)
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        device_map="cpu",
+        torch_dtype=torch.float32,
         low_cpu_mem_usage=True
     )
     )
     return HuggingFacePipeline(
         pipeline=pipe,
+        pipeline_kwargs={"max_new_tokens": 512, "temperature": 0.1, "repetition_penalty": 1.1}
     )
 # 2. CODE INGESTION & VECTOR DATABASE
         try:
             splitter = RecursiveCharacterTextSplitter.from_language(
                 language=lang,
+                chunk_size=1000,
+                chunk_overlap=200
             )
             all_splits.extend(splitter.split_documents(docs))
         except Exception:
     # Split generic documents
     if generic_docs:
         generic_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200
         )
         all_splits.extend(generic_splitter.split_documents(generic_docs))
     if not all_splits:
         return None, 0
+    embeddings = HuggingFaceEmbeddings(
+        model_name="all-MiniLM-L6-v2",
+        model_kwargs={'device': 'cpu'},
+        encode_kwargs={'normalize_embeddings': False}
+    )
     db = FAISS.from_documents(all_splits, embeddings)
     return db, file_count
 llm = load_llm()
 vector_db, file_count = setup_vector_db()
+prompt_template = """You are an expert Software Engineer and Codebase Assistant. Your ONLY purpose is to answer questions related to the provided codebase or general programming/coding questions.
 If the user asks a question that is NOT related to coding, programming, or the provided codebase, you must politely refuse to answer and remind them that you are a code-focused assistant.
+When answering:
+1. Carefully analyze the provided context.
+2. Provide a clear, step-by-step explanation.
+3. If providing code, use markdown code blocks.
+4. If the answer cannot be found in the context, explicitly state that you don't know rather than hallucinating.
+Codebase Context:
+{context}
 Question: {input}
+Expert Developer Answer:"""
 prompt = PromptTemplate.from_template(prompt_template)
 def build_qa_chain(db):
     if not db:
         return None
+    retriever = db.as_retriever(search_kwargs={"k": 5})
     return (
         {"context": retriever, "input": RunnablePassthrough()}
         | RunnablePassthrough.assign(