themehmi committed on
Commit
bd40b81
·
verified ·
1 Parent(s): 613f1c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -13
app.py CHANGED
@@ -5,6 +5,8 @@ import shutil
5
  import subprocess
6
 
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 
8
 
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
10
  from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
@@ -38,8 +40,8 @@ def load_llm():
38
 
39
  model = AutoModelForCausalLM.from_pretrained(
40
  model_id,
41
- device_map="auto",
42
- torch_dtype="auto",
43
  low_cpu_mem_usage=True
44
  )
45
 
@@ -51,7 +53,7 @@ def load_llm():
51
  )
52
  return HuggingFacePipeline(
53
  pipeline=pipe,
54
- pipeline_kwargs={"max_new_tokens": 300, "temperature": 0.1, "repetition_penalty": 1.1}
55
  )
56
 
57
  # 2. CODE INGESTION & VECTOR DATABASE
@@ -96,8 +98,8 @@ def setup_vector_db():
96
  try:
97
  splitter = RecursiveCharacterTextSplitter.from_language(
98
  language=lang,
99
- chunk_size=500,
100
- chunk_overlap=50
101
  )
102
  all_splits.extend(splitter.split_documents(docs))
103
  except Exception:
@@ -107,15 +109,19 @@ def setup_vector_db():
107
  # Split generic documents
108
  if generic_docs:
109
  generic_splitter = RecursiveCharacterTextSplitter(
110
- chunk_size=500,
111
- chunk_overlap=50
112
  )
113
  all_splits.extend(generic_splitter.split_documents(generic_docs))
114
 
115
  if not all_splits:
116
  return None, 0
117
 
118
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
 
 
 
119
  db = FAISS.from_documents(all_splits, embeddings)
120
 
121
  return db, file_count
@@ -126,15 +132,20 @@ device_status = "🟢 GPU Active" if torch.cuda.is_available() else "🟡 CPU Mo
126
  llm = load_llm()
127
  vector_db, file_count = setup_vector_db()
128
 
129
- prompt_template = """You are a specialized Codebase Assistant. Your ONLY purpose is to answer questions related to the provided codebase or general programming/coding questions.
130
  If the user asks a question that is NOT related to coding, programming, or the provided codebase, you must politely refuse to answer and remind them that you are a code-focused assistant.
131
 
132
- Use the following codebase context to answer the question. If you don't know the answer, just say that you don't know, don't try to make up code.
 
 
 
 
133
 
134
- Context: {context}
 
135
 
136
  Question: {input}
137
- Helpful Developer Answer:"""
138
 
139
  prompt = PromptTemplate.from_template(prompt_template)
140
 
@@ -144,7 +155,7 @@ def format_docs(docs):
144
  def build_qa_chain(db):
145
  if not db:
146
  return None
147
- retriever = db.as_retriever(search_kwargs={"k": 3})
148
  return (
149
  {"context": retriever, "input": RunnablePassthrough()}
150
  | RunnablePassthrough.assign(
 
5
  import subprocess
6
 
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
+ # Set PyTorch's CPU intra-op thread count to the number of cores the OS reports (falls back to 2 if unknown)
9
+ torch.set_num_threads(os.cpu_count() or 2)
10
 
11
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
12
  from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
 
40
 
41
  model = AutoModelForCausalLM.from_pretrained(
42
  model_id,
43
+ device_map="cpu",
44
+ torch_dtype=torch.float32,
45
  low_cpu_mem_usage=True
46
  )
47
 
 
53
  )
54
  return HuggingFacePipeline(
55
  pipeline=pipe,
56
+ pipeline_kwargs={"max_new_tokens": 512, "temperature": 0.1, "repetition_penalty": 1.1}
57
  )
58
 
59
  # 2. CODE INGESTION & VECTOR DATABASE
 
98
  try:
99
  splitter = RecursiveCharacterTextSplitter.from_language(
100
  language=lang,
101
+ chunk_size=1000,
102
+ chunk_overlap=200
103
  )
104
  all_splits.extend(splitter.split_documents(docs))
105
  except Exception:
 
109
  # Split generic documents
110
  if generic_docs:
111
  generic_splitter = RecursiveCharacterTextSplitter(
112
+ chunk_size=1000,
113
+ chunk_overlap=200
114
  )
115
  all_splits.extend(generic_splitter.split_documents(generic_docs))
116
 
117
  if not all_splits:
118
  return None, 0
119
 
120
+ embeddings = HuggingFaceEmbeddings(
121
+ model_name="all-MiniLM-L6-v2",
122
+ model_kwargs={'device': 'cpu'},
123
+ encode_kwargs={'normalize_embeddings': False}
124
+ )
125
  db = FAISS.from_documents(all_splits, embeddings)
126
 
127
  return db, file_count
 
132
  llm = load_llm()
133
  vector_db, file_count = setup_vector_db()
134
 
135
+ prompt_template = """You are an expert Software Engineer and Codebase Assistant. Your ONLY purpose is to answer questions related to the provided codebase or general programming/coding questions.
136
  If the user asks a question that is NOT related to coding, programming, or the provided codebase, you must politely refuse to answer and remind them that you are a code-focused assistant.
137
 
138
+ When answering:
139
+ 1. Carefully analyze the provided context.
140
+ 2. Provide a clear, step-by-step explanation.
141
+ 3. If providing code, use markdown code blocks.
142
+ 4. If the answer cannot be found in the context, explicitly state that you don't know rather than hallucinating.
143
 
144
+ Codebase Context:
145
+ {context}
146
 
147
  Question: {input}
148
+ Expert Developer Answer:"""
149
 
150
  prompt = PromptTemplate.from_template(prompt_template)
151
 
 
155
  def build_qa_chain(db):
156
  if not db:
157
  return None
158
+ retriever = db.as_retriever(search_kwargs={"k": 5})
159
  return (
160
  {"context": retriever, "input": RunnablePassthrough()}
161
  | RunnablePassthrough.assign(