IW2025 committed on
Commit
bbcaec4
·
verified ·
1 Parent(s): e6f29f5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -17
app.py CHANGED
@@ -38,10 +38,12 @@ class CurriculumAssistant:
38
  "text-generation",
39
  model=model,
40
  tokenizer=tokenizer,
41
- max_new_tokens=256,
42
  temperature=0.7,
43
  top_p=0.95,
44
- repetition_penalty=1.15
 
 
45
  )
46
 
47
  self.llm = HuggingFacePipeline(pipeline=pipe)
@@ -101,10 +103,10 @@ class CurriculumAssistant:
101
  print("No text could be extracted from PDF files!")
102
  return False
103
 
104
- # Split text into chunks with metadata
105
  text_splitter = RecursiveCharacterTextSplitter(
106
- chunk_size=1000,
107
- chunk_overlap=200,
108
  length_function=len,
109
  )
110
 
@@ -149,21 +151,17 @@ class CurriculumAssistant:
149
  if not self.vector_db or not self.llm:
150
  return False
151
 
152
- # Custom prompt template for Q&A
153
- qa_template = """You are an expert programming instructor for the Inclusive World Curriculum.
154
- Use the following context to answer the student's question. If the information is not in the context,
155
- provide a helpful response based on your knowledge of programming concepts.
156
 
157
- Context: {context}
158
 
159
- Question: {question}
160
-
161
- Answer:"""
162
 
163
  self.qa_chain = RetrievalQA.from_chain_type(
164
  llm=self.llm,
165
  chain_type="stuff",
166
- retriever=self.vector_db.as_retriever(search_kwargs={"k": 5}),
167
  chain_type_kwargs={
168
  "prompt": PromptTemplate(
169
  template=qa_template,
@@ -178,7 +176,7 @@ class CurriculumAssistant:
178
  """Find relevant pages for a given question"""
179
  try:
180
  # Search for relevant chunks
181
- results = self.vector_db.similarity_search(question, k=5)
182
 
183
  relevant_pages = []
184
  seen_pages = set()
@@ -215,7 +213,7 @@ class CurriculumAssistant:
215
 
216
  # Sort by relevance and return top results
217
  relevant_pages.sort(key=lambda x: x['relevance_score'], reverse=True)
218
- return relevant_pages[:3] # Return top 3 most relevant pages
219
 
220
  except Exception as e:
221
  print(f"Error finding relevant pages: {str(e)}")
@@ -257,7 +255,7 @@ def ask_question(question: str, assistant: CurriculumAssistant):
257
  page_info = "📄 **Relevant Pages Found:**\n\n"
258
  for i, page in enumerate(relevant_pages, 1):
259
  page_info += f"**{i}. {page['filename']} - Page {page['page_number']}**\n"
260
- page_info += f"```\n{page['content'][:300]}...\n```\n\n"
261
  else:
262
  page_info = "No specific pages found for this question."
263
 
 
38
  "text-generation",
39
  model=model,
40
  tokenizer=tokenizer,
41
+ max_new_tokens=128, # Reduced from 256
42
  temperature=0.7,
43
  top_p=0.95,
44
+ repetition_penalty=1.15,
45
+ do_sample=True,
46
+ pad_token_id=tokenizer.eos_token_id
47
  )
48
 
49
  self.llm = HuggingFacePipeline(pipeline=pipe)
 
103
  print("No text could be extracted from PDF files!")
104
  return False
105
 
106
+ # Split text into smaller chunks with metadata
107
  text_splitter = RecursiveCharacterTextSplitter(
108
+ chunk_size=500, # Reduced from 1000
109
+ chunk_overlap=50, # Reduced from 200
110
  length_function=len,
111
  )
112
 
 
151
  if not self.vector_db or not self.llm:
152
  return False
153
 
154
+ # Shorter prompt template for DialoGPT
155
+ qa_template = """Context: {context}
 
 
156
 
157
+ Question: {question}
158
 
159
+ Answer:"""
 
 
160
 
161
  self.qa_chain = RetrievalQA.from_chain_type(
162
  llm=self.llm,
163
  chain_type="stuff",
164
+ retriever=self.vector_db.as_retriever(search_kwargs={"k": 2}), # Reduced from 5
165
  chain_type_kwargs={
166
  "prompt": PromptTemplate(
167
  template=qa_template,
 
176
  """Find relevant pages for a given question"""
177
  try:
178
  # Search for relevant chunks
179
+ results = self.vector_db.similarity_search(question, k=3) # Reduced from 5
180
 
181
  relevant_pages = []
182
  seen_pages = set()
 
213
 
214
  # Sort by relevance and return top results
215
  relevant_pages.sort(key=lambda x: x['relevance_score'], reverse=True)
216
+ return relevant_pages[:2] # Reduced from 3
217
 
218
  except Exception as e:
219
  print(f"Error finding relevant pages: {str(e)}")
 
255
  page_info = "📄 **Relevant Pages Found:**\n\n"
256
  for i, page in enumerate(relevant_pages, 1):
257
  page_info += f"**{i}. {page['filename']} - Page {page['page_number']}**\n"
258
+ page_info += f"```\n{page['content'][:200]}...\n```\n\n" # Reduced from 300
259
  else:
260
  page_info = "No specific pages found for this question."
261