Spaces:

IW2025
/

InclusiveWorldChatbot

Sleeping

App Files Community

IW2025 committed on Jul 16, 2025

Commit

bbcaec4

verified ·

1 Parent(s): e6f29f5

Upload app.py

Browse files

Files changed (1) hide show

app.py +15 -17

app.py CHANGED Viewed

@@ -38,10 +38,12 @@ class CurriculumAssistant:
                 "text-generation",
                 model=model,
                 tokenizer=tokenizer,
-                max_new_tokens=256,
                 temperature=0.7,
                 top_p=0.95,
-                repetition_penalty=1.15
             )
             self.llm = HuggingFacePipeline(pipeline=pipe)
@@ -101,10 +103,10 @@ class CurriculumAssistant:
                 print("No text could be extracted from PDF files!")
                 return False
-            # Split text into chunks with metadata
             text_splitter = RecursiveCharacterTextSplitter(
-                chunk_size=1000,
-                chunk_overlap=200,
                 length_function=len,
             )
@@ -149,21 +151,17 @@ class CurriculumAssistant:
         if not self.vector_db or not self.llm:
             return False
-        # Custom prompt template for Q&A
-        qa_template = """You are an expert programming instructor for the Inclusive World Curriculum.
-        Use the following context to answer the student's question. If the information is not in the context,
-        provide a helpful response based on your knowledge of programming concepts.
-        Context: {context}
-        Question: {question}
-        Answer:"""
         self.qa_chain = RetrievalQA.from_chain_type(
             llm=self.llm,
             chain_type="stuff",
-            retriever=self.vector_db.as_retriever(search_kwargs={"k": 5}),
             chain_type_kwargs={
                 "prompt": PromptTemplate(
                     template=qa_template,
@@ -178,7 +176,7 @@ class CurriculumAssistant:
         """Find relevant pages for a given question"""
         try:
             # Search for relevant chunks
-            results = self.vector_db.similarity_search(question, k=5)
             relevant_pages = []
             seen_pages = set()
@@ -215,7 +213,7 @@ class CurriculumAssistant:
             # Sort by relevance and return top results
             relevant_pages.sort(key=lambda x: x['relevance_score'], reverse=True)
-            return relevant_pages[:3]  # Return top 3 most relevant pages
         except Exception as e:
             print(f"Error finding relevant pages: {str(e)}")
@@ -257,7 +255,7 @@ def ask_question(question: str, assistant: CurriculumAssistant):
             page_info = "📄 **Relevant Pages Found:**\n\n"
             for i, page in enumerate(relevant_pages, 1):
                 page_info += f"**{i}. {page['filename']} - Page {page['page_number']}**\n"
-                page_info += f"```\n{page['content'][:300]}...\n```\n\n"
         else:
             page_info = "No specific pages found for this question."

                 "text-generation",
                 model=model,
                 tokenizer=tokenizer,
+                max_new_tokens=128,  # Reduced from 256
                 temperature=0.7,
                 top_p=0.95,
+                repetition_penalty=1.15,
+                do_sample=True,
+                pad_token_id=tokenizer.eos_token_id
             )
             self.llm = HuggingFacePipeline(pipeline=pipe)
                 print("No text could be extracted from PDF files!")
                 return False
+            # Split text into smaller chunks with metadata
             text_splitter = RecursiveCharacterTextSplitter(
+                chunk_size=500,  # Reduced from 1000
+                chunk_overlap=50,  # Reduced from 200
                 length_function=len,
             )
         if not self.vector_db or not self.llm:
             return False
+        # Shorter prompt template for DialoGPT
+        qa_template = """Context: {context}
+Question: {question}
+Answer:"""
         self.qa_chain = RetrievalQA.from_chain_type(
             llm=self.llm,
             chain_type="stuff",
+            retriever=self.vector_db.as_retriever(search_kwargs={"k": 2}),  # Reduced from 5
             chain_type_kwargs={
                 "prompt": PromptTemplate(
                     template=qa_template,
         """Find relevant pages for a given question"""
         try:
             # Search for relevant chunks
+            results = self.vector_db.similarity_search(question, k=3)  # Reduced from 5
             relevant_pages = []
             seen_pages = set()
             # Sort by relevance and return top results
             relevant_pages.sort(key=lambda x: x['relevance_score'], reverse=True)
+            return relevant_pages[:2]  # Reduced from 3
         except Exception as e:
             print(f"Error finding relevant pages: {str(e)}")
             page_info = "📄 **Relevant Pages Found:**\n\n"
             for i, page in enumerate(relevant_pages, 1):
                 page_info += f"**{i}. {page['filename']} - Page {page['page_number']}**\n"
+                page_info += f"```\n{page['content'][:200]}...\n```\n\n"  # Reduced from 300
         else:
             page_info = "No specific pages found for this question."