InclusiveWorldChatbotSpace / simple_test.py
IW2025's picture
Upload 30 files
93fe96e verified
#!/usr/bin/env python3
"""
Simple Performance Test for Curriculum Assistant
Tests core functionality without heavy dependencies
"""
import time
import os
from pathlib import Path
import fitz # PyMuPDF
from PIL import Image
import io
class SimpleCurriculumTest:
    """Keyword-search helper over the extracted text of a folder of PDFs.

    On construction every ``*.pdf`` found directly inside the slides
    directory is opened with PyMuPDF and its page text is kept in memory.
    ``simple_search`` then does a case-insensitive substring search over
    that text and caches the generated response per query string.
    """

    # Longest page-text snippet echoed back inside a response.
    PREVIEW_CHARS = 200

    def __init__(self, slides_dir="Slides"):
        """Load and index all PDFs in *slides_dir*.

        Args:
            slides_dir: Directory that is scanned (non-recursively) for
                ``*.pdf`` files.
        """
        # filename -> {1-based page number -> stripped page text}
        self.pdf_pages = {}
        # filename -> path of the PDF as a string
        self.pdf_files = {}
        # exact query string -> response previously returned for it
        self.response_cache = {}
        self._process_pdfs(slides_dir)
        print(f"✅ Loaded {len(self.pdf_files)} PDF files")

    def _process_pdfs(self, slides_dir):
        """Extract the text of every page of every PDF in *slides_dir*.

        Pages whose text is empty after stripping whitespace are skipped.
        Results are stored in ``self.pdf_files`` and ``self.pdf_pages``.
        """
        # Sorted so that ties between equally relevant pages are broken the
        # same way on every platform (glob order is OS-dependent).
        for pdf_file in sorted(Path(slides_dir).glob("*.pdf")):
            self.pdf_files[pdf_file.name] = str(pdf_file)
            doc = fitz.open(str(pdf_file))
            try:
                pages = {}
                for page_num, page in enumerate(doc, start=1):
                    text = page.get_text().strip()
                    if text:
                        pages[page_num] = text
            finally:
                # Release the file handle even if text extraction fails.
                doc.close()
            self.pdf_pages[pdf_file.name] = pages

    def simple_search(self, query):
        """Return a text response describing the best page for *query*.

        The search is a case-insensitive substring match. Pages are ranked
        by how often the query occurs in the full page text; on a tie the
        page that was indexed first wins. The response is cached under the
        exact query string and returned directly on later calls.

        Args:
            query: Text to look for. A blank query matches nothing.

        Returns:
            A string naming the best matching file and page, a preview of
            that page and the number of matching pages, or a "not found"
            message when no page contains the query.
        """
        start_time = time.perf_counter()

        # Responses are never empty, so None reliably means "not cached".
        cached = self.response_cache.get(query)
        if cached is not None:
            elapsed = time.perf_counter() - start_time
            print(f"✅ Cache hit! Response time: {elapsed:.3f}s")
            return cached

        needle = query.lower()
        limit = self.PREVIEW_CHARS
        results = []
        # An empty needle is a substring of every text, so a blank query
        # would otherwise "match" every page.
        if needle.strip():
            for filename, pages in self.pdf_pages.items():
                for page_num, text in pages.items():
                    # Score on the whole page, not on the truncated preview,
                    # so matches beyond the preview still count.
                    score = text.lower().count(needle)
                    if not score:
                        continue
                    preview = text[:limit] + "..." if len(text) > limit else text
                    results.append({
                        'filename': filename,
                        'page': page_num,
                        'content': preview,
                        'score': score,
                    })

        if results:
            # max() keeps the first of several equally scored pages, which
            # matches a stable descending sort.
            best = max(results, key=lambda item: item['score'])
            response = (
                f"📄 Found in: {best['filename']} - Page {best['page']}\n\n"
                f"Content: {best['content']}\n\n"
                f"Found {len(results)} relevant pages"
            )
        else:
            response = f"No relevant content found for '{query}'"

        self.response_cache[query] = response
        response_time = time.perf_counter() - start_time
        print(f"✅ Response generated in {response_time:.3f} seconds")
        return response
def test_performance():
    """Benchmark ``SimpleCurriculumTest`` start-up and query latency.

    Builds the assistant with its default slides directory, sends a fixed
    list of keyword queries through ``simple_search`` and prints per-query
    timings, a summary, and a coarse rating of the average response time.
    """
    print("🚀 Starting Simple Performance Test...")

    # perf_counter is monotonic and high-resolution, which suits the very
    # short intervals measured here better than wall-clock time.
    init_started = time.perf_counter()
    chatbot = SimpleCurriculumTest()
    init_time = time.perf_counter() - init_started
    print(f"✅ Initialization time: {init_time:.3f} seconds")

    test_queries = [
        "loops",
        "variables",
        "functions",
        "programming",
        "for loop",
        "while loop",
    ]
    print(f"\n🧪 Testing {len(test_queries)} queries...")

    total_time = 0
    cache_hits = 0
    for i, query in enumerate(test_queries, 1):
        print(f"\n--- Test {i}/{len(test_queries)}: '{query}' ---")
        # Record a hit only if the answer was cached BEFORE this call;
        # checking afterwards would count every query, since each search
        # stores its own result.
        if query in chatbot.response_cache:
            cache_hits += 1
        query_started = time.perf_counter()
        response = chatbot.simple_search(query)
        query_time = time.perf_counter() - query_started
        total_time += query_time
        print(f"Response time: {query_time:.3f}s")
        print(f"Response length: {len(response)} characters")
        print(f"Cache size: {len(chatbot.response_cache)} entries")
        # Show first 200 chars of response
        print(f"Response preview: {response[:200]}...")

    avg_time = total_time / len(test_queries)
    print(f"\n📊 Performance Summary:")
    print(f"Total time: {total_time:.3f}s")
    print(f"Average response time: {avg_time:.3f}s")
    print(f"Initialization time: {init_time:.3f}s")
    print(f"Cache hits: {cache_hits}")

    # Bucket the average latency into a human-readable rating.
    if avg_time < 0.1:
        rating = "🚀 EXCELLENT (< 100ms)"
    elif avg_time < 0.5:
        rating = "✅ GOOD (< 500ms)"
    elif avg_time < 1.0:
        rating = "⚠️ ACCEPTABLE (< 1s)"
    else:
        rating = "❌ SLOW (> 1s)"
    print(f"Performance rating: {rating}")
if __name__ == "__main__":
    # Run the benchmark only when this file is executed as a script, so
    # importing the module has no side effects.
    test_performance()