IW2025 committed on
Commit
4b63357
·
verified ·
1 Parent(s): e5b03ae

Upload 7 files

Browse files
Files changed (4) hide show
  1. config.py +55 -0
  2. run.py +92 -0
  3. setup.py +170 -0
  4. utils.py +83 -0
config.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration file for the Inclusive World Curriculum Assistant
3
+ """
4
+
5
# Model settings: generation model, embedding model and sampling/loading options.
# NOTE(review): "trust_remote_code" is True; if this is forwarded to a Hugging
# Face loader it allows code shipped in the model repository to run locally.
# Confirm that is actually required for the models named here.
MODEL_CONFIG = dict(
    model_name="microsoft/DialoGPT-medium",
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
    torch_dtype="float16",
    device_map="auto",
    trust_remote_code=True,
)

# Vector store settings: chunking parameters, on-disk location and the
# keyword arguments used for similarity search.
VECTOR_DB_CONFIG = dict(
    chunk_size=1000,
    chunk_overlap=200,
    persist_directory="./chroma_db",
    search_kwargs=dict(k=3),
)

# File handling settings: where slides live and which formats are accepted.
FILE_CONFIG = dict(
    slides_directory="Slides",
    supported_formats=[".pdf"],
    max_preview_length=500,
)

# UI settings (page title, icon, layout and sidebar state).
UI_CONFIG = dict(
    page_title="Inclusive World Curriculum Assistant",
    page_icon="🎓",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Curriculum topics offered for quick access.
CURRICULUM_TOPICS = [
    "Variables and Data Types",
    "Control Structures (if/else)",
    "Loops (for, while)",
    "Functions and Methods",
    "Arrays and Lists",
    "Object-Oriented Programming",
    "Error Handling",
    "File Operations",
    "Web Development Basics",
    "Database Fundamentals",
    "API Development",
    "Testing Strategies",
]
run.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Launcher script for Inclusive World Curriculum Assistant
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import subprocess
9
+ from pathlib import Path
10
+
11
def check_environment():
    """Verify that the current working directory looks like a project checkout.

    Confirms that the application's core files exist (relative to the
    current working directory) and makes sure a ``Slides`` folder is
    present, creating it when absent.

    Returns:
        bool: ``False`` if any required file is missing, ``True`` otherwise.
    """
    print("🔍 Checking environment...")

    expected = ("app.py", "config.py", "utils.py", "requirements.txt")
    absent = [name for name in expected if not Path(name).exists()]
    if absent:
        print(f"❌ Missing required files: {', '.join(absent)}")
        return False

    # A missing Slides folder is not fatal; it is simply created empty.
    slides = Path("Slides")
    if not slides.exists():
        print("⚠️ Slides directory not found. Creating...")
        slides.mkdir(exist_ok=True)

    print("✅ Environment check passed")
    return True
34
+
35
def check_dependencies():
    """Try to import each runtime dependency and report the outcome.

    The packages are imported in a fixed order and the check stops at the
    first one that fails to import.

    Returns:
        bool: ``True`` when every package imports, ``False`` after the first
        ``ImportError``.
    """
    print("📦 Checking dependencies...")

    packages = ("streamlit", "langchain", "chromadb", "transformers", "torch", "fitz")
    try:
        for package in packages:
            __import__(package)
    except ImportError as e:
        print(f"❌ Missing dependency: {e}")
        print("Please run: pip install -r requirements.txt")
        return False

    print("✅ All dependencies are installed")
    return True
52
+
53
def start_application():
    """Launch ``app.py`` under Streamlit and block until it exits.

    Streamlit is started with the current interpreter on ``localhost:8501``.
    Ctrl+C is reported as a normal stop; any other exception raised while
    launching is printed rather than propagated.
    """
    startup_messages = (
        "🚀 Starting Inclusive World Curriculum Assistant...",
        "📖 Opening web interface...",
        "🌐 The application will open in your default browser",
        "⏳ Please wait for the system to load...",
        "\n" + "="*50,
    )
    for message in startup_messages:
        print(message)

    # Run through ``python -m`` so the Streamlit of this interpreter is used.
    command = [
        sys.executable, "-m", "streamlit", "run", "app.py",
        "--server.port", "8501",
        "--server.address", "localhost",
    ]
    try:
        subprocess.run(command)
    except KeyboardInterrupt:
        print("\n👋 Application stopped by user")
    except Exception as e:
        print(f"❌ Error starting application: {e}")
72
+
73
def main():
    """Run the pre-flight checks and then start the application.

    Exits the process with status 1 as soon as the environment check or the
    dependency check fails; otherwise hands over to ``start_application``.
    """
    print("🎓 Inclusive World Curriculum Assistant")
    print("=" * 50)

    # Each gate is paired with the message printed when it fails.
    gates = (
        (check_environment,
         "\n❌ Environment check failed. Please ensure all files are present."),
        (check_dependencies,
         "\n❌ Dependencies check failed. Please install required packages."),
    )
    for gate, failure_message in gates:
        if not gate():
            print(failure_message)
            sys.exit(1)

    start_application()


if __name__ == "__main__":
    main()
setup.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Setup script for Inclusive World Curriculum Assistant
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import subprocess
9
+ import platform
10
+ from pathlib import Path
11
+
12
def check_python_version():
    """Report whether the running interpreter is Python 3.8 or newer.

    Returns:
        bool: ``True`` for Python >= 3.8, ``False`` otherwise.
    """
    supported = sys.version_info >= (3, 8)
    if supported:
        print(f"✅ Python version: {sys.version}")
    else:
        print("❌ Error: Python 3.8 or higher is required")
        print(f"Current version: {sys.version}")
    return supported
20
+
21
def check_system_requirements():
    """Print total RAM and free disk space, warning when either looks low.

    The check is advisory only: it prints its findings and returns ``None``.

    RAM is read through the optional ``psutil`` package and skipped with a
    notice when that package is not installed. Free disk space for the
    current directory is read with the standard library, so it is still
    reported when ``psutil`` is missing. Previously the disk check depended
    on ``psutil`` too, and a bare ``except`` hid the resulting ``NameError``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import shutil

    print("\n🔍 Checking system requirements...")

    # RAM check (needs the optional psutil package).
    try:
        import psutil
    except ImportError:
        print("⚠️ psutil not available - cannot check RAM")
    else:
        memory_gb = psutil.virtual_memory().total / (1024**3)
        # The value is the machine's total memory, so it is labelled as such.
        print(f"📊 Total RAM: {memory_gb:.1f} GB")
        if memory_gb < 8:
            print("⚠️ Warning: Less than 8GB RAM detected. Model loading may be slow.")
        else:
            print("✅ Sufficient RAM detected")

    # Disk check (standard library only).
    try:
        free_gb = shutil.disk_usage('.').free / (1024**3)
    except OSError:
        print("⚠️ Could not check disk space")
    else:
        print(f"💾 Available disk space: {free_gb:.1f} GB")
        if free_gb < 5:
            print("⚠️ Warning: Less than 5GB free space. Consider freeing up space.")
        else:
            print("✅ Sufficient disk space")
48
+
49
def install_dependencies():
    """Upgrade pip and install the packages listed in ``requirements.txt``.

    Both steps run pip through the current interpreter. The second step is
    skipped if the first one fails.

    Returns:
        bool: ``True`` when both pip invocations succeed, ``False`` when
        either exits with a non-zero status.
    """
    print("\n📦 Installing dependencies...")

    pip_install = [sys.executable, "-m", "pip", "install"]
    steps = (
        pip_install + ["--upgrade", "pip"],        # upgrade pip first
        pip_install + ["-r", "requirements.txt"],  # then the project requirements
    )
    try:
        for argv in steps:
            subprocess.check_call(argv)
    except subprocess.CalledProcessError as e:
        print(f"❌ Error installing dependencies: {e}")
        return False

    print("✅ Dependencies installed successfully")
    return True
64
+
65
def create_directories():
    """Ensure the ``Slides`` and ``chroma_db`` folders exist in the current directory.

    Existing folders are left untouched; a confirmation line is printed for
    each folder either way.
    """
    print("\n📁 Creating directories...")

    for folder in map(Path, ("Slides", "chroma_db")):
        folder.mkdir(exist_ok=True)
        print(f"✅ Created directory: {folder}")
73
+
74
def check_curriculum_files():
    """List the PDF files found directly inside the ``Slides`` folder.

    Creates the folder when it is missing, then either prints each PDF's
    file name or a hint asking the user to add curriculum PDFs.
    """
    print("\n📚 Checking curriculum files...")

    slides_dir = Path("Slides")
    if not slides_dir.exists():
        print("⚠️ Slides directory not found. Creating...")
        slides_dir.mkdir(exist_ok=True)

    found = [*slides_dir.glob("*.pdf")]
    if not found:
        print("⚠️ No PDF files found in Slides directory")
        print(" Please add your curriculum PDF files to the Slides/ directory")
        return

    print(f"✅ Found {len(found)} curriculum PDF files:")
    for entry in found:
        print(f" 📄 {entry.name}")
91
+
92
def create_sample_config():
    """Check that ``config.py`` exists in the current directory.

    Despite its name, this function does not write anything; it only reports
    whether the file is present.

    Returns:
        bool: ``True`` if ``config.py`` exists, ``False`` otherwise.
    """
    print("\n⚙️ Checking configuration...")

    if Path("config.py").exists():
        print("✅ Configuration file found")
        return True

    print("❌ config.py not found. Please ensure it exists.")
    return False
102
+
103
def test_imports():
    """Attempt to import every required package and print a per-package result.

    Unlike a fail-fast check, every package is tried so that the full list of
    failures can be reported at the end.

    Returns:
        bool: ``True`` when all packages import, ``False`` if any raised
        ``ImportError``.
    """
    print("\n🧪 Testing imports...")

    failed = []
    for name in ("streamlit", "langchain", "chromadb", "transformers", "torch", "fitz"):
        try:
            __import__(name)
        except ImportError:
            print(f"❌ {name}")
            failed.append(name)
        else:
            print(f"✅ {name}")

    if failed:
        print(f"\n❌ Failed to import: {', '.join(failed)}")
        return False

    print("✅ All required modules imported successfully")
    return True
131
+
132
def main():
    """Run the setup steps in order, exiting with status 1 on a fatal failure.

    Order: Python version check, advisory system check, directory creation,
    curriculum file listing, configuration check, dependency installation,
    import test. The version, configuration, installation and import steps
    are fatal; the others only print information.
    """
    print("🎓 Inclusive World Curriculum Assistant Setup")
    print("=" * 50)

    if not check_python_version():
        sys.exit(1)

    # Informational steps: these never abort the setup.
    check_system_requirements()
    create_directories()
    check_curriculum_files()

    if not create_sample_config():
        sys.exit(1)

    # Fatal steps that print an extra explanation before exiting.
    fatal_steps = (
        (install_dependencies,
         "\n❌ Setup failed. Please check the error messages above."),
        (test_imports,
         "\n❌ Some modules failed to import. Please reinstall dependencies."),
    )
    for step, failure_message in fatal_steps:
        if not step():
            print(failure_message)
            sys.exit(1)

    closing_lines = (
        "\n🎉 Setup completed successfully!",
        "\n🚀 To start the application:",
        " streamlit run app.py",
        "\n📖 For more information, see README.md",
    )
    for line in closing_lines:
        print(line)


if __name__ == "__main__":
    main()
utils.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for the Inclusive World Curriculum Assistant
3
+ """
4
+
5
+ import re
6
+ from typing import List, Dict, Any
7
+ from pathlib import Path
8
+ import fitz
9
+ from config import CURRICULUM_TOPICS
10
+
11
def clean_text(text: str) -> str:
    """Strip unsupported symbols from *text* and normalize its whitespace.

    Only word characters, whitespace and the punctuation
    ``. , ! ? ; : - ( ) [ ] { }`` are kept. Every other character, including
    quotes and operators such as ``=`` or ``#``, is removed.

    Symbols are removed before whitespace is collapsed. Doing it the other
    way round leaves double spaces where a symbol stood between two spaces
    (for example ``"a @ b"`` would become ``"a  b"``).

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with runs of whitespace collapsed to single spaces
        and no leading or trailing whitespace.
    """
    # Drop characters that might interfere with downstream processing.
    text = re.sub(r'[^\w\s\.\,\!\?\;\:\-\(\)\[\]\{\}]', '', text)
    # Collapse whitespace last so gaps left by removed symbols are merged too.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
18
+
19
# Words that appear in topic names but are too common in ordinary prose to
# be evidence of a topic. This includes language keywords that are also
# everyday English words.
_TOPIC_STOPWORDS = frozenset({"and", "or", "the", "of", "for", "if", "else", "while"})


def extract_curriculum_topics(text: str, topics: "List[str] | None" = None) -> List[str]:
    """Return the curriculum topics that *text* mentions.

    A topic counts as mentioned when at least one of its keywords occurs in
    *text* as a whole word, ignoring case. Keywords are the alphanumeric
    words of the topic name, minus a small set of stop words.

    Earlier behavior matched raw substrings of every whitespace-separated
    token. The word "and" (also inside "standard") therefore selected
    "Variables and Data Types" for almost any text, and tokens such as
    "(for," carried punctuation into the comparison.

    Args:
        text: Text to scan.
        topics: Topic names to look for. Defaults to ``CURRICULUM_TOPICS``.

    Returns:
        The matching topic names, in the order they appear in *topics*.
    """
    if topics is None:
        topics = CURRICULUM_TOPICS

    # Tokenize the text once; set membership makes each keyword test O(1).
    text_words = set(re.findall(r"[a-z0-9]+", text.lower()))

    found_topics = []
    for topic in topics:
        keywords = [
            word
            for word in re.findall(r"[a-z0-9]+", topic.lower())
            if word not in _TOPIC_STOPWORDS
        ]
        if any(word in text_words for word in keywords):
            found_topics.append(topic)

    return found_topics
30
+
31
def create_curriculum_summary(docs: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an overview of a collection of processed curriculum documents.

    Args:
        docs: Document dictionaries. The ``'content'`` and ``'filename'``
            keys are read; both default to an empty string when absent.

    Returns:
        A dictionary with the keys ``total_documents``,
        ``total_content_length``, ``topics_covered`` and ``document_types``.
        ``document_types`` maps ``"Week <n>"`` to the file name whose name
        contains ``week<n>``; a later document with the same week number
        replaces an earlier one.
    """
    contents = [doc.get('content', '') for doc in docs]
    total_chars = sum(map(len, contents))

    # Map week labels to file names, based on a "week<N>" marker in the name.
    weekly = {}
    for doc in docs:
        name = doc.get('filename', '')
        hit = re.search(r'week\s*(\d+)', name.lower())
        if hit:
            weekly[f"Week {hit.group(1)}"] = name

    # Topics are detected across the combined text of all documents.
    topics = extract_curriculum_topics(' '.join(contents))

    return {
        "total_documents": len(docs),
        "total_content_length": total_chars,
        "topics_covered": topics,
        "document_types": weekly,
    }
53
+
54
def validate_pdf_file(file_path: str) -> bool:
    """Return whether *file_path* opens as a document with at least one page.

    The document handle is closed in a ``finally`` clause, so it is released
    even if reading the page count raises. Previously that path returned
    ``False`` without closing the handle.

    Args:
        file_path: Path of the file to check.

    Returns:
        ``True`` if the file opens and has one or more pages; ``False`` if it
        has no pages or any error occurs while opening or inspecting it.
    """
    try:
        doc = fitz.open(file_path)
        try:
            return doc.page_count > 0
        finally:
            doc.close()
    except Exception:
        # Any failure is treated as "not a readable PDF".
        return False
65
+
66
def get_file_info(file_path: str) -> Dict[str, Any]:
    """Collect basic metadata about a PDF file.

    The document handle is closed in a ``finally`` clause, so it is released
    even when gathering the metadata fails after the file was opened (for
    example if the size lookup raises).

    Args:
        file_path: Path of the file to inspect.

    Returns:
        On success, a dictionary with ``filename``, ``page_count``,
        ``file_size`` (bytes) and ``is_valid`` set to ``True``. On any error,
        a dictionary with ``filename``, ``error`` (the exception text) and
        ``is_valid`` set to ``False``.
    """
    try:
        doc = fitz.open(file_path)
        try:
            return {
                "filename": Path(file_path).name,
                "page_count": doc.page_count,
                "file_size": Path(file_path).stat().st_size,
                "is_valid": True,
            }
        finally:
            doc.close()
    except Exception as e:
        return {
            "filename": Path(file_path).name,
            "error": str(e),
            "is_valid": False,
        }