Spaces:
Running
Running
File size: 6,457 Bytes
"""
Setup Utility Script
Helps in initial setup and vectorstore building
"""
import os
import sys
def check_dependencies():
    """Check that every required third-party package can be imported.

    Each entry is imported by its module name (which is not always the pip
    distribution name, e.g. ``PIL`` and ``dotenv``) and one status line is
    printed per package. Only ``ImportError`` is treated as "missing".

    Returns:
        bool: True when every import succeeded, False when at least one
        package could not be imported.
    """
    import importlib

    print("Checking dependencies...")

    required_packages = [
        'langchain',
        'langgraph',
        'langchain_groq',
        'tavily',
        'faiss',
        'transformers',
        'sentence_transformers',
        'pypdf',
        'pytesseract',
        'PIL',
        'torch',
        'dotenv',
    ]

    # NOTE(review): the status glyphs were restored from mis-decoded UTF-8;
    # the cross mark is a best guess - confirm against the original file.
    missing = []
    for package in required_packages:
        try:
            importlib.import_module(package)
        except ImportError:
            print(f" ❌ {package} - NOT FOUND")
            missing.append(package)
        else:
            print(f" ✅ {package}")

    if missing:
        print(f"\n⚠️ Missing packages: {', '.join(missing)}")
        print("Run: pip install -r requirements.txt")
        return False

    print("\n✅ All dependencies installed!")
    return True
def check_env_file():
    """Check that ``.env`` exists in the working directory and is filled in.

    The file is loaded with python-dotenv, then ``GROQ_API_KEY`` and
    ``TAVILY_API_KEY`` are read from the environment. A key counts as
    unconfigured when it is unset, empty, or still starts with ``your_``.

    Returns:
        bool: True when the file exists and both keys are configured,
        False otherwise (including when python-dotenv cannot be imported).
    """
    env_path = '.env'
    print("\nChecking environment configuration...")

    # NOTE(review): the glyphs below were restored from mis-decoded UTF-8;
    # the cross mark and arrow are best guesses - confirm against the original.
    if not os.path.exists(env_path):
        print(" ❌ .env file not found")
        print(" → Copy .env.example to .env and add your API keys")
        return False

    # Imported lazily so the missing-file path above works without the
    # package; a missing package is reported instead of raising a traceback.
    try:
        from dotenv import load_dotenv
    except ImportError:
        print(" ❌ python-dotenv is not installed - cannot read .env")
        print("Run: pip install -r requirements.txt")
        return False

    # Load exactly the file that was checked above rather than letting
    # load_dotenv() search for one on its own.
    load_dotenv(dotenv_path=env_path)

    required_keys = ['GROQ_API_KEY', 'TAVILY_API_KEY']
    missing = []
    for key in required_keys:
        value = os.getenv(key)
        if not value or value.startswith('your_'):
            print(f" ❌ {key} - NOT CONFIGURED")
            missing.append(key)
        else:
            print(f" ✅ {key}")

    if missing:
        print(f"\n⚠️ Missing API keys: {', '.join(missing)}")
        print("Edit .env file and add your API keys")
        return False

    print("\n✅ Environment configured!")
    return True
def _list_pdf_files(folder):
    """Return the names in *folder* ending with '.pdf', or [] if it is not a directory."""
    # isdir (not exists) so a stray file at this path cannot make listdir raise.
    if not os.path.isdir(folder):
        return []
    return [name for name in os.listdir(folder) if name.endswith('.pdf')]


def check_pdf_data():
    """Report how many PDFs are present in the scheme and exam data folders.

    Looks in ``data/schemes_pdfs`` and ``data/exams_pdfs`` (relative to the
    working directory), prints the file counts, and prints a warning for each
    folder that has no ``.pdf`` files. A missing folder counts as empty.

    Returns:
        bool: True when at least one of the two folders contains a PDF.
    """
    print("\nChecking PDF data...")

    schemes_folder = "data/schemes_pdfs"
    exams_folder = "data/exams_pdfs"
    scheme_pdfs = _list_pdf_files(schemes_folder)
    exam_pdfs = _list_pdf_files(exams_folder)

    print(f" Scheme PDFs: {len(scheme_pdfs)} files")
    print(f" Exam PDFs: {len(exam_pdfs)} files")

    if not scheme_pdfs:
        print("\n ⚠️ No scheme PDFs found in data/schemes_pdfs/")
        print(" Add government scheme PDFs to enable scheme recommendations")
    if not exam_pdfs:
        print("\n ⚠️ No exam PDFs found in data/exams_pdfs/")
        print(" Add competitive exam PDFs to enable exam recommendations")

    return bool(scheme_pdfs or exam_pdfs)
def build_vectorstores():
    """Build the scheme and exam vectorstores and report the outcome.

    Each builder is imported lazily from the project's ``rag`` package and
    run inside its own ``try`` block, so a failure in one does not prevent
    the other from running. The closing banner reports success only when
    neither builder raised.
    """
    def _load_scheme_builder():
        from rag.scheme_vectorstore import build_scheme_vectorstore
        return build_scheme_vectorstore

    def _load_exam_builder():
        from rag.exam_vectorstore import build_exam_vectorstore
        return build_exam_vectorstore

    rule = "=" * 70
    print("\n" + rule)
    print("Building Vectorstores")
    print(rule)

    # NOTE(review): the heading emoji was lost to mis-decoding; the book
    # glyph is a best guess - confirm against the original file.
    failed = []
    for title, load_builder in (("Scheme", _load_scheme_builder),
                                ("Exam", _load_exam_builder)):
        label = title.lower()
        print(f"\n📚 Building {title} Vectorstore...")
        try:
            load_builder()()
        except Exception as e:
            # Deliberately broad: this is a best-effort CLI step, and an
            # import or build error here must not abort the other store.
            print(f"❌ Error building {label} vectorstore: {e}")
            failed.append(label)

    print("\n" + rule)
    if failed:
        print(f"⚠️ Vectorstore building finished with errors: {', '.join(failed)}")
    else:
        print("✅ Vectorstore building complete!")
    print(rule)
def _ask_yes_no(prompt):
    """Return True when the user answers 'yes' or 'y' (case-insensitive)."""
    try:
        answer = input(prompt)
    except EOFError:
        # No stdin available (piped or non-interactive run): treat as "no"
        # instead of crashing with a traceback.
        print()
        return False
    return answer.strip().lower() in ('yes', 'y')


def setup_wizard():
    """Run the four-step interactive setup.

    Steps, in order: dependency check, ``.env`` check, PDF data check, and an
    optional vectorstore build. The wizard returns early when dependencies or
    the environment are not ready, or when there is no PDF data and the user
    declines to continue. Otherwise it ends by printing how to start the
    system.
    """
    rule = "=" * 70
    divider = "-" * 70

    # NOTE(review): the banner emoji and the cross mark were lost to
    # mis-decoding; the glyphs used here are best guesses.
    print("\n" + rule)
    print("🚀 JANSAHAYAK SETUP WIZARD")
    print(rule)

    # Step 1: dependencies
    print("\n[1/4] Checking Dependencies")
    print(divider)
    if not check_dependencies():
        print("\n❌ Please install missing dependencies first")
        print("Run: pip install -r requirements.txt")
        return

    # Step 2: environment
    print("\n[2/4] Checking Environment Configuration")
    print(divider)
    if not check_env_file():
        print("\n❌ Please configure your .env file first")
        return

    # Step 3: PDF data (missing data is allowed if the user confirms)
    print("\n[3/4] Checking PDF Data")
    print(divider)
    data_ok = check_pdf_data()
    if not data_ok:
        print("\n⚠️ No PDF data found. System will work with limited functionality.")
        if not _ask_yes_no("\nContinue anyway? (yes/no): "):
            print("\nPlease add PDF files to data/ directories and run setup again.")
            return

    # Step 4: vectorstores (only offered when there is data to index)
    print("\n[4/4] Building Vectorstores")
    print(divider)
    if not data_ok:
        print("\nSkipping vectorstore build - no PDF data available.")
    elif _ask_yes_no("\nBuild vectorstores now? (yes/no): "):
        build_vectorstores()
    else:
        print("\n⚠️ Remember to build vectorstores before running the system!")
        print("Run: python setup.py --build-vectorstores")

    print("\n" + rule)
    print("✅ SETUP COMPLETE!")
    print(rule)
    print("\nYou can now run the system:")
    print(" python main.py")
    print("\nFor help:")
    print(" python main.py --help")
def main():
    """Dispatch on the first command-line argument.

    With no arguments the interactive wizard runs. ``--build-vectorstores``
    builds the vectorstores, ``--check`` runs the dependency, environment and
    PDF checks in that order, and any other argument prints the usage text.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        setup_wizard()
        return

    option = cli_args[0]
    if option == '--build-vectorstores':
        build_vectorstores()
        return

    if option == '--check':
        check_dependencies()
        check_env_file()
        check_pdf_data()
        return

    # Anything else: show the supported invocations.
    usage_lines = (
        "Usage:",
        " python setup.py # Run setup wizard",
        " python setup.py --check # Check configuration",
        " python setup.py --build-vectorstores # Build vectorstores",
    )
    for line in usage_lines:
        print(line)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|