Harsh-1132 committed on
Commit
c4f7836
·
1 Parent(s): 7717526
Files changed (3) hide show
  1. Data/shl_catalog.csv +0 -153
  2. api/main.py +22 -19
  3. setup.py +27 -33
Data/shl_catalog.csv DELETED
@@ -1,153 +0,0 @@
1
- assessment_name,assessment_url,category,test_type,description
2
- Latest browser options,https://browsehappy.com/,General,K,Latest browser options
3
- Careers,https://www.shl.com/careers/,General,P,Careers
4
- Our Culture,https://www.shl.com/careers/our-culture/,General,P,Our Culture
5
- Our Teams,https://www.shl.com/careers/our-teams/,General,P,Our Teams
6
- Our People,https://www.shl.com/careers/our-people/,General,P,Our People
7
- Join SHL,https://www.shl.com/careers/join-shl/,General,P,Join SHL
8
- Latest Jobs,https://www.shl.com/careers/jobs/,General,K,Latest Jobs
9
- Contact,https://www.shl.com/about/company/contact/,General,P,Contact
10
- Practice Tests,https://www.shl.com/shldirect/en/practice-tests/,General,K,Practice Tests
11
- Support,https://support.shl.com/,General,P,Support
12
- Candidate Support,https://support.shl.com/categories.html?hl=en&c=10_91_12_,General,P,Candidate Support
13
- Client Support,https://support.shl.com/categories.html?hl=en&c=10_91_13_,General,P,Client Support
14
- Contact Us,https://support.shl.com/KB_ContactUs?cg=candidate&l=en_US&p=&pt=&lg=&cg=,General,P,Contact Us
15
- Practice Site & Advice,https://www.shl.com/shldirect/en/practice-tests/,General,P,Practice Site & Advice
16
- Browser Check,https://support.shl.com/apex/BrowserCheck,General,P,Browser Check
17
- Login,https://www.shl.com/login/,General,P,Login
18
- Buy Online,https://www.shl.com/shl-online/,General,P,Buy Online
19
- English (Global),https://www.shl.com/,General,P,English (Global)
20
- English (India),https://www.shl.com/en-in/,General,P,English (India)
21
- English (Middle East & North Africa),https://www.shl.com/en-mena/,General,P,English (Middle East & North Africa)
22
- English (South Africa),https://www.shl.com/en-za/,General,P,English (South Africa)
23
- 简体中文 (Chinese),https://www.shlglobal.cn/,General,P,简体中文 (Chinese)
24
- 日本語 (Japanese),https://www.shl.co.jp/,General,P,日本語 (Japanese)
25
- Global Offices,https://www.shl.com/about/company/global-offices/,General,P,Global Offices
26
- Talent Acquisition,https://www.shl.com/solutions/talent-acquisition/,General,P,Talent Acquisition
27
- Graduate & Early Careers,https://www.shl.com/solutions/talent-acquisition/graduate/,General,P,Graduate & Early Careers
28
- Manager Hiring,https://www.shl.com/solutions/talent-acquisition/manager/,General,P,Manager Hiring
29
- Interviewing,https://www.shl.com/solutions/talent-acquisition/interviewing/,General,P,Interviewing
30
- Technology Hiring,https://www.shl.com/solutions/talent-acquisition/tech-hiring/,General,P,Technology Hiring
31
- Professional Hiring,https://www.shl.com/solutions/talent-acquisition/professional/,General,P,Professional Hiring
32
- Volume Hiring,https://www.shl.com/solutions/talent-acquisition/volume-hiring/,General,P,Volume Hiring
33
- BPO Hiring,https://www.shl.com/solutions/talent-acquisition/volume-hiring/bpo-hiring/,General,P,BPO Hiring
34
- Contact Center Hiring,https://www.shl.com/solutions/talent-acquisition/volume-hiring/contact-center-hiring/,General,P,Contact Center Hiring
35
- Retail Hiring,https://www.shl.com/solutions/talent-acquisition/volume-hiring/retail-hiring/,General,P,Retail Hiring
36
- Talent Management,https://www.shl.com/solutions/talent-management/,Leadership,P,Talent Management
37
- Succession Planning,https://www.shl.com/solutions/talent-management/succession-planning/,General,P,Succession Planning
38
- Enterprise Leader Development,https://www.shl.com/solutions/talent-management/enterprise-leader-development/,General,P,Enterprise Leader Development
39
- High Potential Identification,https://www.shl.com/solutions/talent-management/hipo/,General,P,High Potential Identification
40
- Manager Development,https://www.shl.com/solutions/talent-management/manager-development/,General,P,Manager Development
41
- Skills Development,https://www.shl.com/solutions/talent-management/skills-development/,General,K,Skills Development
42
- Sales Transformation,https://www.shl.com/solutions/talent-management/sales-transformation/,General,P,Sales Transformation
43
- Talent Mobility,https://www.shl.com/solutions/talent-management/talent-mobility/,General,P,Talent Mobility
44
- Talent Acquisition Demos,https://www.shl.com/resources/by-type/demos/#talent-acquisition-demos,General,P,Talent Acquisition Demos
45
- Talent Management Demos,https://www.shl.com/resources/by-type/demos/#talent-management-demos,Leadership,P,Talent Management Demos
46
- Launch Calculator,https://www.shl.com/resources/by-type/guides-and-ebooks/smart-interview-professional-value-calculator/,General,P,Launch Calculator
47
- Products,https://www.shl.com/products/,General,P,Products
48
- Occupational Personality Questionnaire (OPQ),https://www.shl.com/products/assessments/personality-assessment/shl-occupational-personality-questionnaire-opq/,Personality,P,Occupational Personality Questionnaire (OPQ)
49
- Job-Focused Assessments (JFA),https://www.shl.com/products/assessments/job-focused-assessments/,General,P,Job-Focused Assessments (JFA)
50
- Motivational Questionnaire (MQ),https://www.shl.com/products/assessments/personality-assessment/shl-motivation-questionnaire-mq/,General,P,Motivational Questionnaire (MQ)
51
- Situational Judgment Tests (SJT),https://www.shl.com/products/assessments/behavioral-assessments/situation-judgement-tests-sjt/,General,P,Situational Judgment Tests (SJT)
52
- SHL Verify,https://www.shl.com/products/assessments/cognitive-assessments/,General,P,SHL Verify
53
- SHL 360,https://www.shl.com/products/360/,General,P,SHL 360
54
- Assessments,https://www.shl.com/products/assessments/,General,P,Assessments
55
- Behavioral Assessments,https://www.shl.com/products/assessments/behavioral-assessments/,Personality,P,Behavioral Assessments
56
- Cognitive Assessments,https://www.shl.com/products/assessments/cognitive-assessments/,General,K,Cognitive Assessments
57
- Personality Assessments,https://www.shl.com/products/assessments/personality-assessment/,Personality,P,Personality Assessments
58
- Video Interviews,https://www.shl.com/products/video-interviews/,General,P,Video Interviews
59
- Skills & Simulations,https://www.shl.com/products/assessments/skills-and-simulations/,General,K,Skills & Simulations
60
- Call Center Simulations,https://www.shl.com/products/assessments/skills-and-simulations/call-center-simulations/,General,P,Call Center Simulations
61
- Business Skills,https://www.shl.com/products/assessments/skills-and-simulations/business-skills/,General,K,Business Skills
62
- Coding Simulations,https://www.shl.com/products/assessments/skills-and-simulations/coding-simulations/,Technical,K,Coding Simulations
63
- Technical Skills,https://www.shl.com/products/assessments/skills-and-simulations/technical-skills/,General,K,Technical Skills
64
- Language Evaluation,https://www.shl.com/products/assessments/skills-and-simulations/language-evaluation/,Verbal,P,Language Evaluation
65
- View all SHL ProductsGet the ultimate view of potential with SHL’s unmatched portfolio of assessments and interview technology.,https://www.shl.com/products/,General,P,View all SHL ProductsGet the ultimate view of potential with SHL’s unmatched portfolio of assessments and interview technology.
66
- Services,https://www.shl.com/solutions/services/,General,P,Services
67
- Managed Services,https://www.shl.com/solutions/services/managed-services/,General,P,Managed Services
68
- Training Services,https://www.shl.com/solutions/services/training-services/,General,P,Training Services
69
- SHL Certification (OPQ/Verify),https://www.shl.com/solutions/services/training-services/personality-and-ability-assessment-training/,General,P,SHL Certification (OPQ/Verify)
70
- Training Calendar,https://www.shl.com/solutions/services/training-calendar/,General,P,Training Calendar
71
- Outsourced Assessments (VADC),https://www.shl.com/products/assessments/assessment-and-development-centers/,General,P,Outsourced Assessments (VADC)
72
- View Product Catalog,https://www.shl.com/products/product-catalog/,General,P,View Product Catalog
73
- HR Priorities,https://www.shl.com/hr-priorities/,General,P,HR Priorities
74
- HR PrioritiesExplore the latest HR priorities and insights on workforce trends.,https://www.shl.com/hr-priorities/,General,K,HR PrioritiesExplore the latest HR priorities and insights on workforce trends.
75
- Skills-Based Organizations,https://www.shl.com/hr-priorities/skills-based-organizations/,General,K,Skills-Based Organizations
76
- Skills-Based Hiring,https://www.shl.com/hr-priorities/skills-based-organizations/skills-based-hiring/,General,K,Skills-Based Hiring
77
- Skills-Based Talent Management,https://www.shl.com/hr-priorities/skills-based-organizations/skills-based-talent-management/,Leadership,K,Skills-Based Talent Management
78
- Decisions with People Data,https://www.shl.com/hr-priorities/decisions-with-people-data/,General,K,Decisions with People Data
79
- Manager and Leader Development,https://www.shl.com/hr-priorities/manager-leadership-development/,General,P,Manager and Leader Development
80
- Watch Now,https://www.shl.com/resources/by-type/webinars/ai-and-the-future-of-work-how-hr-leads-the-skills-transformation/,General,P,Watch Now
81
- Resources,https://www.shl.com/resources/,General,P,Resources
82
- View all SHL Resources,https://www.shl.com/resources/,General,P,View all SHL Resources
83
- Blogs,https://www.shl.com/resources/by-type/blog/,General,P,Blogs
84
- "eBooks, Guides, and Tools",https://www.shl.com/resources/by-type/guides-and-ebooks/,General,P,"eBooks, Guides, and Tools"
85
- Research & Reports,https://www.shl.com/resources/by-type/whitepapers-and-reports/,General,P,Research & Reports
86
- Webinars,https://www.shl.com/resources/by-type/webinars/,General,P,Webinars
87
- Demos On-Demand,https://www.shl.com/resources/by-type/demos/,General,P,Demos On-Demand
88
- Customer Stories,https://www.shl.com/resources/by-type/customer-stories/,General,P,Customer Stories
89
- View all Resources,https://www.shl.com/resources/,General,P,View all Resources
90
- SHL LabsAdvancing Talent with Innovation and Insights,https://www.shl.com/resources/shl-labs/,General,P,SHL LabsAdvancing Talent with Innovation and Insights
91
- Candidate Experience,https://www.shl.com/resources/shl-labs/candidate-experience/,General,P,Candidate Experience
92
- People Insights,https://www.shl.com/resources/shl-labs/people-insights/,General,P,People Insights
93
- "Diversity, Inclusion, and Accessibility",https://www.shl.com/resources/shl-labs/diversity-equity-inclusion-belonging-and-accessibility/,General,P,"Diversity, Inclusion, and Accessibility"
94
- Our Science,https://www.shl.com/resources/shl-labs/our-science/,General,P,Our Science
95
- Research Publications,https://www.shl.com/resources/shl-labs/research-publications/,General,P,Research Publications
96
- Read Report,https://www.shl.com/resources/by-type/whitepapers-and-reports/hr-skills-insights-creating-a-future-ready-hr-team-built-for-success/,General,P,Read Report
97
- About,https://www.shl.com/about/,General,P,About
98
- Learn More,https://www.shl.com/about/,General,P,Learn More
99
- Company,https://www.shl.com/about/company/,General,P,Company
100
- Leadership Team,https://www.shl.com/about/company/leadership-team/,Leadership,P,Leadership Team
101
- News & Events,https://www.shl.com/about/news-and-events/,General,P,News & Events
102
- Press Releases,https://www.shl.com/about/news-and-events/press-releases/,General,P,Press Releases
103
- In the News,https://www.shl.com/about/news-and-events/in-the-news/,General,P,In the News
104
- Awards & Accolades,https://www.shl.com/about/news-and-events/awards-and-accolades/,General,P,Awards & Accolades
105
- Events & Conferences,https://www.shl.com/about/news-and-events/events/,General,P,Events & Conferences
106
- Partners,https://www.shl.com/about/partners/,General,P,Partners
107
- Research Partners,https://www.shl.com/about/partners/research-partners/,General,P,Research Partners
108
- Skills Partner Program,https://www.shl.com/about/partners/skills-partner-program/,General,K,Skills Partner Program
109
- Resellers,https://www.shl.com/about/partners/resellers/,General,P,Resellers
110
- Sales Inquiries,https://www.shl.com/about/company/contact/book-a-demo/,General,P,Sales Inquiries
111
- Media Inquiries,https://www.shl.com/about/company/contact/#media-inquiries,General,P,Media Inquiries
112
- Book a Demo,https://www.shl.com/about/company/contact/book-a-demo/,General,P,Book a Demo
113
- Home,https://www.shl.com/,General,P,Home
114
- Administrative Professional - Short Form,https://www.shl.com/products/product-catalog/view/administrative-professional-short-form/,General,P,Administrative Professional - Short Form
115
- Apprentice + 8.0 Job Focused Assessment,https://www.shl.com/products/product-catalog/view/apprentice-8-0-job-focused-assessment-4261/,General,P,Apprentice + 8.0 Job Focused Assessment
116
- Apprentice 8.0 Job Focused Assessment,https://www.shl.com/products/product-catalog/view/apprentice-8-0-job-focused-assessment/,General,P,Apprentice 8.0 Job Focused Assessment
117
- Bank Administrative Assistant - Short Form,https://www.shl.com/products/product-catalog/view/bank-administrative-assistant-short-form/,General,P,Bank Administrative Assistant - Short Form
118
- Bank Collections Agent - Short Form,https://www.shl.com/products/product-catalog/view/bank-collections-agent-short-form/,General,P,Bank Collections Agent - Short Form
119
- Bank Operations Supervisor - Short Form,https://www.shl.com/products/product-catalog/view/bank-operations-supervisor-short-form/,Leadership,P,Bank Operations Supervisor - Short Form
120
- "Bookkeeping, Accounting, Auditing Clerk Short Form",https://www.shl.com/products/product-catalog/view/bookkeeping-accounting-auditing-clerk-short-form/,General,P,"Bookkeeping, Accounting, Auditing Clerk Short Form"
121
- Branch Manager - Short Form,https://www.shl.com/products/product-catalog/view/branch-manager-short-form/,General,P,Branch Manager - Short Form
122
- Next,https://www.shl.com/products/product-catalog/?start=12&type=2,General,P,Next
123
- Global Skills Development Report,https://www.shl.com/products/product-catalog/view/global-skills-development-report/,General,K,Global Skills Development Report
124
- .NET Framework 4.5,https://www.shl.com/products/product-catalog/view/net-framework-4-5/,General,P,.NET Framework 4.5
125
- .NET MVC (New),https://www.shl.com/products/product-catalog/view/net-mvc-new/,General,P,.NET MVC (New)
126
- .NET MVVM (New),https://www.shl.com/products/product-catalog/view/net-mvvm-new/,General,P,.NET MVVM (New)
127
- .NET WCF (New),https://www.shl.com/products/product-catalog/view/net-wcf-new/,General,P,.NET WCF (New)
128
- .NET WPF (New),https://www.shl.com/products/product-catalog/view/net-wpf-new/,General,P,.NET WPF (New)
129
- .NET XAML (New),https://www.shl.com/products/product-catalog/view/net-xaml-new/,General,P,.NET XAML (New)
130
- Accounts Payable (New),https://www.shl.com/products/product-catalog/view/accounts-payable-new/,General,P,Accounts Payable (New)
131
- Accounts Payable Simulation (New),https://www.shl.com/products/product-catalog/view/accounts-payable-simulation-new/,General,P,Accounts Payable Simulation (New)
132
- Accounts Receivable (New),https://www.shl.com/products/product-catalog/view/accounts-receivable-new/,General,P,Accounts Receivable (New)
133
- Accounts Receivable Simulation (New),https://www.shl.com/products/product-catalog/view/accounts-receivable-simulation-new/,General,P,Accounts Receivable Simulation (New)
134
- ADO.NET (New),https://www.shl.com/products/product-catalog/view/ado-net-new/,General,P,ADO.NET (New)
135
- About SHL,https://www.shl.com/about/,General,P,About SHL
136
- Case Studies,https://www.shl.com/resources/by-type/customer-stories/,General,P,Case Studies
137
- SHL Careers,https://www.shl.com/careers/,General,P,SHL Careers
138
- Subscribe,https://www.shl.com/about/company/contact/subscribe/,General,P,Subscribe
139
- Platform Login,https://www.shl.com/login/,General,P,Platform Login
140
- Client Support↗,https://support.shl.com/categories.html?hl=en&c=10_91_13_,General,P,Client Support↗
141
- Product Catalog,https://www.shl.com/products/product-catalog/,General,P,Product Catalog
142
- Candidate Support↗,https://support.shl.com/categories.html?hl=en&c=10_91_12_,General,P,Candidate Support↗
143
- Raise an Issue↗,https://support.shl.com/contactUs.html?hl=en&c=10_91_12_,General,P,Raise an Issue↗
144
- Neurodiversity Hub,https://www.shl.com/shldirect/en/neurodiversity-information-hub-for-candidates/,General,P,Neurodiversity Hub
145
- AMCAT↗,https://www.myamcat.com,General,P,AMCAT↗
146
- Cookie Policy,https://www.shl.com/legal/privacy/cookie-policy/,General,P,Cookie Policy
147
- Privacy Notice,https://www.shl.com/legal/privacy/,General,P,Privacy Notice
148
- Security & Compliance,https://www.shl.com/legal/security-and-compliance/,General,P,Security & Compliance
149
- Legal Resources,https://www.shl.com/legal/,General,P,Legal Resources
150
- UK Modern Slavery,https://www.shl.com/legal/shl-modern-slavery-act/,General,P,UK Modern Slavery
151
- Site Map,https://www.shl.com/company/site-map/,General,P,Site Map
152
- Site Search,https://www.shl.com/search/,General,P,Site Search
153
- Search by keyword...,https://www.shl.com/products/product-catalog/view/account-manager-solution/,General,P,Search by keyword...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/main.py CHANGED
@@ -292,23 +292,23 @@ async def startup_event():
292
  # Create directories
293
  os.makedirs('data', exist_ok=True)
294
  os.makedirs('models', exist_ok=True)
295
- os.makedirs('Data', exist_ok=True)
296
 
297
- # Run setup
298
  from src.crawler import SHLCrawler
299
- from src.embedder import AssessmentEmbedder
300
-
301
- logger.info("📊 Scraping SHL catalog...")
302
  crawler = SHLCrawler()
303
- crawler.scrape_catalog()
 
 
 
 
 
 
304
 
305
- logger.info("🔮 Building search index...")
306
- embedder = AssessmentEmbedder()
307
- embedder.load_catalog()
308
- embedder.create_embeddings()
309
  embedder.build_index()
310
- embedder.save_index()
311
-
312
  logger.info("✅ Setup complete!")
313
 
314
  # Load recommender
@@ -349,9 +349,9 @@ async def health():
349
  """Health check endpoint"""
350
  return {
351
  "status": "healthy" if recommender and reranker else "initializing",
352
- "index_loaded": recommender is not None and recommender.index is not None,
353
- "catalog_size": len(recommender.assessment_data) if recommender and recommender.assessment_data else 0,
354
- "reranker_loaded": reranker is not None
355
  }
356
 
357
  @app.post("/recommend", response_model=RecommendResponse)
@@ -402,12 +402,15 @@ async def get_catalog():
402
  raise HTTPException(status_code=503, detail="Service initializing")
403
 
404
  try:
 
 
 
405
  return {
406
- "assessments": recommender.assessment_data,
407
- "count": len(recommender.assessment_data),
408
  "types": {
409
- "K": sum(1 for a in recommender.assessment_data if a.get('test_type') == 'K'),
410
- "P": sum(1 for a in recommender.assessment_data if a.get('test_type') == 'P')
411
  }
412
  }
413
  except Exception as e:
 
292
  # Create directories
293
  os.makedirs('data', exist_ok=True)
294
  os.makedirs('models', exist_ok=True)
 
295
 
296
+ # Build catalog
297
  from src.crawler import SHLCrawler
 
 
 
298
  crawler = SHLCrawler()
299
+ df = crawler.scrape_catalog()
300
+ try:
301
+ df = df.fillna('')
302
+ df.to_csv('data/shl_catalog.csv', index=False)
303
+ logger.info("📊 Catalog saved to data/shl_catalog.csv")
304
+ except Exception as e:
305
+ logger.warning(f"Catalog save failed: {e}")
306
 
307
+ # Build index using correct embedder
308
+ from src.embedder import EmbeddingGenerator
309
+ logger.info("🔮 Building search index with EmbeddingGenerator...")
310
+ embedder = EmbeddingGenerator()
311
  embedder.build_index()
 
 
312
  logger.info("✅ Setup complete!")
313
 
314
  # Load recommender
 
349
  """Health check endpoint"""
350
  return {
351
  "status": "healthy" if recommender and reranker else "initializing",
352
+ "index_loaded": bool(recommender and getattr(recommender, 'faiss_index', None)),
353
+ "catalog_size": len(getattr(recommender, 'assessment_mapping', {}) or {}),
354
+ "reranker_loaded": bool(reranker)
355
  }
356
 
357
  @app.post("/recommend", response_model=RecommendResponse)
 
402
  raise HTTPException(status_code=503, detail="Service initializing")
403
 
404
  try:
405
+ # Convert mapping dict to list for API response
406
+ mapping = getattr(recommender, 'assessment_mapping', {})
407
+ assessments = list(mapping.values())
408
  return {
409
+ "assessments": assessments,
410
+ "count": len(assessments),
411
  "types": {
412
+ "K": sum(1 for a in assessments if a.get('test_type') == 'K'),
413
+ "P": sum(1 for a in assessments if a.get('test_type') == 'P')
414
  }
415
  }
416
  except Exception as e:
setup.py CHANGED
@@ -151,13 +151,14 @@ def step1_generate_catalog():
151
  df['Test Type'] = 'K'
152
  logger.info(f"✓ Used first 3 columns with defaults")
153
  else:
154
- logger.error("ERROR - Not enough columns in Excel file")
155
- return False
 
156
 
157
  # Verify we have data
158
  if len(df) == 0:
159
- logger.error("ERROR - Excel file is empty")
160
- return False
161
 
162
  # Clean data
163
  df = df.fillna('')
@@ -173,18 +174,19 @@ def step1_generate_catalog():
173
  return True
174
 
175
  # Priority 3: Scrape from web (last resort)
176
- logger.warning("âš  No local data found, scraping SHL website...")
177
  from src.crawler import SHLCrawler
178
 
 
179
  crawler = SHLCrawler()
180
- crawler.scrape_catalog()
181
-
182
- if os.path.exists(csv_path):
183
- df = pd.read_csv(csv_path)
184
- logger.info(f"✓ Scraped {len(df)} assessments")
185
  return True
186
- else:
187
- logger.error("✗ Scraping failed and no catalog available")
188
  return False
189
 
190
  except Exception as e:
@@ -224,25 +226,14 @@ def step3_build_index():
224
  logger.info("Downloading models and creating embeddings...")
225
 
226
  try:
227
- from src.embedder import AssessmentEmbedder
228
-
229
- embedder = AssessmentEmbedder()
230
-
231
- # Load catalog
232
- embedder.load_catalog()
233
- logger.info(f"✓ Loaded {len(embedder.assessments)} assessments")
234
 
235
- # Create embeddings
236
- embedder.create_embeddings()
237
- logger.info(f"✓ Generated embeddings with shape {embedder.embeddings.shape}")
238
 
239
- # Build FAISS index
240
- embedder.build_index()
241
- logger.info(f"✓ Built FAISS index with {embedder.index.ntotal} vectors")
242
-
243
- # Save
244
- embedder.save_index()
245
- logger.info(f"✓ Index saved to models/ directory")
246
 
247
  return True
248
  except Exception as e:
@@ -323,15 +314,18 @@ def verify_setup():
323
  from src.recommender import AssessmentRecommender
324
 
325
  recommender = AssessmentRecommender()
326
- recommender.load_index()
 
 
 
327
 
328
- num_assessments = len(recommender.assessment_data)
329
- num_vectors = recommender.index.ntotal
330
 
331
  logger.info(f"✓ Loaded {num_assessments} assessments")
332
  logger.info(f"✓ Index has {num_vectors} vectors")
333
 
334
- if num_assessments < 100:
335
  logger.warning(f"âš  Only {num_assessments} assessments (expected 150+)")
336
 
337
  return True
 
151
  df['Test Type'] = 'K'
152
  logger.info(f"✓ Used first 3 columns with defaults")
153
  else:
154
+ logger.error("ERROR - Not enough columns in Excel file, falling back to web scrape")
155
+ # Fall through to scrape step below
156
+ raise FileNotFoundError("Insufficient Excel columns; use scrape fallback")
157
 
158
  # Verify we have data
159
  if len(df) == 0:
160
+ logger.error("ERROR - Excel file is empty, falling back to web scrape")
161
+ raise FileNotFoundError("Empty Excel file; use scrape fallback")
162
 
163
  # Clean data
164
  df = df.fillna('')
 
174
  return True
175
 
176
  # Priority 3: Scrape from web (last resort)
177
+ logger.warning("âš  No local data found or Excel unusable, scraping SHL website...")
178
  from src.crawler import SHLCrawler
179
 
180
+ os.makedirs('data', exist_ok=True)
181
  crawler = SHLCrawler()
182
+ df = crawler.scrape_catalog()
183
+ try:
184
+ df = df.fillna('')
185
+ df.to_csv(csv_path, index=False)
186
+ logger.info(f"✓ Scraped {len(df)} assessments; saved to {csv_path}")
187
  return True
188
+ except Exception as e:
189
+ logger.error(f"✗ Scraping failed and no catalog available: {e}")
190
  return False
191
 
192
  except Exception as e:
 
226
  logger.info("Downloading models and creating embeddings...")
227
 
228
  try:
229
+ from src.embedder import EmbeddingGenerator
 
 
 
 
 
 
230
 
231
+ embedder = EmbeddingGenerator()
 
 
232
 
233
+ # Build complete index pipeline (loads catalog, generates embeddings, saves artifacts)
234
+ index, embeddings, mapping = embedder.build_index()
235
+ logger.info(f"✓ Built FAISS index with {index.ntotal} vectors")
236
+ logger.info(f"✓ Embeddings shape {embeddings.shape}; Mappings {len(mapping)}")
 
 
 
237
 
238
  return True
239
  except Exception as e:
 
314
  from src.recommender import AssessmentRecommender
315
 
316
  recommender = AssessmentRecommender()
317
+ loaded = recommender.load_index()
318
+ if not loaded:
319
+ logger.error("✗ Recommender failed to load index during verification")
320
+ return False
321
 
322
+ num_assessments = len(recommender.assessment_mapping)
323
+ num_vectors = recommender.faiss_index.ntotal if recommender.faiss_index is not None else 0
324
 
325
  logger.info(f"✓ Loaded {num_assessments} assessments")
326
  logger.info(f"✓ Index has {num_vectors} vectors")
327
 
328
+ if num_assessments < 50:
329
  logger.warning(f"âš  Only {num_assessments} assessments (expected 150+)")
330
 
331
  return True