BinKhoaLe1812 committed on
Commit
b344aa7
·
verified ·
1 Parent(s): 3c46098

Lenient thresholds

Browse files
Files changed (1) hide show
  1. models/reranker.py +9 -9
models/reranker.py CHANGED
@@ -42,7 +42,7 @@ class MedicalReranker:
42
  'generic_health_site': 0.30
43
  }
44
 
45
- # Irrelevant content patterns
46
  self.irrelevant_patterns = [
47
  r'quiz|test|assessment|survey',
48
  r'homepage|main page|index',
@@ -54,7 +54,7 @@ class MedicalReranker:
54
  r'healthy-sleep/quiz', # Sleep quiz example
55
  ]
56
 
57
- def rerank_results(self, query: str, results: List[Dict], min_score: float = 0.15) -> List[Dict]:
58
  """Rerank search results based on medical relevance"""
59
  if not results:
60
  return []
@@ -105,10 +105,10 @@ class MedicalReranker:
105
  logger.debug(f"Filtered irrelevant result: {url}")
106
  continue
107
 
108
- # Only skip if we have content and it's too short
109
  # Don't filter based on content length if no content is available yet
110
- if content and len(content) < 50: # Reduced from 100 to 50
111
- logger.debug(f"Filtered result with short content: {url}")
112
  continue
113
 
114
  filtered.append(result)
@@ -123,8 +123,8 @@ class MedicalReranker:
123
  url = result.get('url', '')
124
  domain = self._extract_domain(url)
125
 
126
- # Get domain score - be more lenient with unknown domains
127
- domain_score = self.domain_scores.get(domain, 0.50) # Increased default score
128
 
129
  # Boost score for medical-specific content
130
  title = result.get('title', '').lower()
@@ -243,8 +243,8 @@ class MedicalReranker:
243
  if query.lower() in title:
244
  title_relevance = min(title_relevance + 0.3, 1.0)
245
 
246
- # Update composite score - be more lenient
247
- domain_score = result.get('domain_score', 0.5) # Increased default
248
  result['title_relevance'] = title_relevance
249
  result['composite_score'] = (domain_score * 0.3) + (title_relevance * 0.7) # Favor title relevance
250
 
 
42
  'generic_health_site': 0.30
43
  }
44
 
45
+ # Irrelevant content patterns - more specific to avoid false positives
46
  self.irrelevant_patterns = [
47
  r'quiz|test|assessment|survey',
48
  r'homepage|main page|index',
 
54
  r'healthy-sleep/quiz', # Sleep quiz example
55
  ]
56
 
57
+ def rerank_results(self, query: str, results: List[Dict], min_score: float = 0.05) -> List[Dict]:
58
  """Rerank search results based on medical relevance"""
59
  if not results:
60
  return []
 
105
  logger.debug(f"Filtered irrelevant result: {url}")
106
  continue
107
 
108
+ # Only skip if we have content and it's extremely short
109
  # Don't filter based on content length if no content is available yet
110
+ if content and len(content) < 20: # Much more lenient - only filter very short content
111
+ logger.debug(f"Filtered result with very short content: {url}")
112
  continue
113
 
114
  filtered.append(result)
 
123
  url = result.get('url', '')
124
  domain = self._extract_domain(url)
125
 
126
+ # Get domain score - be much more lenient with unknown domains
127
+ domain_score = self.domain_scores.get(domain, 0.70) # Much higher default score
128
 
129
  # Boost score for medical-specific content
130
  title = result.get('title', '').lower()
 
243
  if query.lower() in title:
244
  title_relevance = min(title_relevance + 0.3, 1.0)
245
 
246
+ # Update composite score - be much more lenient
247
+ domain_score = result.get('domain_score', 0.7) # Much higher default
248
  result['title_relevance'] = title_relevance
249
  result['composite_score'] = (domain_score * 0.3) + (title_relevance * 0.7) # Favor title relevance
250