Miroir committed on
Commit
36cc505
·
1 Parent(s): 73390f3

Changed word service to fix a coroutine handling problem

Browse files
Files changed (2) hide show
  1. services/study_service.py +15 -15
  2. services/word_service.py +10 -9
services/study_service.py CHANGED
@@ -80,38 +80,38 @@ class StudyService:
80
  "vector_norm": None
81
  }
82
 
 
83
  async def analyze_analogy(self,
84
- word1: str,
85
- word2: str,
86
- word3: str,
87
- n_results: int = 10) -> Dict:
88
  """
89
- Analyze word analogies (a:b :: c:?)
90
-
91
  Example: paris:france :: berlin:? (should find "allemagne")
92
  """
93
  try:
94
- # Get vectors
95
  vec1 = await self.word_service.get_vector(word1)
96
  vec2 = await self.word_service.get_vector(word2)
97
  vec3 = await self.word_service.get_vector(word3)
98
-
99
- if None in (vec1, vec2, vec3):
 
100
  return {
101
  "analogy": f"{word1}:{word2} :: {word3}:?",
102
  "similar_words": [],
103
  "error": "One or more words not found in vocabulary"
104
  }
105
-
106
  # Calculate analogy vector (vec2 - vec1 + vec3)
107
  analogy_vec = vec2 - vec1 + vec3
108
-
109
- # Normalize the vector
110
  analogy_vec = analogy_vec / np.linalg.norm(analogy_vec)
111
-
112
- # Find similar words
113
  similar_words = await self.word_service.get_similar_by_vector(analogy_vec, n=n_results)
114
-
115
  return {
116
  "analogy": f"{word1}:{word2} :: {word3}:?",
117
  "similar_words": similar_words
 
80
  "vector_norm": None
81
  }
82
 
83
+
84
  async def analyze_analogy(self,
85
+ word1: str,
86
+ word2: str,
87
+ word3: str,
88
+ n_results: int = 10) -> Dict:
89
  """
90
+ Analyze word analogies (a:b :: c:?).
 
91
  Example: paris:france :: berlin:? (should find "allemagne")
92
  """
93
  try:
94
+ # Get vectors for each word
95
  vec1 = await self.word_service.get_vector(word1)
96
  vec2 = await self.word_service.get_vector(word2)
97
  vec3 = await self.word_service.get_vector(word3)
98
+
99
+ # Use explicit checks to see if any vector is missing
100
+ if vec1 is None or vec2 is None or vec3 is None:
101
  return {
102
  "analogy": f"{word1}:{word2} :: {word3}:?",
103
  "similar_words": [],
104
  "error": "One or more words not found in vocabulary"
105
  }
106
+
107
  # Calculate analogy vector (vec2 - vec1 + vec3)
108
  analogy_vec = vec2 - vec1 + vec3
109
+
110
+ # Normalize the analogy vector
111
  analogy_vec = analogy_vec / np.linalg.norm(analogy_vec)
112
+
113
+ # Find similar words using the analogy vector
114
  similar_words = await self.word_service.get_similar_by_vector(analogy_vec, n=n_results)
 
115
  return {
116
  "analogy": f"{word1}:{word2} :: {word3}:?",
117
  "similar_words": similar_words
services/word_service.py CHANGED
@@ -121,8 +121,9 @@ class WordEmbeddingService:
121
  logger.exception(f"Error finding similar words for: {target_word}")
122
  return []
123
 
 
124
  async def get_words_in_range(self, target_word: str, min_similarity: float,
125
- max_similarity: float, n: int = 5) -> List[Dict[str, float]]:
126
  """Get words within a similarity range"""
127
  try:
128
  logger.info(f"Finding words for '{target_word}' in range [{min_similarity}, {max_similarity}]")
@@ -131,8 +132,8 @@ class WordEmbeddingService:
131
  logger.warning(f"No vector for target word: {target_word}")
132
  return []
133
 
134
- # Run the CPU-intensive operations in a thread pool
135
- async def process_words():
136
  similarities = []
137
  norm_target = np.linalg.norm(target_vec)
138
  sample_size = min(100000, len(self.vocab_vectors))
@@ -141,24 +142,24 @@ class WordEmbeddingService:
141
  for vocab_word in sampled_words:
142
  if vocab_word == target_word.lower():
143
  continue
144
-
145
  vector = self.vocab_vectors[vocab_word]
146
  sim = float(np.dot(vector, target_vec) /
147
- (np.linalg.norm(vector) * norm_target))
148
-
149
  if min_similarity <= sim <= max_similarity:
150
  similarities.append({'word': vocab_word, 'similarity': sim})
151
-
152
  return similarities
153
 
 
154
  similarities = await asyncio.to_thread(process_words)
155
-
156
  if not similarities:
157
  return []
158
 
159
  similarities.sort(key=lambda x: x['similarity'], reverse=True)
160
  selected_words = random.sample(similarities, min(n, len(similarities)))
161
-
162
  return selected_words
163
 
164
  except Exception as e:
 
121
  logger.exception(f"Error finding similar words for: {target_word}")
122
  return []
123
 
124
+
125
  async def get_words_in_range(self, target_word: str, min_similarity: float,
126
+ max_similarity: float, n: int = 5) -> List[Dict[str, float]]:
127
  """Get words within a similarity range"""
128
  try:
129
  logger.info(f"Finding words for '{target_word}' in range [{min_similarity}, {max_similarity}]")
 
132
  logger.warning(f"No vector for target word: {target_word}")
133
  return []
134
 
135
+ # Define a synchronous function to process words
136
+ def process_words():
137
  similarities = []
138
  norm_target = np.linalg.norm(target_vec)
139
  sample_size = min(100000, len(self.vocab_vectors))
 
142
  for vocab_word in sampled_words:
143
  if vocab_word == target_word.lower():
144
  continue
145
+
146
  vector = self.vocab_vectors[vocab_word]
147
  sim = float(np.dot(vector, target_vec) /
148
+ (np.linalg.norm(vector) * norm_target))
149
+
150
  if min_similarity <= sim <= max_similarity:
151
  similarities.append({'word': vocab_word, 'similarity': sim})
152
+
153
  return similarities
154
 
155
+ # Use to_thread to run the synchronous function in a thread
156
  similarities = await asyncio.to_thread(process_words)
157
+
158
  if not similarities:
159
  return []
160
 
161
  similarities.sort(key=lambda x: x['similarity'], reverse=True)
162
  selected_words = random.sample(similarities, min(n, len(similarities)))
 
163
  return selected_words
164
 
165
  except Exception as e: