Spaces:

garvitcpp
/

qodex

Running

App Files Files Community

garvitcpp committed on Nov 17, 2025

Commit

e85c608

verified ·

1 Parent(s): a722b8e

Update app/services/vector_service.py

Browse files

Files changed (1) hide show

app/services/vector_service.py +61 -3

app/services/vector_service.py CHANGED Viewed

@@ -23,9 +23,67 @@ class VectorService:
         """Store embeddings using Pinecone"""
         return await self.pinecone_service.store_embeddings(repository_id, embedded_chunks)
-    async def search_similar_code(self, repository_id: int, query_embedding: List[float], top_k: int = 5) -> List[Dict]:
-        """Search for similar code using Pinecone"""
-        return await self.pinecone_service.search_similar_code(repository_id, query_embedding, top_k)
     async def delete_repository_data(self, repository_id: int):
         """Delete repository data using Pinecone"""

         """Store embeddings using Pinecone"""
         return await self.pinecone_service.store_embeddings(repository_id, embedded_chunks)
+    async def search_similar_code(self, repository_id: int, query_embedding: List[float], top_k: int = 5, query_text: str = "") -> List[Dict]:
+        """Search Pinecone for similar code, optionally re-ranked by keyword overlap.
+
+        Args:
+            repository_id: Repository identifier forwarded to the Pinecone service.
+            query_embedding: Embedding vector of the query.
+            top_k: Maximum number of results to return.
+            query_text: Raw query text. When non-empty, a larger candidate pool
+                is fetched and re-ranked with ``_apply_keyword_boost``.
+
+        Returns:
+            At most ``top_k`` result dicts. An empty (or falsy) response from
+            the Pinecone service is returned unchanged.
+        """
+        # Over-fetch only when re-ranking will actually happen; a plain semantic
+        # search must not pull (or return) three times what the caller asked for.
+        candidate_count = top_k * 3 if query_text else top_k
+        results = await self.pinecone_service.search_similar_code(repository_id, query_embedding, candidate_count)
+        if not results:
+            return results
+        # If query text provided, apply keyword boosting
+        if query_text:
+            print(f"🔄 [HYBRID] Applying keyword boost for: '{query_text}'", flush=True)
+            results = self._apply_keyword_boost(results, query_text)
+            results = results[:top_k]  # Return only top_k after re-ranking
+            print(f"✅ [HYBRID] Re-ranked and returning top {len(results)} results", flush=True)
+        # Enforce the top_k contract on every path, not just the hybrid one.
+        return results[:top_k]
+    def _apply_keyword_boost(self, results: List[Dict], query: str) -> List[Dict]:
+        """Re-rank semantic search results by blending in a keyword-overlap score.
+
+        Each result dict is updated in place: ``similarity`` is overwritten with
+        the hybrid score (70% semantic + 30% keyword), and the two components
+        are stored under ``semantic_score`` and ``keyword_score``. The list is
+        then sorted in place, best hybrid score first.
+
+        Args:
+            results: Result dicts; the ``content``, ``file_path`` and
+                ``similarity`` keys are read, and missing or None values are
+                treated as empty / 0.
+            query: Raw query text to extract keywords from.
+
+        Returns:
+            The same ``results`` list, re-scored and re-sorted.
+        """
+        import string  # function-scope import: the file's import block is not visible here
+
+        # Remove common stop words
+        stop_words = {'the', 'is', 'a', 'an', 'in', 'on', 'at', 'for', 'to', 'of', 'and', 'or', 'how', 'what', 'why', 'where', 'when', 'it', 'this', 'that', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'should', 'could', 'can', 'may', 'might', 'must'}
+        # Strip surrounding punctuation so "auth?" matches "auth" and "what?" is
+        # still recognised as a stop word. Empty tokens are dropped because ''
+        # is a substring of every string and would count as a match everywhere.
+        stripped_words = (word.strip(string.punctuation) for word in query.lower().split())
+        query_keywords = {word for word in stripped_words if word} - stop_words
+        print(f"🔑 [KEYWORDS] Extracted: {query_keywords}", flush=True)
+        # Best possible raw score per result: every keyword found in the content
+        # (1 point) and in the file path (2 points).
+        max_keyword_points = len(query_keywords) * 3
+        for result in results:
+            content = (result.get('content') or '').lower()
+            file_path = (result.get('file_path') or '').lower()
+            # Count keyword matches in content
+            content_matches = sum(1 for keyword in query_keywords if keyword in content)
+            # Count keyword matches in file path (weighted higher)
+            path_matches = sum(1 for keyword in query_keywords if keyword in file_path)
+            # Calculate keyword score (0 to 1)
+            if query_keywords:
+                keyword_score = (content_matches + path_matches * 2) / max_keyword_points
+            else:
+                keyword_score = 0
+            # Original semantic similarity
+            semantic_score = result.get('similarity') or 0
+            # Hybrid score: 70% semantic + 30% keyword
+            hybrid_score = (semantic_score * 0.7) + (keyword_score * 0.3)
+            # Update the result
+            result['similarity'] = hybrid_score
+            result['semantic_score'] = semantic_score
+            result['keyword_score'] = keyword_score
+            print(f"📊 [SCORE] {file_path}: semantic={semantic_score:.3f}, keyword={keyword_score:.3f}, hybrid={hybrid_score:.3f}", flush=True)
+        # Re-sort by hybrid score
+        results.sort(key=lambda x: x['similarity'], reverse=True)
+        return results
     async def delete_repository_data(self, repository_id: int):
         """Delete repository data using Pinecone"""