# Qsearch / App / engine / cpi_math.py
# flyfir248's picture
# Deployment v1.0 - Zero Secret History
# d21c4c8
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def calculate_cpi(author_data, user_query):
    """Return the Composite Performance Index (CPI) of an author for a query.

    The index is a weighted sum of three dimensions, each in ``[0, 1]``,
    scaled to ``0-100`` and rounded to one decimal place:

    * Authority (weight 0.3): ``h_index / 100``, clamped to ``[0, 1]``.
    * Topical density (weight 0.5): TF-IDF cosine similarity between the
      query and the author's interests plus publication titles.
    * Centrality (weight 0.2): fixed placeholder value of 0.5.

    Args:
        author_data: Mapping read via the keys ``'h_index'`` (number),
            ``'interests'`` and ``'pub_titles'`` (iterables of strings).
            Missing or ``None`` entries are treated as 0 / empty.
        user_query: Free-text query; ``None`` is treated as an empty string.

    Returns:
        The CPI as a plain ``float`` rounded to one decimal place.
    """
    # Weights based on the requested methodology (authority, depth, centrality).
    w_authority, w_depth, w_centrality = 0.3, 0.5, 0.2

    # Dimension A: Authority (h-index normalization).
    # Clamp on both sides so a negative or missing h-index cannot pull the
    # composite score below the contribution of the other dimensions.
    h_index = float(author_data.get('h_index') or 0)
    auth_score = min(max(h_index / 100, 0.0), 1.0)

    # Dimension B: Topical Density (semantic similarity).
    # Compare the query against author interests and publication corpus.
    interests = author_data.get('interests') or []
    pub_titles = author_data.get('pub_titles') or []
    corpus = " ".join(
        str(text) for text in list(interests) + list(pub_titles) if text
    ).lower()
    query = (user_query or "").lower()

    depth_score = 0.0
    # A blank side always has zero similarity, so skip vectorization entirely;
    # this also avoids the "empty vocabulary" error when both sides are blank.
    if query.strip() and corpus.strip():
        try:
            tfidf = TfidfVectorizer().fit_transform([query, corpus])
        except ValueError:
            # Raised when neither text contains a token the vectorizer
            # indexes (e.g. only punctuation or single characters).
            depth_score = 0.0
        else:
            similarity = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]
            # Guard against floating-point overshoot just outside [0, 1].
            depth_score = min(max(float(similarity), 0.0), 1.0)

    # Dimension C: Centrality (placeholder until a real measure is available).
    centrality_score = 0.5

    # Composite Performance Index formula.
    final_index = (
        (w_authority * auth_score)
        + (w_depth * depth_score)
        + (w_centrality * centrality_score)
    )
    # float() keeps the return type a builtin float rather than a NumPy scalar.
    return float(round(final_index * 100, 1))