import time

import pandas as pd
from sentence_transformers import SentenceTransformer

# Embedding model shared by every call to topic_sort(); loaded once at import.
# NOTE(review): the cache folder is a machine-specific Windows path — confirm
# it is valid wherever this module is deployed.
sentence_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2",cache_folder=r"C:\Users\HP\.cache\huggingface\hub\models--sentence-transformers--all-MiniLM-L6-v2")

# Number of rows kept after ranking by similarity to the query.
_SIMILARITY_SHORTLIST_SIZE = 30
# Number of rows returned after re-ranking the shortlist by engagement.
_FINAL_RESULT_SIZE = 18


def get_microseconds_list(length=3):
    """Return ``length`` consecutive integers starting at the current time.

    The first value is ``time.time()`` converted to whole microseconds; each
    following value is one greater than the previous one.

    Args:
        length: How many values to produce (default 3).

    Returns:
        A list of ``length`` ints.
    """
    microseconds = int(time.time() * 1_000_000)
    return [microseconds + i for i in range(length)]


def topic_sort(path1, query, path2='', path3='', isForCompetitorAnalysis=False):
    """Rank posts from CSV files by similarity to ``query``, then by engagement.

    The CSV data must provide the columns ``title``, ``comment_count`` and
    ``votes_count``. Rows are de-duplicated on ``title``, scored against
    ``query`` with the module-level ``sentence_model``, cut down to the 30
    most similar rows, re-sorted by ``comment_count`` then ``votes_count``
    (both descending), and finally cut down to the first 18 rows.

    Args:
        path1: Path of the first (always read) CSV file.
        query: Text the titles are compared against.
        path2: Optional second CSV file; ignored when empty or when
            ``isForCompetitorAnalysis`` is set.
        path3: Optional third CSV file; same rules as ``path2``.
        isForCompetitorAnalysis: When truthy, only ``path1`` is read.

    Returns:
        A ``pandas.DataFrame`` with at most 18 rows, a fresh 0-based index and
        an added ``similarity`` column.
    """
    if isForCompetitorAnalysis:
        df = pd.read_csv(path1)
    else:
        # path2/path3 default to '' — skip the ones the caller did not supply
        # instead of letting read_csv fail on an empty path.
        paths = [path1] + [p for p in (path2, path3) if p]
        df = pd.concat([pd.read_csv(p) for p in paths], axis=0)

    df = df.drop_duplicates("title")
    df = df.reset_index(drop=True)
    # Drop a stored "index" column if the CSVs carry one; tolerate files
    # that were written without it rather than raising KeyError.
    df = df.drop(columns="index", errors="ignore")

    # Encode the query together with the titles; row 0 is the query itself.
    sentences = [query] + list(df["title"])
    embeddings = sentence_model.encode(sentences)
    similarities = sentence_model.similarity(embeddings[0], embeddings)
    # Skip the first score: it is the query compared with itself.
    df["similarity"] = similarities[0][1:]

    # Stage 1: shortlist the titles most similar to the query.
    df = df.sort_values(by='similarity', ascending=False)
    df = df.reset_index(drop=True)
    df = df.head(_SIMILARITY_SHORTLIST_SIZE)

    # Stage 2: within the shortlist, prefer the most discussed / upvoted posts.
    df = df.sort_values(by=['comment_count', 'votes_count'], ascending=False)
    df = df.reset_index(drop=True)
    df = df.head(_FINAL_RESULT_SIZE)
    return df


# Names of the Reddit-based services.
reddit_services_names= ['Pain point analysis','Competitor analysis']