# NOTE: this file was recovered from a Hugging Face "Spaces: Runtime error" log;
# the table formatting (leading/trailing pipes) from that paste has been stripped.
from sentence_transformers import SentenceTransformer, util
import torch
import difflib
from utils.GetDB import GetDB

# Module-level singletons, created once at import time:
# a PostgreSQL connection pool obtained from the project DB helper...
postgreSQL_pool = GetDB().get_db_connection()
# ...and the sentence-embedding model used for every semantic-similarity
# comparison in this module.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
def get_question():
    """Fetch every stored chat question, newest first.

    Returns:
        list[str]: question texts from the ``chat_history`` table.
    """
    # Borrow a connection from the module-level pool.
    conn = postgreSQL_pool.getconn()
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT question FROM chat_history ORDER BY created_at DESC")
            # fetchall() yields 1-tuples; unwrap them to plain strings.
            results = [row[0] for row in cur.fetchall()]
        finally:
            # Bug fix: the cursor close was commented out, leaking a cursor on
            # every call.
            cur.close()
    finally:
        # Always hand the connection back to the pool, even if the query fails.
        postgreSQL_pool.putconn(conn)
    return results
def count_top_questions(questions_array):
    """Count, for each question, how many OTHER questions are near-duplicates.

    Embeds the whole corpus once, then for every question counts how many of
    its nearest neighbours (excluding itself) have cosine similarity >= 0.8.

    Args:
        questions_array: list of question strings.

    Returns:
        list[tuple[str, int]]: up to 50 (question, neighbour_count) pairs,
        sorted by count descending.
    """
    if not questions_array:
        return []
    corpus_embeddings = embedder.encode(questions_array, convert_to_tensor=True)
    # Bug fix: torch.topk raises a RuntimeError when k exceeds the tensor
    # size, which happened whenever fewer than 100 questions were stored.
    k = min(100, len(questions_array))
    top_questions_array = {}
    for question in questions_array:
        query_embedding = embedder.encode([question], convert_to_tensor=True)
        cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
        scores, _indices = torch.topk(cos_scores, k=k)
        # Skip the first hit: a question always matches itself with score ~1.0.
        top_questions_array[question] = sum(
            1 for score in scores[1:] if score.item() >= 0.8
        )
    return sorted(top_questions_array.items(), key=lambda x: x[1], reverse=True)[:50]
def remove_redundancy(redundant_raw_top_asked_questions):
    """Drop near-duplicate questions by textual similarity, keeping the first.

    Two entries are considered duplicates when their
    ``difflib.SequenceMatcher`` ratio falls in [0.7, 1.0) — exactly-equal
    strings (ratio 1.0) are deliberately NOT treated as duplicates, matching
    the original behavior.

    Bug fix: the previous version called ``list.remove`` on the very list
    both loops were iterating, which made the iterators skip elements and
    produced unpredictable results.

    Args:
        redundant_raw_top_asked_questions: list of (question, count) tuples.

    Returns:
        The same list object, de-duplicated in place.
    """
    deduped = []
    for candidate in redundant_raw_top_asked_questions:
        is_duplicate = False
        for kept in deduped:
            ratio = difflib.SequenceMatcher(None, candidate[0], kept[0]).ratio()
            if 0.7 <= ratio < 1.0:
                is_duplicate = True
                break
        if not is_duplicate:
            deduped.append(candidate)
    # Mutate in place so callers holding a reference see the filtered list.
    redundant_raw_top_asked_questions[:] = deduped
    return redundant_raw_top_asked_questions
def remove_greetings(sanitised_questions_array):
    """Strip greeting/small-talk entries from the head of the ranked list.

    Each of the first ten (question, count) pairs is embedded and compared
    against a bank of known greeting phrases; any entry whose best cosine
    similarity reaches 0.87 is removed from the list in place.
    """
    greeting_array = [
        'hey', 'hi', 'hello', "Hello!",
        "Hi there!",
        "Hey!",
        "Good morning!",
        "Good afternoon!",
        "Good evening!",
        "Howdy!",
        "Greetings!",
        "Nice to see you!",
        "What's up?",
        "Hi!",
        "hiiii",
        "Hello!",
        "Hey!", "How are you?",
        "What is your name?",
        "Where are you from?",
        "What do you do?",
        "How can I help you?",
        "What's the weather like?",
        "Do you have any plans for the weekend?",
        "Have you seen any good movies lately?",
        "What's your favorite food?",
        "What are your hobbies?", "hi, hello",
    ]
    greetings_embeddings = embedder.encode(greeting_array, convert_to_tensor=True)
    # Slicing produces a copy, so removing from the original list while
    # looping over the slice is safe.
    for candidate in sanitised_questions_array[:10]:
        candidate_embedding = embedder.encode([candidate[0]], convert_to_tensor=True)
        similarities = util.cos_sim(candidate_embedding, greetings_embeddings)[0]
        # The single best greeting match decides whether this entry is dropped.
        if torch.max(similarities).item() >= 0.87:
            sanitised_questions_array.remove(candidate)
    return sanitised_questions_array
def final_phase_filtering(raw_first_phase_filtered_questions, limit=20):
    """Final semantic de-duplication pass over the top-ranked questions.

    Keeps the first occurrence of each semantic cluster among the first
    ``limit`` entries: a candidate is dropped when its embedding is >= 0.85
    cosine-similar to an entry already kept.

    Bug fixes vs. the previous version:
      * no longer removes items from the list while iterating it (the
        iterators skipped elements unpredictably);
      * no longer compares an item against itself — self-similarity (~1.0)
        fell inside the [0.85, 1+eps) removal window and deleted valid
        questions;
      * embeds each question once instead of re-encoding inside the nested
        loop (O(n) encodes instead of O(n^2)).

    Args:
        raw_first_phase_filtered_questions: list of (question, count) tuples.
        limit: how many leading entries to consider.

    Returns:
        list[tuple]: the de-duplicated entries, original order preserved.
    """
    candidates = raw_first_phase_filtered_questions[:limit]
    if not candidates:
        return candidates
    embeddings = embedder.encode([c[0] for c in candidates], convert_to_tensor=True)
    kept = []
    kept_indices = []
    for i, candidate in enumerate(candidates):
        is_duplicate = any(
            util.cos_sim(embeddings[i], embeddings[j]).item() >= 0.85
            for j in kept_indices
        )
        if not is_duplicate:
            kept.append(candidate)
            kept_indices.append(i)
    return kept
def return_top_question(limit=5):
    """Build a human-readable summary of the most-asked questions.

    Pipeline: fetch all questions from the DB -> count semantic neighbours ->
    drop textual near-duplicates -> drop greetings -> final embedding-based
    de-duplication -> format the top ``limit`` as a numbered list.

    Args:
        limit: maximum number of questions to include in the message.

    Returns:
        str: the formatted summary message.
    """
    # Bug fix: removed leftover debug prints that wrote literal variable
    # names ('questions', 'count_top_questions_', ...) to stdout.
    questions = get_question()
    ranked = count_top_questions(questions)
    deduplicated = remove_redundancy(ranked)
    without_greetings = remove_greetings(deduplicated)
    top_entries = final_phase_filtering(without_greetings)[:limit]
    message = 'These are the top questions asked on the ask twimbit/platform by the users:'
    for key, entry in enumerate(top_entries):
        message = message + '\n {}: '.format(key + 1) + entry[0]
    return message
def return_recent_posts(limit=5, strategy='recent', timeout=30):
    """Fetch recent posts from the Hasura feed endpoint and format them.

    Args:
        limit: maximum number of posts to request.
        strategy: feed strategy passed to the GraphQL query.
        timeout: request timeout in seconds (new, backward-compatible).

    Returns:
        str: a formatted listing of posts; the bare header line if the
        request does not return HTTP 200.

    Raises:
        KeyError: if HASURA_URL / HASURA_ADMIN_SECRET are not set.
        requests.exceptions.RequestException: on network failure/timeout.
    """
    import os
    import requests
    import json
    HASURA_URL = os.environ['HASURA_URL']
    HASURA_ADMIN_SECRET = os.environ['HASURA_ADMIN_SECRET']
    body = """query homeFeedQuery($strategy: Strategy, $limit: Int){
        feed(strategy: $strategy, limit: $limit) {
            hits {
                link
                title
                date
                author
            }
        }
    }"""
    variables = {'strategy': strategy, 'limit': limit}
    # Bug fix: the request previously had no timeout and could hang forever.
    response = requests.post(
        url=HASURA_URL,
        json={'query': body, 'variables': variables},
        headers={'x-hasura-admin-secret': HASURA_ADMIN_SECRET},
        timeout=timeout,
    )
    message = 'These are the recent Articles/Posts on the platform/twimbit website: \n'
    if response.status_code == 200:
        data = json.loads(response.content)
        posts = data.get('data').get('feed').get('hits')
        for key, post in enumerate(posts):
            title = post.get('title')
            link = post.get('link')
            date = post.get('date')
            # Guard against a missing author field; 'author' is presumably a
            # list of names — TODO confirm against the Hasura schema.
            authors = ','.join(post.get('author') or [])
            message += 'Post/Article {}:- \n\tPost/Article Title:- {}\n\tPost/Article Link/URL:- {}\n\tPost/Article Publish Date:- {}\n\tPost/Article Author:- {}\n'.format(
                key + 1, title, link, date, authors)
    return message