import streamlit as st
import pandas as pd
import numpy as np
import torch
import ast
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import random
import asyncio
import os

# Disable Streamlit's file watcher — it is known to conflict with torch's
# lazy module loader at startup.
os.environ["STREAMLIT_WATCHDOG"] = "0"

# Ensure this thread has an asyncio event loop; Streamlit script threads may
# not, which breaks async-dependent libraries (e.g. model downloads).
try:
    asyncio.get_running_loop()
except RuntimeError:
    asyncio.set_event_loop(asyncio.new_event_loop())

# Load the pre-trained sentence-embedding model once at startup.
model = SentenceTransformer('all-MiniLM-L6-v2')


def get_embedding(text):
    """Return the sentence embedding of *text* as a 1-D numpy array."""
    return model.encode(text, convert_to_numpy=True)


# Load datasets.
tweets_df = pd.read_csv("dataset_with_embeddings.csv")            # columns: 'tweet_id', 'sentence_embedding'
original_tweets_df = pd.read_csv("dataset_with_actual_text.csv")  # columns: 'tweet_id', 'original_text'


def parse_embedding(embedding_str):
    """Parse a stringified Python list into a list of floats.

    Returns None when the string is not a valid literal so malformed rows
    can be filtered out instead of crashing later vector math.
    """
    try:
        return ast.literal_eval(embedding_str)
    except (ValueError, SyntaxError):
        return None  # handle malformed rows gracefully


tweets_df['sentence_embedding'] = tweets_df['sentence_embedding'].astype(str).apply(parse_embedding)
# FIX: drop rows whose embedding failed to parse, so np.mean / np.vstack below
# never receive None entries (the original crashed on any malformed row).
tweets_df = tweets_df[tweets_df['sentence_embedding'].notna()].reset_index(drop=True)

# Simulated user database (username -> password).
# SECURITY NOTE(review): plaintext password storage/comparison is demo-only;
# a real deployment must use salted password hashes.
users = {"testuser": "password123"}

# Initialize per-session state on first run.
if "logged_in" not in st.session_state:
    st.session_state["logged_in"] = False
    st.session_state["username"] = ""
    st.session_state["liked_tweets"] = []    # tweet_ids the user liked
    st.session_state["posted_tweets"] = []   # embeddings of tweets the user posted

# ---------------------------------------------------------------- Login page
if not st.session_state["logged_in"]:
    st.title("Tweet Recommendation System")
    username = st.text_input("Username")
    password = st.text_input("Password", type="password")
    if st.button("Login"):
        if username in users and users[username] == password:
            st.session_state["logged_in"] = True
            st.session_state["username"] = username
            st.success("Login successful!")
            # FIX: st.experimental_rerun() was removed in recent Streamlit
            # releases; prefer st.rerun() and fall back for old versions.
            (getattr(st, "rerun", None) or st.experimental_rerun)()
        else:
            st.error("Invalid credentials")
else:
    st.title(f"Welcome, {st.session_state['username']}!")

    # ------------------------------------------------------------ Explore page
    # FIX: cap the sample size by the dataset length — .sample(10) raises when
    # fewer than 10 rows are available.
    random_tweets = original_tweets_df.sample(min(10, len(original_tweets_df)))
    st.subheader("Explore Tweets")
    for _, row in random_tweets.iterrows():
        tweet_id = row['tweet_id']
        text = row['original_text']
        with st.container():
            st.write(f"*Tweet:* {text}")
            if st.button(f"Like ❤ {tweet_id}", key=f"like_{tweet_id}"):
                if tweet_id not in st.session_state["liked_tweets"]:
                    st.session_state["liked_tweets"].append(tweet_id)
                    st.success("Tweet Liked!")

    # ----------------------------------------------------------- Post a tweet
    st.subheader("Post a Tweet")
    new_tweet = st.text_area("Write your tweet here:")
    if st.button("Post Tweet"):
        if new_tweet:
            new_tweet_embedding = get_embedding(new_tweet)
            st.session_state["posted_tweets"].append(new_tweet_embedding)
            st.success("Tweet posted successfully!")

    # -------------------------------------------------------- Recommendations
    if st.session_state["liked_tweets"] or st.session_state["posted_tweets"]:
        st.subheader("Recommended Tweets for You")

        # FIX: tolerate liked tweet_ids that are absent from tweets_df (the
        # original `.values[0]` raised IndexError on an unknown id).
        liked_embeddings = []
        for tid in st.session_state["liked_tweets"]:
            match = tweets_df.loc[tweets_df['tweet_id'] == tid, 'sentence_embedding'].values
            if len(match):
                liked_embeddings.append(match[0])

        posted_embeddings = list(st.session_state["posted_tweets"])
        profile_vectors = liked_embeddings + posted_embeddings

        # All likes may have referenced missing/malformed rows — only compute
        # a profile when at least one valid vector exists.
        if profile_vectors:
            # User profile = mean of liked + posted tweet embeddings.
            user_profile_embedding = np.mean(np.vstack(profile_vectors), axis=0)

            # Cosine similarity between the profile and every dataset tweet.
            all_embeddings = np.vstack(tweets_df['sentence_embedding'].values)
            similarities = cosine_similarity([user_profile_embedding], all_embeddings)[0]

            # FIX: exclude tweets the user already liked — re-recommending
            # known likes adds no value. Keep the top 10 by similarity.
            liked_set = set(st.session_state["liked_tweets"])
            order = np.argsort(similarities)[::-1]
            top_indices = [i for i in order
                           if tweets_df.iloc[i]['tweet_id'] not in liked_set][:10]
            recommended_tweets = tweets_df.iloc[top_indices]

            # Display recommended tweets, skipping ids with no original text.
            for _, row in recommended_tweets.iterrows():
                tweet_id = row['tweet_id']
                texts = original_tweets_df.loc[
                    original_tweets_df['tweet_id'] == tweet_id, 'original_text'].values
                if len(texts):
                    with st.container():
                        st.write(f"*Recommended Tweet:* {texts[0]}")