Spaces:

DvorakInnovationAI
/

Story-Analytics

Sleeping

App Files Files Community

Story-Analytics / app.py

subashdvorak

Update app.py

c45d789 verified 11 months ago

raw

history blame contribute delete

3.7 kB

	import gradio as gr
	import numpy as np
	import seaborn as sns
	import matplotlib.pyplot as plt
	from sklearn.metrics.pairwise import cosine_similarity
	from sentence_transformers import SentenceTransformer
	import pandas as pd
	import re

	# Knowledge base: read the pre-encoded stories from disk, then discard the
	# 'Unnamed: 0' column (presumably a saved index column - confirm against
	# the script that wrote the CSV).
	raw_knowledge_base = pd.read_csv('encoded_df.csv')
	encoded_df = raw_knowledge_base.drop(columns=['Unnamed: 0'])
	del raw_knowledge_base  # keep the module namespace identical to before

	# Sentence Transformer model used to embed every incoming story
	model = SentenceTransformer('all-MiniLM-L6-v2')

	# Function to preprocess text
	def preprocess_text(text):
	    """Normalise story text before it is embedded.

	    The text is lowercased and every character that is neither an ASCII
	    letter nor whitespace is removed (digits and punctuation included).

	    Args:
	        text: Raw story text. ``None`` or an empty string is accepted.

	    Returns:
	        The cleaned text, or ``""`` when there is nothing to clean.
	    """
	    # An empty or missing input has nothing to clean; returning "" avoids
	    # an AttributeError on None further down.
	    if not text:
	        return ""
	    lowered = text.lower()
	    return re.sub(r'[^a-zA-Z\s]', '', lowered)

	def generate_graphs(new_story, metric, similarity_threshold=0.7):
	    """Plot how a new story compares with similar knowledge-base stories.

	    The story is preprocessed, embedded with the module-level ``model`` and
	    compared by cosine similarity against every row of ``encoded_df``.
	    Rows whose similarity is strictly greater than ``similarity_threshold``
	    are kept and ordered from most to least similar.

	    Args:
	        new_story: Raw story text.
	        metric: Name of the ``encoded_df`` column to chart for the
	            similar stories (e.g. ``"likesCount"``).
	        similarity_threshold: Exclusive lower bound on cosine similarity
	            for a stored story to count as "similar". Defaults to 0.7.

	    Returns:
	        ``(similarity_figure, metric_figure)`` as matplotlib figures, or
	        ``(None, None)`` when no stored story exceeds the threshold.
	    """
	    new_story = preprocess_text(new_story)
	    new_story_vector = model.encode([new_story])[0]

	    # The trailing 7 columns are the metrics columns; everything before
	    # them is treated as the stored embedding.
	    knowledge_base_vectors = encoded_df.iloc[:, :-7].values
	    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]

	    similar_indexes = np.where(similarities > similarity_threshold)[0]
	    # np.where yields row order; re-order by descending similarity so the
	    # "most similar to least" axis label below is actually true.
	    by_similarity = np.argsort(-similarities[similar_indexes], kind="stable")
	    similar_indexes = similar_indexes[by_similarity]
	    print('Similar indexes:', similar_indexes)

	    if similar_indexes.size == 0:
	        return None, None

	    metric_distribution = encoded_df.iloc[similar_indexes][metric].values
	    story_labels = [f"Story {i+1}" for i in similar_indexes]

	    # Density of embedding values: new story vs. each similar story.
	    # Explicit Axes are used instead of pyplot's implicit "current figure"
	    # so the drawing does not depend on global pyplot state.
	    sim_dist_plot, sim_ax = plt.subplots(figsize=(10, 6))
	    # Curves are unfilled (the kdeplot default); the deprecated `shade`
	    # keyword is intentionally not passed.
	    sns.kdeplot(new_story_vector, label="New Story", color='blue', linewidth=2, ax=sim_ax)
	    for idx in similar_indexes:
	        # Reuse the matrix extracted above rather than slicing the frame again
	        sns.kdeplot(knowledge_base_vectors[idx], label=f"Story {idx+1}", alpha=0.5, ax=sim_ax)
	    sim_ax.set_title(f"Similarity Distribution: New Story vs Similar Stories ({metric})", fontsize=14)
	    sim_ax.set_xlabel("Vector Values", fontsize=12)
	    sim_ax.set_ylabel("Density", fontsize=12)
	    sim_ax.legend(title="Stories")
	    sim_dist_plot.tight_layout()

	    # Bar chart of the chosen metric for the similar stories
	    metric_dist_plot, metric_ax = plt.subplots(figsize=(10, 6))
	    sns.barplot(x=story_labels, y=metric_distribution, palette="viridis", ax=metric_ax)
	    metric_ax.set_title(f"{metric} Distribution for Similar Stories", fontsize=14)
	    metric_ax.set_xlabel("Story(most similar to least)", fontsize=12)
	    metric_ax.set_ylabel(metric, fontsize=12)
	    metric_ax.tick_params(axis="x", labelrotation=90)
	    metric_dist_plot.tight_layout()

	    # Deregister the figures from pyplot so they do not pile up across
	    # requests; the Figure objects themselves stay usable by the caller.
	    plt.close(sim_dist_plot)
	    plt.close(metric_dist_plot)

	    return sim_dist_plot, metric_dist_plot


	# Gradio interface for a specific metric
	def gradio_interface(new_story, metric):
	    """Callback that produces both plots for one story and one metric.

	    Args:
	        new_story: Story text to analyse.
	        metric: Name of the metric column to chart.

	    Returns:
	        The ``(similarity_plot, metric_plot)`` pair from
	        ``generate_graphs``; both entries are ``None`` when no similar
	        story was found.
	    """
	    # generate_graphs already returns (None, None) for the "nothing
	    # similar" case, so its result is passed through unchanged.
	    return generate_graphs(new_story, metric)


	# Create the Gradio interface with tabs
	def create_tab(metric):
	    """Build the Gradio interface for a single metric.

	    Args:
	        metric: Name of the metric column this interface charts; it is
	            also interpolated into the plot label, title and description.

	    Returns:
	        A ``gr.Interface`` with one story textbox as input and two plots
	        (similarity distribution, metric distribution) as outputs.
	    """
	    return gr.Interface(
	        # Bind this tab's metric so Gradio only has to supply the story
	        fn=lambda new_story: gradio_interface(new_story, metric),
	        inputs=gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here..."),
	        outputs=[gr.Plot(label="Similarity Distribution"), gr.Plot(label=f"{metric} Distribution")],
	        title=f"Story Similarity and {metric} Analysis",
	        # The cutoff quoted here must match the one generate_graphs applies (0.7)
	        description=f"Enter a new story to compare with the knowledge base. View similarity distributions and {metric} of stories with similarity > 0.7.",
	    )

	# One interface per engagement metric, created in tab order
	likes_tab, comments_tab, shares_tab = [
	    create_tab(metric_name)
	    for metric_name in ("likesCount", "commentCount", "shareCount")
	]

	# Group the three interfaces under named tabs in a single app
	iface = gr.TabbedInterface(
	    interface_list=[likes_tab, comments_tab, shares_tab],
	    tab_names=["Likes Analytics", "Comments Analytics", "Shares Analytics"],
	)

	# Start serving the app; share=True is passed through to Gradio unchanged
	iface.launch(share=True)