Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.cluster import KMeans | |
| import gradio as gr | |
# Location of the CSV file holding the titles to cluster; change this if
# titles.csv lives somewhere else in your environment.
CSV_FILE_PATH = "titles.csv"
def topic_modeling(n_clusters):
    """Cluster the titles in ``CSV_FILE_PATH`` and list sample titles per topic.

    Titles are vectorized with TF-IDF (English stop words removed, unigrams
    and bigrams, terms appearing in more than 90% of titles ignored) and
    grouped with KMeans using a fixed random seed.

    Args:
        n_clusters: Number of topics to produce. Coerced to ``int`` because a
            UI widget may deliver the value as a float, which ``range`` and
            KMeans reject.

    Returns:
        A Markdown string with one ``### Topic i`` heading per cluster, each
        followed by up to five titles from that cluster. If the data cannot
        be loaded or clustered, a string starting with ``"Error:"`` is
        returned instead.
    """
    n_clusters = int(n_clusters)
    if n_clusters < 1:
        return "Error: number of topics must be at least 1"

    # Read the CSV file directly from the configured path; report load
    # problems in the same "Error: ..." style as the column check below.
    try:
        df = pd.read_csv(CSV_FILE_PATH)
    except FileNotFoundError:
        return f"Error: CSV file not found at '{CSV_FILE_PATH}'"
    except pd.errors.EmptyDataError:
        return f"Error: CSV file '{CSV_FILE_PATH}' is empty"

    if 'title' not in df.columns:
        return "Error: CSV does not contain a 'title' column"

    # Drop rows without a title: astype(str) would otherwise turn them into
    # the literal text "nan" and cluster them as if they were real titles.
    df = df.dropna(subset=['title']).copy()
    titles = df['title'].astype(str).tolist()

    # KMeans cannot form more clusters than there are samples.
    if len(titles) < n_clusters:
        return (
            f"Error: only {len(titles)} title(s) available, "
            f"cannot form {n_clusters} topics"
        )

    vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_df=0.9)
    try:
        X = vectorizer.fit_transform(titles)
    except ValueError as exc:
        # Raised when no terms survive stop-word removal / max_df pruning.
        return f"Error: could not build a vocabulary from the titles ({exc})"

    model = KMeans(n_clusters=n_clusters, random_state=42)
    df['topic'] = model.fit_predict(X)

    # Prepare the output: the first five titles assigned to each topic.
    sections = []
    for i in range(n_clusters):
        top_titles = df.loc[df['topic'] == i, 'title'].head(5)
        bullets = "\n".join(f"- {t}" for t in top_titles)
        sections.append(f"\n### Topic {i}\n{bullets}\n")
    return "".join(sections)
# Build the Gradio UI: the only input is a slider choosing how many topics to
# produce (no file-upload widget), and the result is rendered as Markdown.
iface = gr.Interface(
    fn=topic_modeling,
    inputs=[
        gr.Slider(minimum=2, maximum=10, value=5, step=1, label="Number of Topics"),
    ],
    outputs="markdown",
    title="Topic Modeling App (TF-IDF + KMeans)",
    description="This app clusters titles from a CSV file into topics using TF-IDF and KMeans. No file upload needed.",
)


if __name__ == "__main__":
    iface.launch()