Spaces:
Runtime error
Runtime error
| from flask import Flask | |
| import gradio as gr | |
| import pandas as pd | |
| from bertopic import BERTopic | |
| from sentence_transformers import SentenceTransformer | |
| import re | |
| app = Flask(__name__) | |
| # Function to clean text | |
def clean_text(text):
    """Return a normalized copy of *text* for topic modeling.

    The text is lowercased, then URLs are removed, then every character
    that is not ``a``-``z`` or whitespace is removed, and finally runs of
    whitespace are collapsed to a single space. Leading and trailing
    whitespace is stripped from the result.
    """
    # Applied in order: URLs must go before punctuation is stripped,
    # otherwise "http://a.b" would lose its separators and partly survive.
    substitutions = (
        (r"http\S+", ""),
        (r"[^a-z\s]", ""),
        (r"\s+", " "),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
| # Function to perform topic modeling | |
def extract_topics():
    """Fit a BERTopic model on the titles in ``titles.csv`` and summarize it.

    The ``title`` column is read, rows with a missing title are dropped,
    and each remaining title is normalized with ``clean_text``. A BERTopic
    model using the ``all-MiniLM-L6-v2`` sentence-transformer embeddings
    (``min_topic_size=2``) is fitted on the cleaned titles.

    Returns:
        str: The ``Topic``, ``Name`` and ``Count`` columns of the fitted
        model's topic table rendered as plain text without the index, or
        a short message when no title survives cleaning.
    """
    df = pd.read_csv("titles.csv")
    df = df.dropna(subset=["title"])

    # read_csv may infer a non-string dtype (e.g. purely numeric titles);
    # clean_text calls str methods, so coerce before cleaning.
    cleaned_titles = df["title"].astype(str).apply(clean_text)

    # Titles made only of URLs, digits or non a-z characters clean down to
    # an empty string; empty documents carry no signal for the model.
    documents = [title for title in cleaned_titles.tolist() if title]
    if not documents:
        return "No usable titles found in titles.csv."

    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
    topic_model = BERTopic(embedding_model=embedding_model, min_topic_size=2)

    # Only the fitted model is needed; the per-document assignments
    # returned by fit_transform are not used here.
    topic_model.fit_transform(documents)

    topic_info = topic_model.get_topic_info()
    return topic_info[["Topic", "Name", "Count"]].to_string(index=False)
| # Gradio interface | |
# Interface with no input components: it calls extract_topics and shows
# the returned string in a text output.
demo = gr.Interface(
    fn=extract_topics,
    inputs=[],
    outputs="text",
)
def home():
    """Launch the Gradio interface and return the result of ``demo.launch``.

    The interface is launched with ``share=False`` and ``inline=True``.

    NOTE(review): no route decorator or ``add_url_rule`` call for this
    function is visible in this file, so it does not appear to be served
    by the Flask app -- confirm whether it is still needed.
    """
    launch_result = demo.launch(share=False, inline=True)
    return launch_result
if __name__ == "__main__":
    # The Flask ``app`` has no registered views, so ``app.run()`` would only
    # answer with 404s and the Gradio interface would never be started.
    # Launch the interface directly instead.
    demo.launch(share=False)