"""Flask entry point for a Gradio demo that runs BERTopic over a CSV of titles."""

import re
import threading
from functools import lru_cache

import gradio as gr
import pandas as pd
from bertopic import BERTopic
from flask import Flask, redirect
from sentence_transformers import SentenceTransformer

app = Flask(__name__)

# Defaults used by the demo; kept as constants so they are easy to override.
TITLES_CSV = "titles.csv"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Compiled once at import time instead of on every clean_text() call.
_URL_RE = re.compile(r"http\S+")
_NON_LETTER_RE = re.compile(r"[^a-z\s]")
_WHITESPACE_RE = re.compile(r"\s+")

# State for the lazily started Gradio server (see _ensure_gradio_running).
_gradio_lock = threading.Lock()
_gradio_url = None


def clean_text(text: str) -> str:
    """Normalize a title for topic modeling.

    Lower-cases the text, removes URLs, removes every character that is not
    an ASCII lowercase letter or whitespace (digits and punctuation are
    dropped), collapses whitespace runs to a single space and strips the ends.

    >>> clean_text("Hello, World! http://example.com  42")
    'hello world'
    """
    text = text.lower()
    text = _URL_RE.sub("", text)
    text = _NON_LETTER_RE.sub("", text)
    text = _WHITESPACE_RE.sub(" ", text)
    return text.strip()


@lru_cache(maxsize=1)
def _load_embedding_model() -> SentenceTransformer:
    """Load the sentence-embedding model once and reuse it across requests."""
    return SentenceTransformer(EMBEDDING_MODEL_NAME)


def extract_topics(csv_path: str = TITLES_CSV) -> str:
    """Fit BERTopic on the ``title`` column of a CSV and summarize the topics.

    Args:
        csv_path: CSV file to read; it must contain a ``title`` column.
            Defaults to ``titles.csv`` so the function can still be called
            with no arguments (as the Gradio interface does).

    Returns:
        A plain-text table with the ``Topic``, ``Name`` and ``Count`` columns
        of the fitted model's topic info, or a short message when no usable
        titles remain after cleaning.
    """
    df = pd.read_csv(csv_path)
    df = df.dropna(subset=["title"])

    # Cast to str so a numeric-looking title cannot crash clean_text(), and
    # drop titles that are empty once cleaned: they carry no signal.
    cleaned = df["title"].astype(str).apply(clean_text)
    documents = [title for title in cleaned.tolist() if title]
    if not documents:
        return "No titles available for topic modeling."

    topic_model = BERTopic(
        embedding_model=_load_embedding_model(),
        min_topic_size=2,
    )
    # The per-document assignments are not needed, only the fitted model.
    topic_model.fit_transform(documents)

    topic_info = topic_model.get_topic_info()
    return topic_info[["Topic", "Name", "Count"]].to_string(index=False)


# Gradio interface: no inputs, a single text output.
demo = gr.Interface(fn=extract_topics, inputs=[], outputs="text")


def _ensure_gradio_running() -> str:
    """Start the Gradio server on first use and return its local URL.

    ``launch`` is called with ``prevent_thread_lock=True`` so it returns
    instead of blocking the calling request thread. The lock keeps
    concurrent first requests from launching the server twice.
    """
    global _gradio_url
    with _gradio_lock:
        if _gradio_url is None:
            # launch() returns (app, local_url, share_url).
            _, _gradio_url, _ = demo.launch(
                share=False,
                inline=True,
                prevent_thread_lock=True,
            )
    return _gradio_url


@app.route("/")
def home():
    """Redirect the browser to the Gradio demo, starting it if necessary.

    NOTE: the target is Gradio's local URL, so the redirect only works for
    clients on the same machine as this server.
    """
    return redirect(_ensure_gradio_running())


if __name__ == "__main__":
    app.run()