# Space: MIRNA-MOUKHTAR2025/topicmodelingspace
# Status: Runtime error
# File size: 1,424 Bytes

from flask import Flask
import gradio as gr
import pandas as pd
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer
import re

# Flask application object; the "/" route further down is registered on it.
app = Flask(__name__)

# Function to clean text
def clean_text(text):
    """Normalize a title for topic modeling.

    The text is lower-cased, then URLs are removed, every character that is
    not a lowercase ASCII letter or whitespace is dropped, and runs of
    whitespace are collapsed to a single space. Leading and trailing
    whitespace is stripped from the result.
    """
    # Applied in order: each step sees the output of the previous one.
    substitutions = (
        (r"http\S+", ""),    # URLs
        (r"[^a-z\s]", ""),   # digits, punctuation, non-ASCII characters
        (r"\s+", " "),       # whitespace runs -> single space
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Function to perform topic modeling
def extract_topics() -> str:
    """Fit a BERTopic model on the titles in ``titles.csv`` and summarize it.

    Reads the ``title`` column of ``titles.csv`` (resolved against the current
    working directory), normalizes every title with ``clean_text``, fits a
    BERTopic model that uses the ``all-MiniLM-L6-v2`` sentence-transformer
    for embeddings, and renders the ``Topic``, ``Name`` and ``Count`` columns
    of the resulting topic table as plain text.

    Returns:
        The topic table rendered with ``DataFrame.to_string(index=False)``,
        or a short message when no title is left after cleaning.

    Raises:
        FileNotFoundError: If ``titles.csv`` does not exist.
        KeyError: If the CSV has no ``title`` column.
    """
    df = pd.read_csv("titles.csv")
    # Rows without a title carry nothing to model.
    df = df.dropna(subset=["title"])

    # astype(str) guards against columns pandas parsed as numbers, which
    # would otherwise fail inside clean_text on ``.lower()``.
    cleaned_titles = df["title"].astype(str).apply(clean_text)

    # Titles made only of digits, punctuation or non-ASCII text clean to "",
    # so they are dropped rather than passed on as empty documents.
    docs = [doc for doc in cleaned_titles if doc]
    if not docs:
        return "No usable titles found in titles.csv."

    # NOTE: the embedding model is loaded on every call.
    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
    topic_model = BERTopic(embedding_model=embedding_model, min_topic_size=2)
    # Only the fitted model is needed; the per-document assignments that
    # fit_transform returns are not used.
    topic_model.fit_transform(docs)

    topic_info = topic_model.get_topic_info()
    return topic_info[["Topic", "Name", "Count"]].to_string(index=False)

# Gradio UI: no input components and a single text output that displays
# whatever extract_topics() returns.
demo = gr.Interface(
    fn=extract_topics,
    inputs=[],
    outputs="text",
)

@app.route("/")
def home():
    """Start the Gradio demo and redirect the browser to it.

    ``demo.launch`` returns an ``(app, local_url, share_url)`` tuple, which
    is not a valid Flask response, so the view responds with a redirect to
    the Gradio server's local URL instead of returning that tuple.

    Returns:
        A Flask redirect response pointing at the running Gradio server.
    """
    # Imported here so the handler does not rely on the file-level import
    # list being extended.
    from flask import redirect

    # prevent_thread_lock=True makes launch() return once the server is up
    # instead of blocking this request handler.
    # NOTE(review): launch() is expected to reuse the already-running server
    # on later requests rather than start a second one -- confirm against
    # the installed Gradio version.
    _, local_url, _ = demo.launch(
        share=False,
        inline=True,
        prevent_thread_lock=True,
    )

    # NOTE(review): local_url is the address on the machine running this
    # process; clients on other hosts may need a publicly reachable URL.
    return redirect(local_url)

# Start Flask's built-in server only when this file is executed as a script.
# NOTE(review): app.run() is called without host/port arguments -- confirm
# these defaults match what the hosting environment expects.
if __name__ == "__main__":
    app.run()