MIRNA-MOUKHTAR2025's picture
Upload 3 files
e8e2080 verified
from flask import Flask
import gradio as gr
import pandas as pd
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer
import re
# Flask application object; the "/" route and the app.run() call below use it.
app = Flask(__name__)
# Function to clean text
_URL_RE = re.compile(r"http\S+")
_NON_LETTER_RE = re.compile(r"[^a-z\s]")
_WHITESPACE_RE = re.compile(r"\s+")


def clean_text(text: str) -> str:
    """Normalize a title to lowercase ASCII letters separated by single spaces.

    Steps, in order: lowercase the text, remove ``http...`` URLs, replace
    every character that is not ``a-z`` or whitespace with a space, collapse
    whitespace runs to one space, and strip the ends.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text; an empty string if no letters remain.
    """
    text = text.lower()
    text = _URL_RE.sub("", text)  # Remove URLs
    # Replace (rather than delete) punctuation and digits so that words joined
    # by them stay separate: "state-of-the-art" -> "state of the art" instead
    # of "stateoftheart". The extra spaces are collapsed on the next line.
    text = _NON_LETTER_RE.sub(" ", text)
    text = _WHITESPACE_RE.sub(" ", text)  # Remove extra whitespace
    return text.strip()
# Function to perform topic modeling
def extract_topics(csv_path: str = "titles.csv", min_topic_size: int = 2) -> str:
    """Fit BERTopic on the titles in a CSV file and return a topic summary.

    Args:
        csv_path: Path of the CSV file to read. It must contain a ``title``
            column. Defaults to ``"titles.csv"``.
        min_topic_size: Forwarded to ``BERTopic`` as ``min_topic_size``.

    Returns:
        The ``Topic``, ``Name`` and ``Count`` columns of
        ``topic_model.get_topic_info()`` rendered as text without the index.

    Raises:
        ValueError: If no non-empty title remains after cleaning.
    """
    # Read CSV file
    df = pd.read_csv(csv_path)

    # Drop rows with missing titles. The remaining values are cast to str
    # because pandas may infer a numeric dtype, and clean_text() calls
    # str methods on its argument.
    titles = df.dropna(subset=["title"])["title"].astype(str)

    # Preprocess titles and discard those that are empty after cleaning,
    # since an empty document carries no signal for the topic model.
    documents = [doc for doc in titles.map(clean_text) if doc]
    if not documents:
        raise ValueError(f"No usable titles found in {csv_path!r}")

    # Initialize embedding model
    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

    # Initialize BERTopic model
    topic_model = BERTopic(
        embedding_model=embedding_model,
        min_topic_size=min_topic_size,
    )

    # Fit the model; the per-document assignments it returns are not needed,
    # only the fitted model's topic table.
    topic_model.fit_transform(documents)

    # Get topic information
    topic_info = topic_model.get_topic_info()
    return topic_info[["Topic", "Name", "Count"]].to_string(index=False)
# Gradio interface
demo = gr.Interface(fn=extract_topics, inputs=[], outputs="text")

# Local URL of the running Gradio server; None until it has been launched.
_demo_url = None


def _ensure_demo_running():
    """Launch the Gradio server once, without blocking, and return its URL."""
    global _demo_url
    if _demo_url is None:
        # By default launch() keeps the calling thread busy while the server
        # runs, which would hang the Flask request. prevent_thread_lock=True
        # makes it return right after the server has started.
        launched = demo.launch(share=False, prevent_thread_lock=True)
        # launch() returns (app, local_url, share_url).
        _demo_url = launched[1]
    return _demo_url


@app.route("/")
def home():
    """Redirect the Flask root URL to the Gradio interface.

    The result of ``demo.launch()`` is not a valid Flask response, so the
    route starts the Gradio server on first use and redirects the browser
    to it instead.

    NOTE(review): the redirect target is Gradio's local URL, so this works
    when the browser runs on the same host as the server. Confirm whether
    remote access is needed.
    """
    # Local import: the file's import block only brings in Flask itself.
    from flask import redirect

    return redirect(_ensure_demo_running())


if __name__ == "__main__":
    app.run()