MIRNA-MOUKHTAR2025 committed on
Commit
d660f54
·
verified ·
1 Parent(s): 7996d24

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +48 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from bertopic import BERTopic
3
+ from pymongo import MongoClient
4
+
5
+ # 🧠 Connect to MongoDB
6
# The connection string may be supplied through the MONGO_URI environment
# variable so that credentials do not have to live in source control. The
# literal below is kept only as a backward-compatible fallback.
# NOTE(review): the fallback embeds a username/password in a committed file —
# rotate that credential and remove the default once MONGO_URI is configured.
import os

mongo_uri = os.environ.get(
    "MONGO_URI",
    "mongodb+srv://flaskuser:0000@cluster0.axlhy4c.mongodb.net/Researchersedst?retryWrites=true&w=majority",
)
client = MongoClient(mongo_uri)

# Database and collection the topic-modeling function reads from.
db = client["Researchersedst"]
collection = db["experiencesmk"]
12
+
13
+ # 🧪 Topic Modeling Function
14
def extract_topics_from_mongo(dummy_input=None):
    """Fit BERTopic on the summaries stored in MongoDB and report the topics.

    Args:
        dummy_input: Unused. Accepted so the function can be called with or
            without a positional argument.

    Returns:
        A ``(top_topics, results)`` tuple. ``top_topics`` is a plain-text
        table of the first ten rows of the model's topic info (``Topic``,
        ``Name`` and ``Count`` columns). ``results`` is a pandas DataFrame
        with one row per summary and the columns ``topic`` and ``abstract``.
        When no usable summary is found, the tuple is
        ``("No abstracts found in MongoDB.", None)``.
    """
    # Imported locally so this function does not depend on a module-level
    # pandas import being present.
    import pandas as pd

    # Only the "summary" field is projected; the cursor is consumed once.
    cursor = collection.find(
        {"summary": {"$exists": True, "$ne": None}},
        {"summary": 1, "_id": 0},
    )

    # The query excludes only missing/None summaries, so blank strings and
    # non-string values are dropped here before they reach the model.
    texts = [
        doc["summary"]
        for doc in cursor
        if isinstance(doc.get("summary"), str) and doc["summary"].strip()
    ]

    if not texts:
        return "No abstracts found in MongoDB.", None

    # A fresh model is fitted on every call.
    topic_model = BERTopic()
    topics, _ = topic_model.fit_transform(texts)

    topic_info = topic_model.get_topic_info()
    top_topics = (
        topic_info[["Topic", "Name", "Count"]].head(10).to_string(index=False)
    )

    # A DataFrame gives the table explicit "topic"/"abstract" columns; a list
    # of dicts is not a documented tabular input for the Dataframe output.
    results = pd.DataFrame({"topic": topics, "abstract": texts})

    return top_topics, results
34
+
35
+ # 🎛️ Gradio UI
36
# Output components, listed in the same order as the values returned by the
# callback: the text summary first, then the per-abstract table.
topics_box = gr.Textbox(label="Top Topics")
assignments_table = gr.Dataframe(label="Abstracts with Assigned Topics")

# The interface takes no input components; the callback is run as-is.
demo = gr.Interface(
    fn=extract_topics_from_mongo,
    inputs=[],
    outputs=[topics_box, assignments_table],
    title="🧠 Topic Modeling from MongoDB",
    description="This tool pulls abstracts from MongoDB and extracts topics using BERTopic.",
)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
gradio
bertopic
pandas
scikit-learn
# app.py imports MongoClient from pymongo, so it must be installed as well.
pymongo