Sami2000 committed on
Commit
84677f9
·
verified ·
1 Parent(s): 536ee73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -62
app.py CHANGED
@@ -32,6 +32,22 @@ subreddit = st.text_input("Specify a subreddit (optional, e.g., 'Military' or 'w
32
  query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
33
  max_articles = st.slider("Number of news articles:", 5, 25, 12)
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  if st.button("Search"):
36
  # --- Fancy progress bar ---
37
  progress = st.progress(0, text="Fetching news...")
@@ -43,70 +59,36 @@ if st.button("Search"):
43
  if articles:
44
  progress.progress(40, text="Extracting keywords...")
45
  keywords = extract_keywords(articles)
46
-
 
 
 
 
47
  progress.progress(60, text="Searching Reddit...")
48
  reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
49
-
50
  progress.progress(80, text="Analyzing sentiment...")
51
- sentiments = analyze_sentiment(reddit_data)
52
- progress.progress(100, text="Done!")
53
-
54
- tab1, tab2, tab3, tab4 = st.tabs(["News", "Keywords", "Reddit", "Sentiment"])
55
-
56
- with tab1:
57
- st.subheader("News Articles")
58
- st.dataframe([
59
- {
60
- "Title": a.get("title", ""),
61
- "Source": a.get("source", ""),
62
- "Published": a.get("publishedAt", ""),
63
- "URL": a.get("url", "")
64
- } for a in articles[:max_articles]
65
- ])
66
-
67
- with tab2:
68
- st.subheader("Top Keywords")
69
- st.write(", ".join(keywords))
70
-
71
- with tab3:
72
- st.subheader("Reddit Comments")
73
- if reddit_data:
74
- comments = []
75
- if isinstance(reddit_data, dict):
76
- for v in reddit_data.values():
77
- comments.extend(v)
78
- elif isinstance(reddit_data, list):
79
- comments = reddit_data
80
-
81
- if comments:
82
- st.dataframe([
83
- {
84
- "Comment": c.get("body", "")[:140] + ("..." if len(c.get("body", "")) > 140 else ""),
85
- "Subreddit": c.get("subreddit", ""),
86
- "Upvotes": c.get("score", ""),
87
- }
88
- for c in comments[:30]
89
- ])
90
- else:
91
- st.info("No Reddit comments found.")
92
- else:
93
- st.info("No Reddit data found.")
94
-
95
- with tab4:
96
- st.subheader("Sentiment Results")
97
- if sentiments:
98
- df = pd.DataFrame(sentiments)
99
- st.dataframe(df[["body", "sentiment"]].rename(columns={"body": "Comment"}))
100
- # Show pie chart of sentiment
101
- sentiment_counts = df["sentiment"].value_counts().reset_index()
102
- sentiment_counts.columns = ["Sentiment", "Count"]
103
- fig = px.pie(sentiment_counts, names="Sentiment", values="Count",
104
- title="Sentiment Distribution")
105
- st.plotly_chart(fig, use_container_width=True)
106
- else:
107
- st.info("No sentiment data found.")
108
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  else:
110
- progress.empty()
111
- st.warning("No articles found for your search. Try a different query.")
112
- # --- END OF DASHBOARD CODE ---
 
32
  query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
33
  max_articles = st.slider("Number of news articles:", 5, 25, 12)
34
 
35
+ # --- CLEANING FUNCTION ---
36
def clean_keywords(keywords):
    """Return keywords with blanks, punctuation-only entries and duplicates removed.

    Args:
        keywords: Iterable of candidate keywords. ``None`` is treated as an
            empty collection; entries that are not strings are skipped.

    Returns:
        A new list of whitespace-stripped keywords in first-seen order.
        Duplicates are detected case-insensitively and the first spelling
        encountered is the one kept.
    """
    if keywords is None:
        return []

    cleaned = []
    seen = set()
    for kw in keywords:
        # Non-string entries (e.g. None) have no .strip(); skip them instead
        # of letting a single bad value abort the whole search.
        if not isinstance(kw, str):
            continue
        kw = kw.strip()
        key = kw.lower()
        # any() over an empty string is False, so blank entries are rejected
        # by the same check that rejects punctuation-only ones.
        if key in seen or not any(c.isalnum() for c in kw):
            continue
        seen.add(key)
        cleaned.append(kw)
    return cleaned
50
+
51
  if st.button("Search"):
52
  # --- Fancy progress bar ---
53
  progress = st.progress(0, text="Fetching news...")
 
59
  if articles:
60
  progress.progress(40, text="Extracting keywords...")
61
  keywords = extract_keywords(articles)
62
+
63
+ # --- Clean up keywords ---
64
+ keywords = clean_keywords(keywords)
65
+ st.write("**Extracted Keywords for Reddit Search:**", keywords)
66
+
67
  progress.progress(60, text="Searching Reddit...")
68
  reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
69
+
70
  progress.progress(80, text="Analyzing sentiment...")
71
+ sentiment_results = analyze_sentiment([item["body"] for item in reddit_data])
72
+
73
+ # --- Display results ---
74
+ st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
75
+
76
+ # Create DataFrame for results
77
+ results_df = pd.DataFrame(reddit_data)
78
+ results_df['sentiment'] = sentiment_results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ # Optional: Show data table
81
+ st.dataframe(results_df)
82
+
83
+ # Optional: Show a sentiment plot
84
+ sentiment_counts = results_df['sentiment'].value_counts()
85
+ fig = px.bar(
86
+ x=sentiment_counts.index,
87
+ y=sentiment_counts.values,
88
+ labels={'x': 'Sentiment', 'y': 'Count'},
89
+ title='Sentiment Distribution'
90
+ )
91
+ st.plotly_chart(fig, use_container_width=True)
92
  else:
93
+ st.warning("No news articles found for that query. Try a different topic or broaden the date range.")
94
+