Spaces:
Runtime error
Runtime error
Commit ·
b20b18b
1
Parent(s): 8897995
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,8 @@ import json
|
|
| 8 |
import logging
|
| 9 |
import sys
|
| 10 |
import spacy
|
| 11 |
-
|
|
|
|
| 12 |
import pandas as pd
|
| 13 |
import numpy as np
|
| 14 |
import os
|
|
@@ -22,17 +23,21 @@ from fastapi.staticfiles import StaticFiles
|
|
| 22 |
from fastapi.templating import Jinja2Templates
|
| 23 |
|
| 24 |
from rouge_score import rouge_scorer
|
|
|
|
| 25 |
import scripts.sentiment as sentiment
|
| 26 |
import scripts.twitter_scraper as ts
|
| 27 |
from scripts import sentiment
|
| 28 |
from scripts.summarization import bert_summarization
|
| 29 |
from scripts.twitter_scraper import get_latest_account_tweets
|
| 30 |
-
from scripts import
|
|
|
|
| 31 |
import scripts.utils as utils
|
|
|
|
| 32 |
from scripts import generative
|
| 33 |
import nltk
|
| 34 |
|
| 35 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 36 |
|
| 37 |
app = FastAPI()
|
| 38 |
templates = Jinja2Templates(directory="templates")
|
|
@@ -82,35 +87,34 @@ async def get_accounts() -> List[dict]:
|
|
| 82 |
|
| 83 |
@app.get("/tweets/{username}")
|
| 84 |
def get_tweets_username(username: str) -> dict:
|
| 85 |
-
# if username in username_list:
|
| 86 |
-
# query = f"from:{username} since:{start_date} until:{end_date}"
|
| 87 |
-
# return ts.get_tweets(query=query)
|
| 88 |
-
# else:
|
| 89 |
-
# return {"detail": "Account not in scope of project."}
|
| 90 |
-
|
| 91 |
-
# Method 1: Using Tweepy method
|
| 92 |
-
# df_tweets = get_latest_account_tweets(username)
|
| 93 |
-
|
| 94 |
# Method 2: Use Snscrape
|
| 95 |
df_tweets = ts.get_tweets(handle=username)
|
| 96 |
|
| 97 |
if isinstance(df_tweets, pd.DataFrame):
|
| 98 |
-
print(df_tweets.head(2))
|
| 99 |
-
print(df_tweets.shape)
|
| 100 |
df_tweets = df_tweets[["handle", "created_at","retweet_count","view_count","like_count", "full_text"]]
|
| 101 |
-
df_tweets["created_at"] = df_tweets["created_at"].dt.strftime(
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
df_tweets_html = df_tweets.to_html(classes="center", index=False, escape=False)
|
| 104 |
-
df_tweets.to_html(open(
|
| 105 |
df_tweets_data = df_tweets.to_dict(orient="records")
|
| 106 |
-
|
| 107 |
-
response_data = {
|
| 108 |
-
"html": df_tweets_html,
|
| 109 |
-
"data": df_tweets_data
|
| 110 |
-
}
|
| 111 |
|
| 112 |
return JSONResponse(content=response_data, status_code=200)
|
| 113 |
-
# return HTMLResponse(content=df_tweets_html, status_code=200)
|
| 114 |
else:
|
| 115 |
print("Error: Failed to retrieve tweets.")
|
| 116 |
return df_tweets
|
|
@@ -214,6 +218,7 @@ async def get_sentiment(username: str) -> Dict[str, Dict[str, float]]:
|
|
| 214 |
}
|
| 215 |
|
| 216 |
|
|
|
|
| 217 |
@app.post("/api/generate")
|
| 218 |
async def generate_text(request: Request):
|
| 219 |
"""Generate text from a prompt.
|
|
@@ -269,10 +274,9 @@ async def generate_summary(request: Request):
|
|
| 269 |
|
| 270 |
print("*" * 50)
|
| 271 |
data = await request.json()
|
| 272 |
-
print(
|
| 273 |
# Get the list of text
|
| 274 |
-
tweets = [t[
|
| 275 |
-
|
| 276 |
|
| 277 |
# Concatenate tweets into a single string
|
| 278 |
text = " .".join(tweets)
|
|
@@ -281,35 +285,25 @@ async def generate_summary(request: Request):
|
|
| 281 |
nlp.add_pipe("sentencizer")
|
| 282 |
|
| 283 |
sentences = nlp(text).sents
|
| 284 |
-
|
| 285 |
-
# phrases = Phrases(
|
| 286 |
-
# sentences, min_count=1, threshold=1, connector_words=ENGLISH_CONNECTOR_WORDS
|
| 287 |
-
# )
|
| 288 |
-
# first_sentence = next(iter(sentences))
|
| 289 |
-
# first_sentence
|
| 290 |
sentences = list(sentences)
|
| 291 |
-
# # Shuffle the list
|
| 292 |
-
# random.shuffle(sentences)
|
| 293 |
-
# Option 1
|
| 294 |
-
# sampled_tweets = random.sample(tweets, int(0.1 * len(tweets)))
|
| 295 |
|
| 296 |
# Option 2
|
| 297 |
sampled_sentences = random.sample(sentences, int(0.1 * len(sentences)))
|
| 298 |
-
|
| 299 |
sampled_sentences = [sentiment.tweet_cleaner(s.text) for s in sampled_sentences]
|
| 300 |
|
| 301 |
# Join the strings into one text blob
|
| 302 |
tweet_blob = " ".join(sampled_sentences)
|
| 303 |
|
| 304 |
# Generate the summary
|
| 305 |
-
summary = bert_summarization(
|
| 306 |
-
|
| 307 |
-
)
|
| 308 |
-
print("Summary:",summary)
|
| 309 |
# Return the summary
|
| 310 |
return {"tweets_summary": summary}
|
| 311 |
|
| 312 |
|
|
|
|
| 313 |
@app.get("/examples1")
|
| 314 |
async def read_examples():
|
| 315 |
with open("templates/charts/handle_sentiment_breakdown.html") as f:
|
|
@@ -322,3 +316,9 @@ async def read_examples():
|
|
| 322 |
with open("templates/charts/handle_sentiment_timesteps.html") as f:
|
| 323 |
html = f.read()
|
| 324 |
return HTMLResponse(content=html)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import logging
|
| 9 |
import sys
|
| 10 |
import spacy
|
| 11 |
+
|
| 12 |
+
# sys.setrecursionlimit(20000)
|
| 13 |
import pandas as pd
|
| 14 |
import numpy as np
|
| 15 |
import os
|
|
|
|
| 23 |
from fastapi.templating import Jinja2Templates
|
| 24 |
|
| 25 |
from rouge_score import rouge_scorer
|
| 26 |
+
# Scripts
|
| 27 |
import scripts.sentiment as sentiment
|
| 28 |
import scripts.twitter_scraper as ts
|
| 29 |
from scripts import sentiment
|
| 30 |
from scripts.summarization import bert_summarization
|
| 31 |
from scripts.twitter_scraper import get_latest_account_tweets
|
| 32 |
+
from scripts.sentiment import twitter_sentiment_api_score
|
| 33 |
+
from scripts import twitter_scraper as ts
|
| 34 |
import scripts.utils as utils
|
| 35 |
+
from scripts import translation
|
| 36 |
from scripts import generative
|
| 37 |
import nltk
|
| 38 |
|
| 39 |
logging.basicConfig(level=logging.INFO)
|
| 40 |
+
pd.set_option('display.max_colwidth', 20)
|
| 41 |
|
| 42 |
app = FastAPI()
|
| 43 |
templates = Jinja2Templates(directory="templates")
|
|
|
|
| 87 |
|
| 88 |
@app.get("/tweets/{username}")
|
| 89 |
def get_tweets_username(username: str) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# Method 2: Use Snscrape
|
| 91 |
df_tweets = ts.get_tweets(handle=username)
|
| 92 |
|
| 93 |
if isinstance(df_tweets, pd.DataFrame):
|
|
|
|
|
|
|
| 94 |
df_tweets = df_tweets[["handle", "created_at","retweet_count","view_count","like_count", "full_text"]]
|
| 95 |
+
df_tweets["created_at"] = df_tweets["created_at"].dt.strftime(
|
| 96 |
+
"%Y-%m-%d %H:%M:%S"
|
| 97 |
+
)
|
| 98 |
+
df_tweets = df_tweets.sort_values("created_at", ascending=False)
|
| 99 |
+
|
| 100 |
+
# Additional processing
|
| 101 |
+
logging.info("Running sentiment on tweets")
|
| 102 |
+
sentiments = twitter_sentiment_api_score(
|
| 103 |
+
df_tweets['full_text'].to_list(), use_api=False
|
| 104 |
+
)
|
| 105 |
+
df_tweets["sentiment"] = [s['argmax'] for s in sentiments]
|
| 106 |
+
if username == "alikarimi_ak8":
|
| 107 |
+
p = translation.PersianTextProcessor()
|
| 108 |
+
df_tweets['full_text_translated'] = df_tweets["full_text"].apply(lambda c: p.translate_text(persian_text = c))
|
| 109 |
+
|
| 110 |
+
|
| 111 |
df_tweets_html = df_tweets.to_html(classes="center", index=False, escape=False)
|
| 112 |
+
df_tweets.to_html(open("df_tweets_html.html", "w"))
|
| 113 |
df_tweets_data = df_tweets.to_dict(orient="records")
|
| 114 |
+
|
| 115 |
+
response_data = {"html": df_tweets_html, "data": df_tweets_data}
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
return JSONResponse(content=response_data, status_code=200)
|
|
|
|
| 118 |
else:
|
| 119 |
print("Error: Failed to retrieve tweets.")
|
| 120 |
return df_tweets
|
|
|
|
| 218 |
}
|
| 219 |
|
| 220 |
|
| 221 |
+
## APIs: Primarily called by the index page
|
| 222 |
@app.post("/api/generate")
|
| 223 |
async def generate_text(request: Request):
|
| 224 |
"""Generate text from a prompt.
|
|
|
|
| 274 |
|
| 275 |
print("*" * 50)
|
| 276 |
data = await request.json()
|
| 277 |
+
print("data", data["tweetsData"])
|
| 278 |
# Get the list of text
|
| 279 |
+
tweets = [t["full_text"] for t in data["tweetsData"]]
|
|
|
|
| 280 |
|
| 281 |
# Concatenate tweets into a single string
|
| 282 |
text = " .".join(tweets)
|
|
|
|
| 285 |
nlp.add_pipe("sentencizer")
|
| 286 |
|
| 287 |
sentences = nlp(text).sents
|
| 288 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
sentences = list(sentences)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
# Option 2
|
| 292 |
sampled_sentences = random.sample(sentences, int(0.1 * len(sentences)))
|
| 293 |
+
|
| 294 |
sampled_sentences = [sentiment.tweet_cleaner(s.text) for s in sampled_sentences]
|
| 295 |
|
| 296 |
# Join the strings into one text blob
|
| 297 |
tweet_blob = " ".join(sampled_sentences)
|
| 298 |
|
| 299 |
# Generate the summary
|
| 300 |
+
summary = bert_summarization(tweet_blob)
|
| 301 |
+
print("Summary:", summary)
|
|
|
|
|
|
|
| 302 |
# Return the summary
|
| 303 |
return {"tweets_summary": summary}
|
| 304 |
|
| 305 |
|
| 306 |
+
## Historical Tweets pages
|
| 307 |
@app.get("/examples1")
|
| 308 |
async def read_examples():
|
| 309 |
with open("templates/charts/handle_sentiment_breakdown.html") as f:
|
|
|
|
| 316 |
with open("templates/charts/handle_sentiment_timesteps.html") as f:
|
| 317 |
html = f.read()
|
| 318 |
return HTMLResponse(content=html)
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
# uvicorn --workers=2 app:app
|
| 322 |
+
if __name__ == "__main__":
|
| 323 |
+
# uvicorn.run(app, host="0.0.0.0", port=8000)
|
| 324 |
+
uvicorn.run("app:app", host="127.0.0.1", port=5050, reload=True)
|