Spaces:
Runtime error
Runtime error
Upload with huggingface_hub
Browse files
- Dockerfile +20 -0
- app.py +30 -0
- requirements.txt +2 -0
- workcell.yaml +10 -0
Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.8

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user so nothing below runs as root
USER user
# Point HOME at the user's home directory and expose user-level pip scripts
# (pip installs console scripts into ~/.local/bin for non-root users)
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
# Set the working directory to the app directory inside the user's home
WORKDIR $HOME/app

# Install dependencies before copying the rest of the sources: this layer is
# then reused from the build cache unless requirements.txt itself changes.
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r $HOME/app/requirements.txt

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

# Serve the workcell described by workcell.yaml on port 7860, bound to all interfaces
CMD ["workcell", "serve", "--config", "workcell.yaml", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from tweety.bot import Twitter
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from workcell.integrations.types import PerspectiveTable
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Request schema for the scraper entry point: it carries a single field,
# the Twitter username to read from.
# (Kept as comments, not a class docstring, so the generated model schema
# is unchanged.)
class Input(BaseModel):
    # Account handle to scrape; "sama" is used when the caller supplies none.
    username: str = Field(
        default="sama",
        description="Twitter username of the person you want to scrape",
    )
|
| 9 |
+
|
| 10 |
+
def fetch_twitter_by_id(username):
    """Fetch tweets for the given Twitter username.

    Builds a ``Twitter`` client for ``username`` and returns the result of
    its ``get_tweets()`` call unmodified.

    Args:
        username: Handle of the account whose tweets should be fetched.

    Returns:
        Whatever ``Twitter.get_tweets()`` returns for that account.

    NOTE(review): the original comment said this yields 20 tweets per call;
    that count is decided by the library, not by this function - confirm.
    """
    client = Twitter(username)
    return client.get_tweets()
|
| 16 |
+
|
| 17 |
+
def process_tweets(tweets):
    """Convert tweet objects into a DataFrame holding a fixed set of fields.

    Args:
        tweets: Iterable of objects exposing ``to_dict()``. Each returned
            dict must contain every key listed in ``filter_columns``; any
            extra keys are dropped.

    Returns:
        A ``pandas.DataFrame`` with one row per tweet and exactly the columns
        ``created_on``, ``text``, ``likes``, ``reply_counts``,
        ``retweet_counts`` and ``id``, in that order. An empty ``tweets``
        input yields an empty frame with those same columns.

    Raises:
        KeyError: If tweets are present but none of them provides one of the
            expected columns.
    """
    filter_columns = ['created_on', 'text', 'likes', 'reply_counts', 'retweet_counts', 'id']
    records = [tweet.to_dict() for tweet in tweets]
    if not records:
        # pd.DataFrame([]) has no columns at all, so selecting filter_columns
        # from it would raise KeyError; return a well-formed empty table.
        return pd.DataFrame(columns=filter_columns)
    return pd.DataFrame(records)[filter_columns]
|
| 25 |
+
|
| 26 |
+
def twitter_scraper(input: Input) -> PerspectiveTable:
    """Returns the latest tweets of the given username, such as 'elonmusk'.

    Fetches the tweets for ``input.username`` via ``fetch_twitter_by_id``,
    reduces them to a DataFrame with ``process_tweets``, and wraps that
    frame in a ``PerspectiveTable``.

    NOTE(review): the number of tweets returned (originally documented as
    20) depends on the Twitter client library - confirm.
    """
    frame = process_tweets(fetch_twitter_by_id(username=input.username))
    return PerspectiveTable(data=frame)
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
workcell
|
| 2 |
+
tweety-ns
|
workcell.yaml
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
workcell_name: twitter_scraper
|
| 2 |
+
workcell_provider: huggingface
|
| 3 |
+
workcell_id: weanalyze/twitter_scraper
|
| 4 |
+
workcell_version: latest
|
| 5 |
+
workcell_runtime: python3.8
|
| 6 |
+
workcell_entrypoint: app:twitter_scraper
|
| 7 |
+
workcell_code:
|
| 8 |
+
ImageUri: ''
|
| 9 |
+
workcell_tags: {}
|
| 10 |
+
workcell_envs: {}
|