Spaces:
Runtime error
Commit ·
9045c6e
1
Parent(s): e2103e5
updated hf_cache
Browse files
- Dockerfile +3 -0
- app.py +2 -1
- reddit/reddit_sentiment_analysis.py +3 -2
- reddit/reddit_utils.py +2 -2
- utils.py +3 -1
Dockerfile
CHANGED
|
@@ -23,6 +23,9 @@ COPY . /app
|
|
| 23 |
# Make port 5000 available to the world outside this container
|
| 24 |
EXPOSE 8000
|
| 25 |
|
|
|
|
|
|
|
|
|
|
| 26 |
# Get secret GEMINI_1 and output it to /test at buildtime
|
| 27 |
RUN --mount=type=secret,id=GEMINI_1,mode=0444,required=true \
|
| 28 |
cat /run/secrets/GEMINI_1 > /test
|
|
|
|
| 23 |
# Make port 5000 available to the world outside this container
|
| 24 |
EXPOSE 8000
|
| 25 |
|
| 26 |
+
# Get secret TRANSFORMERS_CACHE and output it to /test at buildtime
|
| 27 |
+
RUN --mount=type=secret,id=TRANSFORMERS_CACHE,mode=0444,required=true \
|
| 28 |
+
cat /run/secrets/TRANSFORMERS_CACHE > /test
|
| 29 |
# Get secret GEMINI_1 and output it to /test at buildtime
|
| 30 |
RUN --mount=type=secret,id=GEMINI_1,mode=0444,required=true \
|
| 31 |
cat /run/secrets/GEMINI_1 > /test
|
app.py
CHANGED
|
@@ -18,7 +18,8 @@ from asyncio import TimeoutError
|
|
| 18 |
import pandas as pd
|
| 19 |
from scalar_fastapi import get_scalar_api_reference
|
| 20 |
from scalar_fastapi.scalar_fastapi import Layout
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
app = FastAPI(
|
| 24 |
debug=True,
|
|
|
|
| 18 |
import pandas as pd
|
| 19 |
from scalar_fastapi import get_scalar_api_reference
|
| 20 |
from scalar_fastapi.scalar_fastapi import Layout
|
| 21 |
+
# Set the new cache directory
|
| 22 |
+
os.environ["TRANSFORMERS_CACHE"] = "./cache"
|
| 23 |
|
| 24 |
app = FastAPI(
|
| 25 |
debug=True,
|
reddit/reddit_sentiment_analysis.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import ast
|
| 3 |
from transformers import BertTokenizer, BertForSequenceClassification
|
|
|
|
| 4 |
from transformers import pipeline
|
| 5 |
output=pd.DataFrame()
|
| 6 |
class SentimentAnalysis:
|
| 7 |
def __init__(self):
|
| 8 |
self.classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
|
| 9 |
-
self.finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
|
| 10 |
-
self.tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
|
| 11 |
def process_comment(self,comment):
|
| 12 |
|
| 13 |
sentence=[comment['comment'][:512]]
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import ast
|
| 3 |
from transformers import BertTokenizer, BertForSequenceClassification
|
| 4 |
+
from reddit.reddit_utils import cache_dir
|
| 5 |
from transformers import pipeline
|
| 6 |
output=pd.DataFrame()
|
| 7 |
class SentimentAnalysis:
|
| 8 |
def __init__(self):
|
| 9 |
self.classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
|
| 10 |
+
self.finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3,cache_dir=cache_dir)
|
| 11 |
+
self.tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone',cache_dir=cache_dir)
|
| 12 |
def process_comment(self,comment):
|
| 13 |
|
| 14 |
sentence=[comment['comment'][:512]]
|
reddit/reddit_utils.py
CHANGED
|
@@ -2,7 +2,7 @@ import time
|
|
| 2 |
|
| 3 |
import pandas as pd
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
-
|
| 6 |
def get_microseconds_list(length=3):
|
| 7 |
# Get the current time in microseconds
|
| 8 |
microseconds = int(time.time() * 1_000_000)
|
|
@@ -12,7 +12,7 @@ def get_microseconds_list(length=3):
|
|
| 12 |
|
| 13 |
|
| 14 |
def topic_sort(path1,query, path2='', path3='',isForCompetitorAnalysis=False):
|
| 15 |
-
sentence_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 16 |
if isForCompetitorAnalysis==True:
|
| 17 |
df=pd.read_csv(path1)
|
| 18 |
else:
|
|
|
|
| 2 |
|
| 3 |
import pandas as pd
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
+
from reddit.reddit_utils import cache_dir
|
| 6 |
def get_microseconds_list(length=3):
|
| 7 |
# Get the current time in microseconds
|
| 8 |
microseconds = int(time.time() * 1_000_000)
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
def topic_sort(path1,query, path2='', path3='',isForCompetitorAnalysis=False):
|
| 15 |
+
sentence_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2",cache_folder=cache_dir)
|
| 16 |
if isForCompetitorAnalysis==True:
|
| 17 |
df=pd.read_csv(path1)
|
| 18 |
else:
|
utils.py
CHANGED
|
@@ -23,4 +23,6 @@ def time_execution(func):
|
|
| 23 |
print(f"Function '{func.__name__}' executed in {execution_time:.4f} seconds")
|
| 24 |
return result
|
| 25 |
return sync_wrapper
|
| 26 |
-
|
|
|
|
|
|
|
|
|
| 23 |
print(f"Function '{func.__name__}' executed in {execution_time:.4f} seconds")
|
| 24 |
return result
|
| 25 |
return sync_wrapper
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
cache_dir='./cache'
|