Spaces:

honeybansal23
/

nextAnalytics

Runtime error

honeybansal23 committed on Nov 24, 2024

Commit

9045c6e

1 Parent(s): e2103e5

updated hf_cache

Files changed (5) hide show

Dockerfile CHANGED Viewed

@@ -23,6 +23,9 @@ COPY . /app
 # Make port 8000 available to the world outside this container
 EXPOSE 8000
 # Get secret GEMINI_1 and output it to /test at buildtime
 RUN --mount=type=secret,id=GEMINI_1,mode=0444,required=true \
    cat /run/secrets/GEMINI_1 > /test

 # Make port 8000 available to the world outside this container
 EXPOSE 8000
+# Get secret TRANSFORMERS_CACHE and output it to /test at buildtime
+RUN --mount=type=secret,id=TRANSFORMERS_CACHE,mode=0444,required=true \
+   cat /run/secrets/TRANSFORMERS_CACHE > /test
 # Get secret GEMINI_1 and output it to /test at buildtime
 RUN --mount=type=secret,id=GEMINI_1,mode=0444,required=true \
    cat /run/secrets/GEMINI_1 > /test

app.py CHANGED Viewed

@@ -18,7 +18,8 @@ from asyncio import TimeoutError
 import pandas as pd
 from scalar_fastapi import get_scalar_api_reference
 from scalar_fastapi.scalar_fastapi import Layout
 app = FastAPI(
     debug=True,

 import pandas as pd
 from scalar_fastapi import get_scalar_api_reference
 from scalar_fastapi.scalar_fastapi import Layout
+# Set the new cache directory
+os.environ["TRANSFORMERS_CACHE"] = "./cache"
 app = FastAPI(
     debug=True,

reddit/reddit_sentiment_analysis.py CHANGED Viewed

@@ -1,13 +1,14 @@
 import pandas as pd
 import ast
 from transformers import BertTokenizer, BertForSequenceClassification
 from transformers import pipeline
 output=pd.DataFrame()
 class SentimentAnalysis:
   def __init__(self):
     self.classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
-    self.finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
-    self.tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
   def process_comment(self,comment):
       sentence=[comment['comment'][:512]]

 import pandas as pd
 import ast
 from transformers import BertTokenizer, BertForSequenceClassification
+from reddit.reddit_utils import cache_dir
 from transformers import pipeline
 output=pd.DataFrame()
 class SentimentAnalysis:
   def __init__(self):
     self.classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
+    self.finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3,cache_dir=cache_dir)
+    self.tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone',cache_dir=cache_dir)
   def process_comment(self,comment):
       sentence=[comment['comment'][:512]]

reddit/reddit_utils.py CHANGED Viewed

@@ -2,7 +2,7 @@ import time
 import pandas as pd
 from sentence_transformers import SentenceTransformer
 def get_microseconds_list(length=3):
     # Get the current time in microseconds
     microseconds = int(time.time() * 1_000_000)
@@ -12,7 +12,7 @@ def get_microseconds_list(length=3):
 def topic_sort(path1,query, path2='', path3='',isForCompetitorAnalysis=False):
-  sentence_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
   if isForCompetitorAnalysis==True:
       df=pd.read_csv(path1)
   else:

 import pandas as pd
 from sentence_transformers import SentenceTransformer
+from reddit.reddit_utils import cache_dir
 def get_microseconds_list(length=3):
     # Get the current time in microseconds
     microseconds = int(time.time() * 1_000_000)
 def topic_sort(path1,query, path2='', path3='',isForCompetitorAnalysis=False):
+  sentence_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2",cache_folder=cache_dir)
   if isForCompetitorAnalysis==True:
       df=pd.read_csv(path1)
   else:

utils.py CHANGED Viewed

@@ -23,4 +23,6 @@ def time_execution(func):
             print(f"Function '{func.__name__}' executed in {execution_time:.4f} seconds")
             return result
         return sync_wrapper

             print(f"Function '{func.__name__}' executed in {execution_time:.4f} seconds")
             return result
         return sync_wrapper
+cache_dir='./cache'