honeybansal23 committed on
Commit
9045c6e
·
1 Parent(s): e2103e5

updated hf_cache

Browse files
Dockerfile CHANGED
@@ -23,6 +23,9 @@ COPY . /app
23
  # Make port 8000 available to the world outside this container
24
  EXPOSE 8000
25
 
 
 
 
26
  # Get secret GEMINI_1 and output it to /test at buildtime
27
  RUN --mount=type=secret,id=GEMINI_1,mode=0444,required=true \
28
  cat /run/secrets/GEMINI_1 > /test
 
23
  # Make port 8000 available to the world outside this container
24
  EXPOSE 8000
25
 
26
+ # Get secret TRANSFORMERS_CACHE and output it to /test at buildtime
27
+ RUN --mount=type=secret,id=TRANSFORMERS_CACHE,mode=0444,required=true \
28
+ cat /run/secrets/TRANSFORMERS_CACHE > /test
29
  # Get secret GEMINI_1 and output it to /test at buildtime
30
  RUN --mount=type=secret,id=GEMINI_1,mode=0444,required=true \
31
  cat /run/secrets/GEMINI_1 > /test
app.py CHANGED
@@ -18,7 +18,8 @@ from asyncio import TimeoutError
18
  import pandas as pd
19
  from scalar_fastapi import get_scalar_api_reference
20
  from scalar_fastapi.scalar_fastapi import Layout
21
-
 
22
 
23
  app = FastAPI(
24
  debug=True,
 
18
  import pandas as pd
19
  from scalar_fastapi import get_scalar_api_reference
20
  from scalar_fastapi.scalar_fastapi import Layout
21
+ # Set the new cache directory
22
+ os.environ["TRANSFORMERS_CACHE"] = "./cache"
23
 
24
  app = FastAPI(
25
  debug=True,
reddit/reddit_sentiment_analysis.py CHANGED
@@ -1,13 +1,14 @@
1
  import pandas as pd
2
  import ast
3
  from transformers import BertTokenizer, BertForSequenceClassification
 
4
  from transformers import pipeline
5
  output=pd.DataFrame()
6
  class SentimentAnalysis:
7
  def __init__(self):
8
  self.classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
9
- self.finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
10
- self.tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
11
  def process_comment(self,comment):
12
 
13
  sentence=[comment['comment'][:512]]
 
1
  import pandas as pd
2
  import ast
3
  from transformers import BertTokenizer, BertForSequenceClassification
4
+ from reddit.reddit_utils import cache_dir
5
  from transformers import pipeline
6
  output=pd.DataFrame()
7
  class SentimentAnalysis:
8
  def __init__(self):
9
  self.classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
10
+ self.finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3,cache_dir=cache_dir)
11
+ self.tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone',cache_dir=cache_dir)
12
  def process_comment(self,comment):
13
 
14
  sentence=[comment['comment'][:512]]
reddit/reddit_utils.py CHANGED
@@ -2,7 +2,7 @@ import time
2
 
3
  import pandas as pd
4
  from sentence_transformers import SentenceTransformer
5
-
6
  def get_microseconds_list(length=3):
7
  # Get the current time in microseconds
8
  microseconds = int(time.time() * 1_000_000)
@@ -12,7 +12,7 @@ def get_microseconds_list(length=3):
12
 
13
 
14
  def topic_sort(path1,query, path2='', path3='',isForCompetitorAnalysis=False):
15
- sentence_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
16
  if isForCompetitorAnalysis==True:
17
  df=pd.read_csv(path1)
18
  else:
 
2
 
3
  import pandas as pd
4
  from sentence_transformers import SentenceTransformer
5
+ from reddit.reddit_utils import cache_dir
6
  def get_microseconds_list(length=3):
7
  # Get the current time in microseconds
8
  microseconds = int(time.time() * 1_000_000)
 
12
 
13
 
14
  def topic_sort(path1,query, path2='', path3='',isForCompetitorAnalysis=False):
15
+ sentence_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2",cache_folder=cache_dir)
16
  if isForCompetitorAnalysis==True:
17
  df=pd.read_csv(path1)
18
  else:
utils.py CHANGED
@@ -23,4 +23,6 @@ def time_execution(func):
23
  print(f"Function '{func.__name__}' executed in {execution_time:.4f} seconds")
24
  return result
25
  return sync_wrapper
26
-
 
 
 
23
  print(f"Function '{func.__name__}' executed in {execution_time:.4f} seconds")
24
  return result
25
  return sync_wrapper
26
+
27
+
28
+ cache_dir='./cache'