dhruv107 committed on
Commit
4354477
·
1 Parent(s): 6029933
Files changed (4) hide show
  1. API_LOGS/log_file.log +10 -0
  2. Dockerfile +35 -0
  3. pretrained.py +72 -0
  4. requirements.txt +56 -0
API_LOGS/log_file.log ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-02-13 13:57:19,153:DEBUG:pretrained.py:<module>:54:loading all models...
2
+ 2024-02-13 13:57:19,153:INFO:SentenceTransformer.py:__init__:110:Load pretrained SentenceTransformer: models--sentence-transformers--all-MiniLM-L6-v2/snapshots/1a310852cf8e58d22c5ebff537711d504ad4ad66
3
+ 2024-02-13 13:57:19,298:INFO:SentenceTransformer.py:__init__:216:Use pytorch device_name: cpu
4
+ 2024-02-13 13:57:19,302:INFO:_internal.py:_log:96:WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
5
+ * Running on all addresses (0.0.0.0)
6
+ * Running on http://127.0.0.1:9669
7
+ * Running on http://192.168.1.11:9669
8
+ 2024-02-13 13:57:19,302:INFO:_internal.py:_log:96:Press CTRL+C to quit
9
+ 2024-02-13 13:57:24,234:INFO:_internal.py:_log:96:127.0.0.1 - - [13/Feb/2024 13:57:24] "POST /match_text HTTP/1.1" 400 -
10
+ 2024-02-13 13:57:36,992:INFO:_internal.py:_log:96:127.0.0.1 - - [13/Feb/2024 13:57:36] "POST /match_text HTTP/1.1" 415 -
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.8

# WORKDIR creates /app if missing; a separate `RUN mkdir /app` is redundant.
WORKDIR /app

# Install dependencies from the manifest alone first: this layer is cached
# and only rebuilt when requirements.txt itself changes, not on every
# source edit. --no-cache-dir keeps the pip download cache out of the image.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# COPY is preferred over ADD for plain local files (ADD additionally
# auto-extracts tarballs and fetches URLs, which is not wanted here).
COPY . /app

# Optional system packages (currently unused):
# RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
# RUN apt update && apt install tesseract-ocr -y

# Optional model downloads from Google Drive (currently disabled; the app
# downloads "all-MiniLM-L6-v2" from the HF hub at startup instead):
# GL_MODEL: https://drive.google.com/file/d/1mgO6Y5_7EpJ9LpW1koXdfkd-U70MXL_f/view?usp=sharing
# RUN wget --no-check-certificate --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1mgO6Y5_7EpJ9LpW1koXdfkd-U70MXL_f' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1mgO6Y5_7EpJ9LpW1koXdfkd-U70MXL_f" -O GL_MODEL.zip && rm -rf /tmp/cookies.txt
# UL_MODEL: https://drive.google.com/file/d/1XFl6DBejzZ0nlCgz71yuY1LnvNHI9wWG/view?usp=sharing
# RUN wget --no-check-certificate --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1XFl6DBejzZ0nlCgz71yuY1LnvNHI9wWG' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1XFl6DBejzZ0nlCgz71yuY1LnvNHI9wWG" -O UL_MODEL.zip && rm -rf /tmp/cookies.txt
# core classify: https://drive.google.com/file/d/1yHls7Z8vUaPFSuCkv59E0qcOFwK4I75N/view?usp=sharing
# RUN wget --no-check-certificate --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1yHls7Z8vUaPFSuCkv59E0qcOFwK4I75N' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1yHls7Z8vUaPFSuCkv59E0qcOFwK4I75N" -O core.zip && rm -rf /tmp/cookies.txt
# RUN python -m spacy download en_core_web_md
# RUN unzip GL_MODEL.zip && unzip UL_MODEL.zip && unzip core.zip

# pretrained.py serves Flask on 0.0.0.0:7860.
EXPOSE 7860
CMD ["python", "pretrained.py"]
pretrained.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sentence_transformers import SentenceTransformer, util
import pickle
import pandas as pd
import numpy as np
import os
import json
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
import logging


# Create the log directory before logging.basicConfig opens the file:
# without this, a fresh checkout (no API_LOGS/ directory) crashes with
# FileNotFoundError at import time.
os.makedirs('API_LOGS', exist_ok=True)

# Set up the root logger with a file handler; filemode='w' truncates the
# log on every process start.
logging.basicConfig(
    filename='API_LOGS/log_file.log',
    filemode='w',
    level=logging.DEBUG,
    format='%(asctime)s:%(levelname)s:%(filename)s:%(funcName)s:%(lineno)d:%(message)s',
)

logger = logging.getLogger()


app = Flask(__name__)
@app.route('/match_text', methods=['POST'])
def similarity():
    """POST /match_text with JSON {'text1': ..., 'text2': ...}.

    Encodes both texts with the module-level SentenceTransformer `model`
    and returns {'similarity_score': <float cosine similarity>}.

    Returns:
        200 with the similarity score on success;
        400 when the body is not a JSON object or a key is missing;
        500 (with the error text) on any unexpected failure.
    """
    try:
        logger.debug('receiving the json data')
        # silent=True makes an unparsable / wrong-content-type body yield
        # None instead of letting Werkzeug abort (415/400) before we can
        # return our own error payload.
        data = request.get_json(silent=True)
        logger.debug('received the json data')

        # Guard the body shape explicitly: the original `'text1' not in
        # data` raised TypeError when data was None, turning a client
        # error into a 500.
        if not isinstance(data, dict) or 'text1' not in data or 'text2' not in data:
            logger.debug('Error : Both text1 and text2 must be provided!')
            return jsonify({'error': 'Both text1 and text2 must be provided.'}), 400

        logger.debug('extracting the sentences from the request')
        sentences1 = data['text1']
        sentences2 = data['text2']
        logger.debug('extracted the sentences from the request')

        logger.debug('calculating the embeddings')
        embeddings1 = model.encode(sentences1, convert_to_tensor=True)
        embeddings2 = model.encode(sentences2, convert_to_tensor=True)
        logger.debug('embeddings calculated')

        logger.debug('calculating the cosine score')
        cosine_scores = util.cos_sim(embeddings1, embeddings2)
        logger.debug('calculated the cosine score')

        return jsonify({'similarity_score': cosine_scores[0][0].item()}), 200

    except Exception as e:
        # logger.exception records the full traceback, not just str(e).
        logger.exception('Unknown error! : %s', e)
        return jsonify({'error': str(e)}), 500
58
+
59
+
60
+
61
if __name__ == '__main__':

    # Announce startup in both the log file and on stdout.
    logger.debug('loading model...')
    print('loading model...')

    # Sentence-embedding model pulled from the HF hub; alternative
    # loading strategies kept for reference:
    # model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder='./')
    # model = SentenceTransformer("models--sentence-transformers--all-MiniLM-L6-v2/snapshots/1a310852cf8e58d22c5ebff537711d504ad4ad66")
    model = SentenceTransformer("all-MiniLM-L6-v2")

    # Raise the tokenizer truncation limit to 512 tokens.
    model.max_seq_length = 512
    print(f'model max lenght is :{model.max_seq_length}')

    # Single-threaded development server, reachable from other hosts.
    app.run(debug=False, port=7860, host='0.0.0.0', threaded=False)
requirements.txt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ blinker==1.7.0
2
+ certifi==2024.2.2
3
+ charset-normalizer==3.3.2
4
+ click==8.1.7
5
+ filelock==3.13.1
6
+ Flask==3.0.2
7
+ fsspec==2024.2.0
8
+ huggingface-hub==0.20.3
9
+ idna==3.6
10
+ importlib-metadata==7.0.1
11
+ itsdangerous==2.1.2
12
+ Jinja2==3.1.3
13
+ joblib==1.3.2
14
+ MarkupSafe==2.1.5
15
+ mpmath==1.3.0
16
+ networkx==3.1
17
+ nltk==3.8.1
18
+ numpy==1.24.4
19
+ nvidia-cublas-cu12==12.1.3.1
20
+ nvidia-cuda-cupti-cu12==12.1.105
21
+ nvidia-cuda-nvrtc-cu12==12.1.105
22
+ nvidia-cuda-runtime-cu12==12.1.105
23
+ nvidia-cudnn-cu12==8.9.2.26
24
+ nvidia-cufft-cu12==11.0.2.54
25
+ nvidia-curand-cu12==10.3.2.106
26
+ nvidia-cusolver-cu12==11.4.5.107
27
+ nvidia-cusparse-cu12==12.1.0.106
28
+ nvidia-nccl-cu12==2.19.3
29
+ nvidia-nvjitlink-cu12==12.3.101
30
+ nvidia-nvtx-cu12==12.1.105
31
+ packaging==23.2
32
+ pandas==2.0.3
33
+ pillow==10.2.0
34
+ python-dateutil==2.8.2
35
+ pytz==2024.1
36
+ PyYAML==6.0.1
37
+ regex==2023.12.25
38
+ requests==2.31.0
39
+ safetensors==0.4.2
40
+ scikit-learn==1.3.2
41
+ scipy==1.10.1
42
+ sentence-transformers==2.3.1
43
+ sentencepiece==0.1.99
44
+ six==1.16.0
45
+ sympy==1.12
46
+ threadpoolctl==3.2.0
47
+ tokenizers==0.15.2
48
+ torch==2.2.0
49
+ tqdm==4.66.2
50
+ transformers==4.37.2
51
+ triton==2.2.0
52
+ typing_extensions==4.9.0
53
+ tzdata==2024.1
54
+ urllib3==2.2.0
55
+ Werkzeug==3.0.1
56
+ zipp==3.17.0