test
Browse files- API_LOGS/log_file.log +10 -0
- Dockerfile +35 -0
- pretrained.py +72 -0
- requirements.txt +56 -0
API_LOGS/log_file.log
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-02-13 13:57:19,153:DEBUG:pretrained.py:<module>:54:loading all models...
|
| 2 |
+
2024-02-13 13:57:19,153:INFO:SentenceTransformer.py:__init__:110:Load pretrained SentenceTransformer: models--sentence-transformers--all-MiniLM-L6-v2/snapshots/1a310852cf8e58d22c5ebff537711d504ad4ad66
|
| 3 |
+
2024-02-13 13:57:19,298:INFO:SentenceTransformer.py:__init__:216:Use pytorch device_name: cpu
|
| 4 |
+
2024-02-13 13:57:19,302:INFO:_internal.py:_log:96:[31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
| 5 |
+
* Running on all addresses (0.0.0.0)
|
| 6 |
+
* Running on http://127.0.0.1:9669
|
| 7 |
+
* Running on http://192.168.1.11:9669
|
| 8 |
+
2024-02-13 13:57:19,302:INFO:_internal.py:_log:96:[33mPress CTRL+C to quit[0m
|
| 9 |
+
2024-02-13 13:57:24,234:INFO:_internal.py:_log:96:127.0.0.1 - - [13/Feb/2024 13:57:24] "[31m[1mPOST /match_text HTTP/1.1[0m" 400 -
|
| 10 |
+
2024-02-13 13:57:36,992:INFO:_internal.py:_log:96:127.0.0.1 - - [13/Feb/2024 13:57:36] "[31m[1mPOST /match_text HTTP/1.1[0m" 415 -
|
Dockerfile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image for the text-similarity API started by pretrained.py.
FROM python:3.8

# WORKDIR creates /app when it does not exist, so no separate mkdir is needed.
WORKDIR /app

# RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y

# RUN apt update

# RUN apt install tesseract-ocr -y

# Copy only the dependency list first so the pip layer stays cached when
# application code changes but requirements.txt does not.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# COPY instead of ADD: this is a plain local copy; ADD's archive extraction
# and URL fetching are not wanted here.
COPY . /app

#GL_MODEL
# https://drive.google.com/file/d/1mgO6Y5_7EpJ9LpW1koXdfkd-U70MXL_f/view?usp=sharing
# RUN wget --no-check-certificate --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1mgO6Y5_7EpJ9LpW1koXdfkd-U70MXL_f' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1mgO6Y5_7EpJ9LpW1koXdfkd-U70MXL_f" -O GL_MODEL.zip && rm -rf /tmp/cookies.txt

#UL_MODEL
# https://drive.google.com/file/d/1XFl6DBejzZ0nlCgz71yuY1LnvNHI9wWG/view?usp=sharing
# RUN wget --no-check-certificate --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1XFl6DBejzZ0nlCgz71yuY1LnvNHI9wWG' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1XFl6DBejzZ0nlCgz71yuY1LnvNHI9wWG" -O UL_MODEL.zip && rm -rf /tmp/cookies.txt

#core classify
# https://drive.google.com/file/d/1yHls7Z8vUaPFSuCkv59E0qcOFwK4I75N/view?usp=sharing
# RUN wget --no-check-certificate --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1yHls7Z8vUaPFSuCkv59E0qcOFwK4I75N' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1yHls7Z8vUaPFSuCkv59E0qcOFwK4I75N" -O core.zip && rm -rf /tmp/cookies.txt

# RUN python -m spacy download en_core_web_md
# RUN unzip GL_MODEL.zip
# RUN unzip UL_MODEL.zip
# RUN unzip core.zip

# Same port that pretrained.py passes to app.run().
EXPOSE 7860
CMD ["python", "pretrained.py"]
|
pretrained.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer, util
|
| 2 |
+
import pickle
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
import os
|
| 6 |
+
import json
|
| 7 |
+
from flask import Flask, request, jsonify
|
| 8 |
+
from werkzeug.utils import secure_filename
|
| 9 |
+
import logging
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Set up the root logger and attach a file handler to it.
# The log directory is created first: logging.basicConfig(filename=...) opens
# the file immediately and raises FileNotFoundError when the parent directory
# does not exist.
os.makedirs('API_LOGS', exist_ok=True)
logging.basicConfig(
    filename='API_LOGS/log_file.log',
    filemode='w',  # 'w' truncates the log file on every start
    level=logging.DEBUG,
    format='%(asctime)s:%(levelname)s:%(filename)s:%(funcName)s:%(lineno)d:%(message)s',
)

# Root logger (no name passed), shared by the rest of this module.
logger = logging.getLogger()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Flask application object; the /match_text route is registered on it and the
# __main__ block starts it with app.run().
app = Flask(__name__)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@app.route('/match_text', methods=['POST'])
def similarity():
    """Return the cosine similarity between ``text1`` and ``text2``.

    Expects a JSON object body containing the keys ``text1`` and ``text2``.
    Both values are encoded with the module-level ``model`` (assigned in the
    ``__main__`` block of this file) and compared with ``util.cos_sim``; the
    element at ``[0][0]`` of the resulting score matrix is returned.

    Returns:
        A ``(response, status)`` tuple:

        * ``200`` with ``{'similarity_score': <score>}`` on success.
        * ``400`` with ``{'error': ...}`` when the body is not a JSON object
          or when either key is missing.
        * ``500`` with ``{'error': ...}`` on any unexpected failure.
    """
    logger.debug('receiving the json data')
    # silent=True makes get_json() return None instead of raising on a wrong
    # Content-Type or malformed JSON, so those client mistakes are answered
    # with 400 below instead of being swallowed by the generic handler as 500.
    data = request.get_json(silent=True)
    logger.debug('received the json data')

    # Also rejects valid JSON that is not an object (null, list, number, ...).
    if not isinstance(data, dict):
        logger.warning('Error : request body is not a JSON object')
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    if 'text1' not in data or 'text2' not in data:
        logger.warning('Error : Both text1 and text2 must be provided!')
        return jsonify({'error': 'Both text1 and text2 must be provided.'}), 400

    try:
        logger.debug('extracting the sentences from the request')
        sentences1 = data['text1']
        sentences2 = data['text2']
        logger.debug('extracted the sentences from the request')

        logger.debug('calculating the embeddings')
        embeddings1 = model.encode(sentences1, convert_to_tensor=True)
        embeddings2 = model.encode(sentences2, convert_to_tensor=True)
        logger.debug('embeddings calculated')

        logger.debug('calculating the cosine score')
        cosine_scores = util.cos_sim(embeddings1, embeddings2)
        logger.debug('calculated the cosine score')

        # Extract the score once and reuse it for logging and the response.
        score = cosine_scores[0][0].item()
        logger.debug('similarity score: %s', score)
        return jsonify({'similarity_score': score}), 200

    except Exception as e:
        # Request-handler boundary: record the full traceback and report the
        # failure to the client rather than letting the exception escape.
        logger.exception('Unknown error! : %s', e)
        return jsonify({'error': str(e)}), 500
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
if __name__ == '__main__':
    # Script entry point: load the embedding model into the module-level name
    # `model` (read by the /match_text handler), then start the Flask server.
    logger.debug('loading model...')
    print('loading model...')

    # Alternative load calls that were previously tried here:
    #   SentenceTransformer("all-MiniLM-L6-v2", cache_folder='./')
    #   SentenceTransformer("models--sentence-transformers--all-MiniLM-L6-v2/snapshots/1a310852cf8e58d22c5ebff537711d504ad4ad66")
    model = SentenceTransformer("all-MiniLM-L6-v2")
    # NOTE(review): presumably overrides the model's default input truncation
    # length - confirm 512 is supported by this checkpoint.
    model.max_seq_length = 512
    print(f'model max length is :{model.max_seq_length}')

    # Port 7860 matches the EXPOSE line of the Dockerfile; host 0.0.0.0 binds
    # all interfaces.
    app.run(debug=False, port=7860, host='0.0.0.0', threaded=False)
|
requirements.txt
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
blinker==1.7.0
|
| 2 |
+
certifi==2024.2.2
|
| 3 |
+
charset-normalizer==3.3.2
|
| 4 |
+
click==8.1.7
|
| 5 |
+
filelock==3.13.1
|
| 6 |
+
Flask==3.0.2
|
| 7 |
+
fsspec==2024.2.0
|
| 8 |
+
huggingface-hub==0.20.3
|
| 9 |
+
idna==3.6
|
| 10 |
+
importlib-metadata==7.0.1
|
| 11 |
+
itsdangerous==2.1.2
|
| 12 |
+
Jinja2==3.1.3
|
| 13 |
+
joblib==1.3.2
|
| 14 |
+
MarkupSafe==2.1.5
|
| 15 |
+
mpmath==1.3.0
|
| 16 |
+
networkx==3.1
|
| 17 |
+
nltk==3.8.1
|
| 18 |
+
numpy==1.24.4
|
| 19 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 20 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 21 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 22 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 23 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 24 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 25 |
+
nvidia-curand-cu12==10.3.2.106
|
| 26 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 27 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 28 |
+
nvidia-nccl-cu12==2.19.3
|
| 29 |
+
nvidia-nvjitlink-cu12==12.3.101
|
| 30 |
+
nvidia-nvtx-cu12==12.1.105
|
| 31 |
+
packaging==23.2
|
| 32 |
+
pandas==2.0.3
|
| 33 |
+
pillow==10.2.0
|
| 34 |
+
python-dateutil==2.8.2
|
| 35 |
+
pytz==2024.1
|
| 36 |
+
PyYAML==6.0.1
|
| 37 |
+
regex==2023.12.25
|
| 38 |
+
requests==2.31.0
|
| 39 |
+
safetensors==0.4.2
|
| 40 |
+
scikit-learn==1.3.2
|
| 41 |
+
scipy==1.10.1
|
| 42 |
+
sentence-transformers==2.3.1
|
| 43 |
+
sentencepiece==0.1.99
|
| 44 |
+
six==1.16.0
|
| 45 |
+
sympy==1.12
|
| 46 |
+
threadpoolctl==3.2.0
|
| 47 |
+
tokenizers==0.15.2
|
| 48 |
+
torch==2.2.0
|
| 49 |
+
tqdm==4.66.2
|
| 50 |
+
transformers==4.37.2
|
| 51 |
+
triton==2.2.0
|
| 52 |
+
typing_extensions==4.9.0
|
| 53 |
+
tzdata==2024.1
|
| 54 |
+
urllib3==2.2.0
|
| 55 |
+
Werkzeug==3.0.1
|
| 56 |
+
zipp==3.17.0
|