first_commit: docker instance, main methods
Browse files- Dockerfile +27 -0
- README.md +0 -2
- api/__init__.py +0 -0
- api/core/app.py +74 -0
- api/core/controllers/extractor.py +24 -0
- api/core/controllers/handlers.py +13 -0
- api/core/controllers/text2text.py +43 -0
- api/main.py +83 -0
- requirements.txt +29 -0
Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
|
| 2 |
+
|
| 3 |
+
ENV DEBIAN_FRONTEND=noninteractive
|
| 4 |
+
RUN apt update && \
|
| 5 |
+
apt install -y bash \
|
| 6 |
+
build-essential \
|
| 7 |
+
git \
|
| 8 |
+
git-lfs \
|
| 9 |
+
curl \
|
| 10 |
+
ca-certificates \
|
| 11 |
+
libsndfile1-dev \
|
| 12 |
+
libgl1 \
|
| 13 |
+
python3.8 \
|
| 14 |
+
python3-pip \
|
| 15 |
+
python3.8-venv && \
|
| 16 |
+
rm -rf /var/lib/apt/lists
|
| 17 |
+
WORKDIR /code
|
| 18 |
+
COPY ./requirements.txt /code/requirements.txt
|
| 19 |
+
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
| 20 |
+
python3 -m pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 21 |
+
RUN useradd -m -u 1000 user
|
| 22 |
+
USER user
|
| 23 |
+
ENV HOME=/home/user \
|
| 24 |
+
PATH=/home/user/.local/bin:$PATH
|
| 25 |
+
WORKDIR $HOME/server
|
| 26 |
+
COPY --chown=user . $HOME/server
|
| 27 |
+
CMD ["uvicorn", "api.main:api", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -6,5 +6,3 @@ colorTo: purple
|
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
-
|
| 10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
---
|
|
|
|
|
|
api/__init__.py
ADDED
|
File without changes
|
api/core/app.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Demos:
    """Facade over the FastAPI building blocks and the text-processing helpers.

    The instance stores the FastAPI *classes* (not instances) so that the
    entry-point module can build the application and raise HTTP errors
    through a single object. The text helpers are static methods that
    delegate to the controllers under ``api.core.controllers``.
    """

    def __init__(self):
        # Imported here so that importing this module does not itself
        # require FastAPI; only instantiating ``Demos`` does.
        from fastapi import FastAPI, HTTPException, Response

        self.api = FastAPI
        self.exception = HTTPException
        self.api_response = Response

    def validate_apikey(self, api_key) -> bool:
        """Validate an API key.

        Placeholder: no check is performed yet and every key is accepted.

        args:
            api_key : Key supplied by the client (currently unused).
        return:
            (bool) : Always ``True``.
        """
        return True

    @staticmethod
    def obtener_texto(from_url: str = None, from_pdf: str = None) -> str:
        """Get text from an information source: a URL or a PDF.

        ``from_url`` takes precedence when both arguments are given.
        PDF extraction is not implemented yet and returns the placeholder
        string ``"PDF"``.

        args:
            from_url (str) : URL to extract the text from.
            from_pdf (str) : PDF to extract the text from.
        return:
            (str) : Extracted text; the error message when the download or
                    parsing failed; or a fixed message when no source was
                    given.
        """
        if from_url:
            # Imported only on the branch that needs it, so the trivial
            # branches below do not depend on the extractor module.
            from api.core.controllers.extractor import TextFromURL

            with TextFromURL(url=from_url) as web_content:
                if web_content.is_ok():
                    return web_content.get_raw().get_text()
                # get_error() yields the exception object; convert it so the
                # declared ``str`` return type holds and the value can be
                # serialised in a JSON response.
                return str(web_content.get_error())
        if from_pdf:
            return str("PDF")
        return str("Ninguna opción seleccionada")

    @staticmethod
    def generar_bloques(texto: str = None, size: int = 1500) -> list:
        """Split plain text into blocks of at most ``size`` characters.

        args:
            texto (str) : Text to split into blocks.
            size (int)  : Maximum block length, default 1500 characters.
        return:
            (list) : List of text blocks produced by
                     ``Generator.get_bloques``.
        """
        from api.core.controllers.handlers import Generator

        return Generator.get_bloques(from_text=texto, size=size)

    @staticmethod
    def traducir(texto: str = None, idioma: str = "EN/ES") -> str:
        """Translate the given text.

        The language pair is matched case-insensitively as a substring of
        ``idioma``, testing "EN/ES", "ES/EN" and "AR/ES" in that order.

        args:
            texto (str)  : Text to translate.
            idioma (str) : Language pair, default "EN/ES".
        return:
            (str) : Translated text, or ``"Idioma no válido"`` when the pair
                    is not supported (including ``None`` / non-string input).
        """
        # Tolerate None and non-string values instead of failing on .upper().
        solicitado = str(idioma or "").upper()
        metodo = None
        for par in ("EN/ES", "ES/EN", "AR/ES"):
            if par in solicitado:
                metodo = par.replace("/", "_")
                break
        if metodo is None:
            return "Idioma no válido"

        # Deferred until a valid pair is known: importing the module pulls
        # in ``transformers``, which is pointless for a rejected request.
        from api.core.controllers.text2text import Traductor

        return getattr(Traductor, metodo)(texto=texto)

    @staticmethod
    def resumir(texto: str = None) -> str:
        """Generate a summary of the given text.

        args:
            texto (str) : Text to summarise.
        return:
            (str) : Summary produced by ``Abstractor.resumen``.
        """
        from api.core.controllers.text2text import Abstractor

        return Abstractor.resumen(texto=texto)
|
api/core/controllers/extractor.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class TextFromURL:
    """Context manager that downloads a URL and parses the body as HTML.

    Failures while downloading or parsing are not raised: they are printed,
    stored, and reported through ``is_ok()`` / ``get_error()``.

    Typical use::

        with TextFromURL(url=...) as page:
            text = page.get_raw().get_text() if page.is_ok() else page.get_error()
    """

    # Class-level defaults so the getters are always safe to call, even
    # before ``__enter__`` ran or when no error occurred.
    __raw = None
    __error = None

    def __init__(self, url: str, timeout: float = 10.0):
        """Store the target URL.

        args:
            url (str)       : Address of the page to download.
            timeout (float) : Seconds to wait for the server before giving
                              up, default 10. Without it a stalled server
                              would block the caller indefinitely.
        """
        self.__source = url
        self.__timeout = timeout

    def __enter__(self):
        """Download and parse the page; always return ``self``.

        Any ``Exception`` (including a missing ``requests`` / ``bs4``
        package) is printed and kept for ``get_error()``.
        """
        try:
            import requests as REQS
            import bs4 as BS4

            respuesta = REQS.get(self.get_source(), timeout=self.__timeout)
            self.__raw = BS4.BeautifulSoup(respuesta.content, "html.parser")
        except Exception as er:
            print(er)
            self.__error = er
        # Plain return instead of ``finally: return``, which would also
        # swallow KeyboardInterrupt / SystemExit.
        return self

    def __exit__(self, *args):
        """Print any exception details from the ``with`` body.

        Returns ``None``, so exceptions raised inside the ``with`` block are
        not suppressed.
        """
        for detalle in args:
            if detalle is not None:
                print(detalle)

    def get_source(self):
        """Return the URL given at construction."""
        return self.__source

    def get_raw(self):
        """Return the parsed document, or ``None`` if nothing was parsed."""
        return self.__raw

    def get_error(self):
        """Return the exception captured by ``__enter__``, or ``None``."""
        return self.__error

    def is_ok(self):
        """Return ``True`` when a parsed document is available."""
        return bool(self.get_raw())
|
api/core/controllers/handlers.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Generator:
    """Helpers that produce derived data from plain text."""

    @staticmethod
    def get_bloques(from_text: str, size: int = 1500):
        """Split text into consecutive blocks of at most ``size`` characters.

        The text is halved recursively until every piece fits, so the blocks
        are of similar length rather than "full blocks plus a remainder".
        Joining the returned blocks reproduces the input exactly.

        args:
            from_text (str) : Text to split. ``None`` is treated as an empty
                              string; other values are converted with
                              ``str()``.
            size (int)      : Maximum block length, default 1500 characters.
        return:
            (list) : Blocks in original order. Text that already fits
                     (including the empty string) yields a single block.
        raises:
            ValueError : If ``size`` is smaller than 1.
        """
        if size < 1:
            # A non-positive size can never be satisfied; fail with a clear
            # message instead of an obscure range() step error.
            raise ValueError("size debe ser un entero positivo")

        texto = "" if from_text is None else str(from_text)
        if len(texto) <= size:
            return [texto]

        # Slice at the midpoint with open-ended halves so that odd-length
        # text keeps its last character (fixed-width chunking produced a
        # third one-character chunk that was never processed).
        mitad = len(texto) // 2
        return (
            Generator.get_bloques(from_text=texto[:mitad], size=size)
            + Generator.get_bloques(from_text=texto[mitad:], size=size)
        )
|
api/core/controllers/text2text.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline as Pipe
|
| 2 |
+
|
| 3 |
+
# Pipelines already built, keyed by model name. Building one loads the
# model, so each is created on first use and reused afterwards instead of
# being rebuilt on every call.
_PIPELINES = {}


def _generar(modelo: str, texto: str) -> str:
    """Run ``texto`` through the text2text pipeline of ``modelo``.

    args:
        modelo (str) : Model identifier passed to the pipeline factory.
        texto (str)  : Input text.
    return:
        (str) : The ``generated_text`` of the first result, or the error
                message if building or running the pipeline failed
                (errors are reported as text, not raised).
    """
    try:
        if modelo not in _PIPELINES:
            _PIPELINES[modelo] = Pipe("text2text-generation", model=modelo)
        return _PIPELINES[modelo](texto)[0].get('generated_text')
    except Exception as e:
        return str(e)


class Traductor:
    """Translations between fixed language pairs using Helsinki-NLP models."""

    @staticmethod
    def EN_ES(texto: str) -> str:
        """Translate with the ``Helsinki-NLP/opus-mt-en-es`` model.

        args:
            texto (str) : Text to translate.
        return:
            (str) : Generated text, or the error message on failure.
        """
        return _generar("Helsinki-NLP/opus-mt-en-es", texto)

    @staticmethod
    def ES_EN(texto: str) -> str:
        """Translate with the ``Helsinki-NLP/opus-mt-es-en`` model.

        args:
            texto (str) : Text to translate.
        return:
            (str) : Generated text, or the error message on failure.
        """
        return _generar("Helsinki-NLP/opus-mt-es-en", texto)

    @staticmethod
    def AR_ES(texto: str) -> str:
        """Translate with the ``Helsinki-NLP/opus-mt-ar-es`` model.

        args:
            texto (str) : Text to translate.
        return:
            (str) : Generated text, or the error message on failure.
        """
        return _generar("Helsinki-NLP/opus-mt-ar-es", texto)


class Abstractor:
    """Summarisation using the ``facebook/bart-large-cnn`` model."""

    @staticmethod
    def resumen(texto: str) -> str:
        """Generate a summary of the given text.

        args:
            texto (str) : Text to summarise.
        return:
            (str) : Generated text, or the error message on failure.
        """
        return _generar("facebook/bart-large-cnn", texto)
|
api/main.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from api.core.app import Demos
|
| 2 |
+
# Single facade instance shared by every endpoint below; it carries the
# FastAPI classes and the text helpers.
__main = Demos()
|
| 3 |
+
# ASGI application object: `__main.api` holds the FastAPI class, so calling
# it creates the app. The Dockerfile's uvicorn command targets `api.main:api`.
api = __main.api()
|
| 4 |
+
|
| 5 |
+
@api.post("/texto_desde_web/", status_code=201)
def get_text_from_url(data: dict) -> dict:
    """Extract the text of a web page.

    args:
        data (dict) : Request body; must contain the key ``url``.
    return:
        (dict) : ``request_data`` (the body as received) and ``texto``.
    raises:
        HTTPException 401 : The body is empty or lacks ``url``.
        HTTPException 403 : The extraction failed unexpectedly.
    """
    # Validate outside the try block so this error is not caught and
    # re-wrapped by the generic handler below.
    # NOTE(review): 401/403 are kept from the original code; 400/422 and 500
    # would be the conventional codes for these two cases - confirm.
    if not data or 'url' not in data:
        raise __main.exception(status_code=401, detail=f"Datos mal formados:\n{data}")

    respuesta = dict({"request_data": data})
    try:
        respuesta['texto'] = __main.obtener_texto(from_url=data.get('url'))
    except Exception as e:
        print(e)
        # To-do: add more information to the error (date, user, request).
        # ``detail`` must be serialisable, hence the message, not the object.
        raise __main.exception(status_code=403, detail=str(e)) from e
    # Returned after the try block: a ``return`` inside ``finally`` would
    # discard the exception raised above and always answer 201.
    return respuesta
|
| 19 |
+
|
| 20 |
+
@api.post("/texto_desde_pdf/", status_code=201)
def get_text_from_pdf(data: dict) -> dict:
    """Extract the text of a PDF.

    args:
        data (dict) : Request body; must contain the key ``pdf``.
    return:
        (dict) : ``request_data`` (the body as received) and ``texto``.
    raises:
        HTTPException 401 : The body is empty or lacks ``pdf``.
        HTTPException 403 : The extraction failed unexpectedly.
    """
    # Validate outside the try block so this error is not caught and
    # re-wrapped by the generic handler below.
    # NOTE(review): 401/403 are kept from the original code; 400/422 and 500
    # would be the conventional codes for these two cases - confirm.
    if not data or 'pdf' not in data:
        raise __main.exception(status_code=401, detail=f"Datos mal formados:\n{data}")

    respuesta = dict({"request_data": data})
    try:
        respuesta['texto'] = __main.obtener_texto(from_pdf=data.get('pdf'))
    except Exception as e:
        print(e)
        # To-do: add more information to the error (date, user, request).
        # ``detail`` must be serialisable, hence the message, not the object.
        raise __main.exception(status_code=403, detail=str(e)) from e
    # Returned after the try block: a ``return`` inside ``finally`` would
    # discard the exception raised above and always answer 201.
    return respuesta
|
| 34 |
+
|
| 35 |
+
@api.post("/generar_bloques/", status_code=201)
def get_blocks(data: dict) -> dict:
    """Split a text into blocks of a given maximum size.

    args:
        data (dict) : Request body; must contain the keys ``texto`` and
                      ``size``.
    return:
        (dict) : ``request_data`` (the body as received), ``original`` (the
                 input text) and ``bloques`` (the list of blocks).
    raises:
        HTTPException 401 : The body is empty or lacks a required key.
        HTTPException 403 : Block generation failed unexpectedly.
    """
    # Validate outside the try block so this error is not caught and
    # re-wrapped by the generic handler below.
    # NOTE(review): 401/403 are kept from the original code; 400/422 and 500
    # would be the conventional codes for these two cases - confirm.
    if not data or 'texto' not in data or 'size' not in data:
        raise __main.exception(status_code=401, detail=f"Datos mal formados:\n{data}")

    respuesta = dict({"request_data": data})
    try:
        respuesta['original'] = data.get('texto')
        respuesta['bloques'] = __main.generar_bloques(
            texto=data.get('texto'),
            size=data.get('size'),
        )
    except Exception as e:
        print(e)
        # To-do: add more information to the error (date, user, request).
        # ``detail`` must be serialisable, hence the message, not the object.
        raise __main.exception(status_code=403, detail=str(e)) from e
    # Returned after the try block: a ``return`` inside ``finally`` would
    # discard the exception raised above and always answer 201.
    return respuesta
|
| 51 |
+
|
| 52 |
+
@api.post("/traducir/", status_code=201)
def get_traduccion(data: dict) -> dict:
    """Translate a text.

    args:
        data (dict) : Request body; must contain the keys ``texto`` and
                      ``idioma``.
    return:
        (dict) : ``request_data`` (the body as received), ``original`` (the
                 input text) and ``traduccion``.
    raises:
        HTTPException 401 : The body is empty or lacks a required key.
        HTTPException 403 : The translation failed unexpectedly.
    """
    # Validate outside the try block so this error is not caught and
    # re-wrapped by the generic handler below.
    # NOTE(review): 401/403 are kept from the original code; 400/422 and 500
    # would be the conventional codes for these two cases - confirm.
    if not data or 'texto' not in data or 'idioma' not in data:
        raise __main.exception(status_code=401, detail=f"Datos mal formados:\n{data}")

    respuesta = dict({"request_data": data})
    try:
        respuesta['original'] = data.get('texto')
        respuesta['traduccion'] = __main.traducir(
            texto=data.get('texto'),
            idioma=data.get('idioma'),
        )
    except Exception as e:
        print(e)
        # To-do: add more information to the error (date, user, request).
        # ``detail`` must be serialisable, hence the message, not the object.
        raise __main.exception(status_code=403, detail=str(e)) from e
    # Returned after the try block: a ``return`` inside ``finally`` would
    # discard the exception raised above and always answer 201.
    return respuesta
|
| 68 |
+
|
| 69 |
+
@api.post("/resumir/", status_code=201)
def get_resumen(data: dict) -> dict:
    """Summarise a text.

    args:
        data (dict) : Request body; must contain the key ``texto``.
    return:
        (dict) : ``request_data`` (the body as received), ``original`` (the
                 input text) and ``resumen``.
    raises:
        HTTPException 401 : The body is empty or lacks ``texto``.
        HTTPException 403 : The summarisation failed unexpectedly.
    """
    # Validate outside the try block so this error is not caught and
    # re-wrapped by the generic handler below.
    # NOTE(review): 401/403 are kept from the original code; 400/422 and 500
    # would be the conventional codes for these two cases - confirm.
    if not data or 'texto' not in data:
        raise __main.exception(status_code=401, detail=f"Datos mal formados:\n{data}")

    respuesta = dict({"request_data": data})
    try:
        respuesta['original'] = data.get('texto')
        respuesta['resumen'] = __main.resumir(texto=data.get('texto'))
    except Exception as e:
        print(e)
        # To-do: add more information to the error (date, user, request).
        # ``detail`` must be serialisable, hence the message, not the object.
        raise __main.exception(status_code=403, detail=str(e)) from e
    # Returned after the try block: a ``return`` inside ``finally`` would
    # discard the exception raised above and always answer 201.
    return respuesta
|
requirements.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
torchvision
|
| 3 |
+
torchaudio
|
| 4 |
+
invisible_watermark
|
| 5 |
+
datasets
|
| 6 |
+
hf-doc-builder
|
| 7 |
+
huggingface-hub
|
| 8 |
+
Jinja2
|
| 9 |
+
librosa
|
| 10 |
+
numpy
|
| 11 |
+
scipy
|
| 12 |
+
tensorboard
|
| 13 |
+
omegaconf
|
| 14 |
+
pytorch-lightning
|
| 15 |
+
xformers
|
| 16 |
+
|
| 17 |
+
fastapi
|
| 18 |
+
pydantic
|
| 19 |
+
uvicorn
|
| 20 |
+
typing
|
| 21 |
+
requests
|
| 22 |
+
bs4
|
| 23 |
+
transformers
|
| 24 |
+
transformers[sentencepiece]
|
| 25 |
+
diffusers
|
| 26 |
+
diffusers[torch]
|
| 27 |
+
diffusers[flax]
|
| 28 |
+
accelerate
|
| 29 |
+
safetensors
|