Spaces:
Running
on
Zero
Running
on
Zero
Felipe Silva
committed on
Commit
·
c71745c
1
Parent(s):
8c92d61
Ajuste retorno do gr.change
Browse files
app.py
CHANGED
|
@@ -1,26 +1,28 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import spaces
|
| 3 |
import torch
|
|
|
|
| 4 |
from utils import read_file_pdf, fix_type, extract_content_in_pdf, EXTENSIONS_FILES, EXTENSIONS_IMG_FILES
|
| 5 |
from rag_utils import create_split_doc, store_docs, create_rag_chain
|
| 6 |
|
| 7 |
zero = torch.Tensor([0]).cuda()
|
| 8 |
print(zero.device) # <-- 'cpu' 🤔
|
| 9 |
|
| 10 |
-
@spaces.GPU
|
| 11 |
-
def greet(n):
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
|
| 16 |
|
| 17 |
def process_file(file):
|
| 18 |
if file is None:
|
| 19 |
-
return "Nenhum arquivo enviado."
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
| 24 |
file_obj, type_file = fix_type(file)
|
| 25 |
texto_extraido = None
|
| 26 |
|
|
@@ -28,16 +30,11 @@ def process_file(file):
|
|
| 28 |
texto_extraido = extract_content_in_pdf(file_obj)
|
| 29 |
elif type_file in EXTENSIONS_IMG_FILES:
|
| 30 |
texto_extraido = "OCR não implementado neste exemplo."
|
| 31 |
-
|
| 32 |
-
return texto_extraido or "Não foi possível extrair texto.", texto_extraido or ""
|
| 33 |
|
| 34 |
-
def ask_question(
|
| 35 |
-
texto_extraido, _ = process_file(file)
|
| 36 |
-
|
| 37 |
-
if not texto_extraido or texto_extraido.startswith("Não foi possível"):
|
| 38 |
-
return texto_extraido
|
| 39 |
|
| 40 |
-
#
|
| 41 |
docs_splitted = create_split_doc(texto_extraido)
|
| 42 |
vector_store = store_docs(docs_splitted)
|
| 43 |
rag_chain = create_rag_chain(vector_store)
|
|
@@ -46,9 +43,9 @@ def ask_question(file, question):
|
|
| 46 |
return resposta
|
| 47 |
|
| 48 |
with gr.Blocks() as demo:
|
| 49 |
-
gr.Markdown("## Pergunte qualquer coisa para seu arquivo.")
|
| 50 |
gr.Markdown(
|
| 51 |
-
"
|
| 52 |
)
|
| 53 |
|
| 54 |
with gr.Row():
|
|
@@ -60,7 +57,7 @@ with gr.Blocks() as demo:
|
|
| 60 |
answer_output = gr.Textbox(label="Resposta")
|
| 61 |
|
| 62 |
# Conecta funções
|
| 63 |
-
file_input.change(fn=process_file, inputs=file_input, outputs=
|
| 64 |
-
question_input.submit(fn=ask_question, inputs=[
|
| 65 |
|
| 66 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import spaces
|
| 3 |
import torch
|
| 4 |
+
import os
|
| 5 |
from utils import read_file_pdf, fix_type, extract_content_in_pdf, EXTENSIONS_FILES, EXTENSIONS_IMG_FILES
|
| 6 |
from rag_utils import create_split_doc, store_docs, create_rag_chain
|
| 7 |
|
| 8 |
zero = torch.Tensor([0]).cuda()
|
| 9 |
print(zero.device) # <-- 'cpu' 🤔
|
| 10 |
|
| 11 |
+
# @spaces.GPU
|
| 12 |
+
# def greet(n):
|
| 13 |
+
# print(zero.device) # <-- 'cuda:0' 🤗
|
| 14 |
+
# return f"Hello {zero + n} Tensor"
|
| 15 |
|
| 16 |
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
|
| 17 |
|
| 18 |
def process_file(file):
|
| 19 |
if file is None:
|
| 20 |
+
return "Nenhum arquivo enviado."
|
| 21 |
+
|
| 22 |
+
file_size = os.path.getsize(file)
|
| 23 |
+
if file_size > MAX_FILE_SIZE:
|
| 24 |
+
return f"O arquivo excede o limite. Por favor, realize o upload de um arquivo que contenha no máximo {MAX_FILE_SIZE/1024/1024:.1f}MB."
|
| 25 |
+
|
| 26 |
file_obj, type_file = fix_type(file)
|
| 27 |
texto_extraido = None
|
| 28 |
|
|
|
|
| 30 |
texto_extraido = extract_content_in_pdf(file_obj)
|
| 31 |
elif type_file in EXTENSIONS_IMG_FILES:
|
| 32 |
texto_extraido = "OCR não implementado neste exemplo."
|
| 33 |
+
return texto_extraido or "Não foi possível extrair texto."
|
|
|
|
| 34 |
|
| 35 |
+
def ask_question(texto_extraido, question):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
# RAG
|
| 38 |
docs_splitted = create_split_doc(texto_extraido)
|
| 39 |
vector_store = store_docs(docs_splitted)
|
| 40 |
rag_chain = create_rag_chain(vector_store)
|
|
|
|
| 43 |
return resposta
|
| 44 |
|
| 45 |
with gr.Blocks() as demo:
|
| 46 |
+
gr.Markdown("## ⚙️ Pergunte qualquer coisa para seu arquivo.")
|
| 47 |
gr.Markdown(
|
| 48 |
+
"🐶 Faça o upload do seu arquivo e pergunte qualquer coisa a ele! Este código é open source e disponível [aqui](https://github.com/FelipeErmeson) no GitHub. 😁"
|
| 49 |
)
|
| 50 |
|
| 51 |
with gr.Row():
|
|
|
|
| 57 |
answer_output = gr.Textbox(label="Resposta")
|
| 58 |
|
| 59 |
# Conecta funções
|
| 60 |
+
file_input.change(fn=process_file, inputs=file_input, outputs=extracted_text)
|
| 61 |
+
question_input.submit(fn=ask_question, inputs=[extracted_text, question_input], outputs=answer_output)
|
| 62 |
|
| 63 |
demo.launch()
|
utils.py
CHANGED
|
@@ -10,14 +10,11 @@ EXTENSIONS_ALLOWED = EXTENSIONS_IMG_FILES + EXTENSIONS_FILES
|
|
| 10 |
MAX_IMAGE_SIZE = 2000 # pixels
|
| 11 |
|
| 12 |
def fix_type(file_upload):
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
return read_file_img(file_upload), type_file
|
| 19 |
-
elif type_file in EXTENSIONS_FILES:
|
| 20 |
-
return read_file_pdf(file_upload), type_file
|
| 21 |
|
| 22 |
# Resize image while maintaining aspect ratio
|
| 23 |
def resize_image(image, max_size):
|
|
|
|
| 10 |
MAX_IMAGE_SIZE = 2000 # pixels
|
| 11 |
|
| 12 |
def fix_type(file_upload):
|
| 13 |
+
type_file = file_upload.split('/')[-1].split('.')[-1]
|
| 14 |
+
if type_file in EXTENSIONS_IMG_FILES:
|
| 15 |
+
return read_file_img(file_upload), type_file
|
| 16 |
+
elif type_file in EXTENSIONS_FILES:
|
| 17 |
+
return read_file_pdf(file_upload), type_file
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Resize image while maintaining aspect ratio
|
| 20 |
def resize_image(image, max_size):
|