Spaces:

FelipeErmeson
/

projeto-rag

Running on Zero

projeto-rag / app.py

Felipe Silva

ajuste docling

fff96e7 3 months ago

2.91 kB

	import gradio as gr
	import spaces
	import torch
	import os
	from huggingface_hub import snapshot_download
	from utils import read_file_pdf, fix_type, extract_content_in_pdf, doc_converter, EXTENSIONS_FILES, EXTENSIONS_IMG_FILES
	from rag_utils import create_split_doc, store_docs, create_rag_chain
	import config

	# Build a one-element tensor at import time and ask for it to be placed on CUDA,
	# then print the device it actually reports.
	# NOTE(review): this matches the Hugging Face ZeroGPU starter snippet. The trailing
	# comment says the device printed here is 'cpu' -- presumably because a GPU is only
	# attached inside @spaces.GPU-decorated calls; confirm against the ZeroGPU docs.
	# `zero` is not referenced by any active code in the visible part of this file.
	zero = torch.Tensor([0]).cuda()
	print(zero.device) # <-- 'cpu' 🤔

	# @spaces.GPU
	# def greet(n):
	# print(zero.device) # <-- 'cuda:0' 🤗
	# return f"Hello {zero + n} Tensor"

	# Upper bound, in bytes, for an uploaded file (10 MiB); checked in process_file.
	MAX_FILE_SIZE = 10 * 1024 ** 2

	# Both Hugging Face repositories are resolved at import time; the resulting local
	# snapshot directories are stored as attributes on the shared `config` module
	# (presumably read later by rag_utils -- verify against that module).
	_HF_CACHE_DIR = "/root/.cache/huggingface"

	# Alternative model id kept for reference: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
	name_model = "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8"

	# Snapshot of the instruction-tuned language model.
	config.local_model_path = snapshot_download(
	    repo_id=name_model,
	    cache_dir=_HF_CACHE_DIR,
	    local_files_only=False,
	)

	# Snapshot of the sentence-transformers embedding model.
	config.local_emb_path = snapshot_download(
	    repo_id="sentence-transformers/all-MiniLM-L6-v2",
	    cache_dir=_HF_CACHE_DIR,
	    local_files_only=False,
	)

	def process_file(file):
	    """Extract text from an uploaded file for display in the UI.

	    Args:
	        file: Value delivered by the upload component; ``None`` when nothing
	            is uploaded. It is passed to ``os.path.getsize``, so it is
	            presumably a filesystem path -- confirm against the Gradio version.

	    Returns:
	        The extracted text, or a Portuguese status message when there is no
	        file, the file is larger than ``MAX_FILE_SIZE``, or nothing could be
	        extracted.
	    """
	    if file is None:
	        return "Nenhum arquivo enviado."

	    # Reject oversized uploads before doing any parsing work.
	    if os.path.getsize(file) > MAX_FILE_SIZE:
	        return f"O arquivo excede o limite. Por favor, realize o upload de um arquivo que contenha no máximo {MAX_FILE_SIZE/1024/1024:.1f}MB."

	    normalized, extension = fix_type(file)

	    # Document types use the normalized object returned by fix_type;
	    # image types are converted from the original upload instead.
	    if extension in EXTENSIONS_FILES:
	        extracted = extract_content_in_pdf(normalized)
	    elif extension in EXTENSIONS_IMG_FILES:
	        extracted = doc_converter(file)
	    else:
	        extracted = None

	    if extracted:
	        return extracted
	    return "Não foi possível extrair texto."

	@spaces.GPU
	def ask_question(texto_extraido, question):
	    """Answer ``question`` using the extracted document text as RAG context.

	    The text is split, stored in a vector store, and wrapped in a RAG chain
	    on every call; the chain is then run with the question.

	    Args:
	        texto_extraido: Document text taken from the "Texto extraído" box.
	        question: The question typed by the user.

	    Returns:
	        The chain's answer, or a short Portuguese hint when either input is
	        missing or blank.
	    """
	    # The question box can be submitted before any file has been processed, or
	    # with nothing typed. Return a hint instead of building a vector store from
	    # empty text or running the chain with an empty query.
	    if not texto_extraido or not texto_extraido.strip():
	        return "Nenhum texto extraído. Faça o upload de um arquivo antes de perguntar."
	    if not question or not question.strip():
	        return "Por favor, digite uma pergunta."

	    # RAG: split -> index -> chain
	    docs_splitted = create_split_doc(texto_extraido)
	    vector_store = store_docs(docs_splitted)
	    rag_chain = create_rag_chain(vector_store)

	    return rag_chain.run(question)

	def launch_app():
	    """Build the two-column Gradio interface and start serving it.

	    Left column: file upload and the extracted text. Right column: the
	    question box and the answer. Changing the uploaded file runs
	    ``process_file``; submitting the question runs ``ask_question``.
	    """
	    title_md = "## ⚙️ Pergunte qualquer coisa para seu arquivo."
	    intro_md = "🐶 Faça o upload do seu arquivo e pergunte qualquer coisa a ele! Este código é open source e disponível [aqui](https://github.com/FelipeErmeson) no GitHub. 😁"
	    accepted_types = [".png", ".jpg", ".jpeg", ".pdf"]

	    with gr.Blocks() as demo:
	        gr.Markdown(title_md)
	        gr.Markdown(intro_md)

	        with gr.Row():
	            with gr.Column():
	                upload = gr.File(label="Upload da Imagem ou PDF", file_types=accepted_types)
	                extracted_box = gr.Textbox(label="Texto extraído", lines=15)
	            with gr.Column():
	                question_box = gr.Textbox(label="📌 Faça uma pergunta ao seu documento!")
	                answer_box = gr.Textbox(label="🎩 Resposta", lines=20)

	        # Wire the callbacks to the components.
	        upload.change(
	            fn=process_file,
	            inputs=upload,
	            outputs=extracted_box,
	        )
	        question_box.submit(
	            fn=ask_question,
	            inputs=[extracted_box, question_box],
	            outputs=answer_box,
	        )

	    demo.launch()

	# Start the UI only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    launch_app()