Spaces:

rwayz
/

book_chat

Sleeping

App Files Files Community

book_chat / app.py

rwayz

Update app.py

b370be5 verified 8 months ago

raw

history blame contribute delete

3.95 kB

	from huggingface_hub import InferenceClient
	import PyPDF2
	import gradio as gr
	import os

	# Hugging Face access token taken from the environment (None when HF_API_KEY is unset).
	API_KEY = os.environ.get("HF_API_KEY")

	# Module-wide inference client; the chat handler issues its completion requests through it.
	client = InferenceClient(api_key=API_KEY)

	def load_pdf_content(pdf_path):
	    """Extract the text of every page of a PDF file.

	    Args:
	        pdf_path: Path of the PDF file to read.

	    Returns:
	        The extracted text, with pages separated by a newline, or ``None``
	        when the file cannot be opened/parsed or yields no text at all.
	    """
	    try:
	        with open(pdf_path, "rb") as file:
	            reader = PyPDF2.PdfReader(file)
	            # Join pages with a newline so the last word of one page does not
	            # fuse with the first word of the next (the text is later split on
	            # whitespace). `or ""` is a cheap guard in case a page yields no
	            # text object -- presumably only on image-only pages; verify
	            # against the installed PyPDF2 version.
	            text = "\n".join(page.extract_text() or "" for page in reader.pages)
	            if text.strip():  # Make sure the extracted text is not empty
	                print("PDF carregado com sucesso!")
	                print(f"Conteúdo inicial do PDF:\n{text[:500]}\n...")
	                return text
	            else:
	                print("O PDF foi carregado, mas está vazio ou ilegível.")
	                return None
	    except Exception as e:
	        # Deliberate best-effort boundary: any read/parse failure is reported
	        # and signalled to the caller as None instead of crashing the app.
	        print(f"Erro ao carregar o PDF: {e}")
	        return None

	def chunk_text(text, max_chunk_size=500):
	    """Split text into chunks of whole words, bounded by a size budget.

	    Words are taken from ``text.split()`` and never broken apart. Each word is
	    budgeted as ``len(word) + 1`` (the word plus one separator); a chunk is
	    closed as soon as adding the next word would exceed ``max_chunk_size``.
	    A single word longer than the budget becomes a chunk of its own.

	    Args:
	        text: The text to split.
	        max_chunk_size: Character budget per chunk.

	    Returns:
	        A list of non-empty chunk strings (empty list for blank input).
	    """
	    chunks = []
	    current_chunk = []
	    current_length = 0

	    for word in text.split():
	        word_cost = len(word) + 1
	        # Flush only a non-empty chunk: an oversized word arriving while the
	        # current chunk is empty must not emit an empty "" chunk.
	        if current_chunk and current_length + word_cost > max_chunk_size:
	            chunks.append(" ".join(current_chunk))
	            current_chunk = []
	            current_length = 0
	        current_chunk.append(word)
	        current_length += word_cost

	    if current_chunk:
	        chunks.append(" ".join(current_chunk))

	    print(f"Texto dividido em {len(chunks)} chunks.")
	    return chunks

	pdf_path = "test.pdf"
	book_content = load_pdf_content(pdf_path)

	# Bind book_chunks on every path so the name exists even when the PDF could
	# not be loaded (previously it was only defined in the success branch).
	book_chunks = []

	if not book_content:  # Content is empty or None: the PDF could not be used
	    print("Erro: O conteúdo do livro está vazio. Verifique o caminho do PDF e o formato do arquivo.")
	else:
	    book_chunks = chunk_text(book_content)

	# Budget handed to truncate_history when trimming the conversation.
	max_tokens = 4096

	# Instruction that restricts the assistant to the book's subject matter.
	system_prompt = (
	    "Você é um assistente especializado que só pode responder perguntas com base no conteúdo do livro 'A Arte da Guerra' fornecido. "
	    "Se a pergunta não estiver relacionada ao conteúdo do livro fornecido, responda: 'Desculpe, só posso responder perguntas sobre o conteúdo do livro 'A Arte da Guerra'.'"
	)

	# Module-level conversation log, seeded with the system prompt as its first entry.
	chat_history = [{"role": "system", "content": system_prompt}]

	def truncate_history(history, max_tokens):
	    """Trim the history so it does not exceed the token limit.

	    Tokens are approximated as whitespace-separated words of each message's
	    ``"content"``. Leading ``"system"`` messages are kept first (as long as
	    they fit the budget) so the instructions are not the first thing lost;
	    the remaining budget is then filled with the most recent messages,
	    stopping at the first one that does not fit.

	    Args:
	        history: List of message dicts with a ``"content"`` string and,
	            optionally, a ``"role"``.
	        max_tokens: Maximum total number of (approximate) tokens to keep.

	    Returns:
	        A new list with the retained messages in their original order.
	    """
	    def count_tokens(message):
	        return len(message["content"].split())

	    total_tokens = 0

	    # Pin the system message(s) at the head of the conversation.
	    pinned = []
	    for message in history:
	        if message.get("role") != "system":
	            break
	        tokens = count_tokens(message)
	        if total_tokens + tokens > max_tokens:
	            break
	        pinned.append(message)
	        total_tokens += tokens

	    # Walk the rest from newest to oldest; append + reverse avoids the
	    # repeated front-insertion of the list.
	    recent = []
	    for message in reversed(history[len(pinned):]):
	        tokens = count_tokens(message)
	        if total_tokens + tokens > max_tokens:
	            break
	        recent.append(message)
	        total_tokens += tokens
	    recent.reverse()

	    truncated_history = pinned + recent

	    print(f"Histórico truncado para {len(truncated_history)} mensagens, total de tokens: {total_tokens}")
	    return truncated_history

	def chat_bot_gradio(user_input):
	    """Send the user's message to the model and return the conversation.

	    The message is appended to the module-level ``chat_history``, the history
	    is trimmed with ``truncate_history`` and streamed through ``client``; the
	    assembled reply is then stored as an assistant message.

	    Args:
	        user_input: Text typed by the user.

	    Returns:
	        ``chat_history`` (list of role/content dicts) after the exchange. On
	        failure, a copy of the history ending with the user's message and an
	        assistant message carrying the error text. Blank input returns the
	        history unchanged.
	    """
	    # Nothing to ask: do not send an empty turn to the model or store it.
	    if not user_input or not user_input.strip():
	        return chat_history

	    user_message = {"role": "user", "content": user_input}
	    chat_history.append(user_message)

	    # NOTE(review): the request carries only the chat messages; the chunks
	    # built from the PDF at module load are not part of the prompt -- confirm
	    # whether the book text was meant to be supplied to the model here.
	    truncated_history = truncate_history(chat_history, max_tokens)

	    try:
	        stream = client.chat.completions.create(
	            model="meta-llama/Llama-3.3-70B-Instruct",
	            messages=truncated_history,
	            max_tokens=500,
	            stream=True,
	        )

	        pieces = []
	        for chunk in stream:
	            # Skip chunks that carry no choices instead of indexing into [].
	            if not chunk.choices:
	                continue
	            # The key can be present with a None value, in which case the
	            # .get default is not used; only keep real text.
	            content = chunk.choices[0].delta.get("content")
	            if content:
	                pieces.append(content)
	        bot_response = "".join(pieces)
	    except Exception as e:
	        # Show the failed exchange to the user, but do not keep the unanswered
	        # question in the persistent history: it would leave two consecutive
	        # user turns for the next request.
	        failed_view = chat_history + [{"role": "assistant", "content": f"Erro: {e}"}]
	        if chat_history and chat_history[-1] is user_message:
	            chat_history.pop()
	        return failed_view

	    chat_history.append({"role": "assistant", "content": bot_response})
	    return chat_history

	def gradio_interface():
	    """Assemble the Gradio Blocks UI for the chatbot and return it unlaunched.

	    The layout is a title, a chat display in "messages" format, a text box
	    for the question and a send button whose click forwards the text box
	    value to ``chat_bot_gradio`` and shows the returned history.
	    """
	    with gr.Blocks() as app:
	        gr.Markdown("# Clone Especialista no Livro 'A Arte da Guerra'")
	        history_view = gr.Chatbot(label="Chatbot", type="messages")
	        question_box = gr.Textbox(label="Digite sua pergunta")
	        send_button = gr.Button("Enviar")

	        def respond(message):
	            # Thin handler: delegate to the chat function and hand its
	            # returned history straight to the chat display.
	            return chat_bot_gradio(message)

	        send_button.click(fn=respond, inputs=question_box, outputs=history_view)

	    return app

	if __name__ == "__main__":
	    # Build the interface and start the Gradio server only when this file is
	    # executed as a script (not when it is imported).
	    demo = gradio_interface()
	    demo.launch()