Spaces:

drmurataltun
/

GeminiDoc

Sleeping

App Files Files Community

GeminiDoc / app.py

drmurataltun

Update app.py

16a2b39 verified over 1 year ago

raw

history blame contribute delete

5.16 kB


	import streamlit as st
	import google.generativeai as genai
	import markdown
	from docx import Document
	from bs4 import BeautifulSoup
	from PyPDF2 import PdfFileReader
	import tempfile
	import os

	# Hand the Gemini API key to the client library. The key is read from the
	# `gemini_api` environment variable; a missing variable yields None.
	genai.configure(api_key=os.environ.get('gemini_api'))

	# Function to convert PDF to text
	def pdf_to_text(file):
	    """Extract the text of every page of a PDF file.

	    Args:
	        file: Path of the PDF file on disk.

	    Returns:
	        The text of all pages concatenated in page order, with no
	        separator between pages.
	    """
	    # PdfReader / .pages replace PdfFileReader / numPages / getPage, which
	    # were deprecated in PyPDF2 2.x and removed in 3.0. Imported locally so
	    # the module-level import list does not have to change.
	    from PyPDF2 import PdfReader

	    with open(file, 'rb') as f:
	        reader = PdfReader(f)
	        # `or ""` keeps the join safe should a page yield no text at all.
	        return "".join(page.extract_text() or "" for page in reader.pages)

	# Function to upload file to the Generative AI API
	def upload_file(file_path):
	    """Upload a local file through the Generative AI client.

	    Progress is reported in the Streamlit page before and after the upload.

	    Args:
	        file_path: Path of the file to upload.

	    Returns:
	        The object returned by ``genai.upload_file``; its ``uri`` attribute
	        is shown to the user.
	    """
	    st.write("Uploading file...")
	    uploaded = genai.upload_file(path=file_path)
	    uri = uploaded.uri
	    st.write(f"Completed upload: {uri}")
	    return uploaded

	# Function to convert text to Markdown
	def to_markdown(text):
	    """Format text as a Markdown block quote.

	    Bullet characters (``•``) are rewritten as `` *`` and every line,
	    including blank ones, is prefixed with ``> ``.

	    Args:
	        text: The text to format.

	    Returns:
	        The quoted text. An empty string stays empty.
	    """
	    # textwrap is not among the module-level imports, so the original call
	    # raised NameError; import it here to keep this function self-contained.
	    import textwrap

	    text = text.replace('•', ' *')
	    # The always-true predicate makes indent() prefix blank lines as well.
	    return textwrap.indent(text, '> ', predicate=lambda _: True)

	# Module-wide chat handle: assigned by build_model(), read by chat().
	chat_session = None

	# Function to build the model
	def build_model(text_file):
	    """Create the Gemini chat session and show a one-sentence summary.

	    Stores the new session in the module-level ``chat_session`` and renders
	    the model's reply to the first message in the Streamlit page.

	    Args:
	        text_file: The uploaded-file object that is sent to the model
	            together with the summary request.
	    """
	    global chat_session

	    # Turkish system instruction: answer in Turkish from the uploaded
	    # document; if the answer is not in the document, write
	    # 'Belgede Cevap Bulunmuyor'.
	    system_prompt = """Yüklenen belgedeki bilgilere göre Türkçe cevap ver.
	Eğer sorunun cevabı belgede bulunmuyorsa 'Belgede Cevap Bulunmuyor' yaz.
	"""
	    sampling = dict(
	        temperature=0.2,
	        top_p=0.95,
	        top_k=64,
	        max_output_tokens=8192,
	        response_mime_type="text/plain",
	    )
	    gemini = genai.GenerativeModel(
	        model_name="gemini-1.5-flash",
	        generation_config=sampling,
	        system_instruction=system_prompt,
	    )

	    chat_session = gemini.start_chat(history=[])
	    # First message (Turkish): "Summarize the uploaded document in one sentence".
	    summary_request = ["Yüklenen belgeyi bir cümle ile özetle", text_file]
	    summary = chat_session.send_message(summary_request)

	    st.markdown(to_markdown(summary.text))

	# Function to interact with the chat model
	def chat(prompt):
	    """Send one prompt to the active chat session and render the reply.

	    Args:
	        prompt: The message to send through the module-level
	            ``chat_session``.

	    Returns:
	        The reply text on success, or ``None`` when any error occurred (the
	        error details are written to the Streamlit page instead).
	    """
	    # Bound before the try so the ValueError handler can tell whether the
	    # failure happened before or after a response object was received.
	    # Previously the handler read an unbound name when send_message() itself
	    # raised ValueError.
	    response = None
	    try:
	        response = chat_session.send_message(prompt)
	        markdown_text = to_markdown(response.text)
	        st.markdown(markdown_text)
	        return response.text
	    except ValueError as e:
	        if response is None:
	            # Nothing came back, so there is no feedback to show.
	            st.write("An unexpected error occurred:", e)
	            return None
	        # NOTE(review): presumably raised by `response.text` when the reply
	        # carries no usable text (e.g. it was blocked) - confirm against the
	        # google-generativeai documentation.
	        st.write(response.prompt_feedback)
	        # Guard the index: a response may come back without any candidate.
	        if response.candidates:
	            st.write(response.candidates[0].finish_reason)
	            st.write(response.candidates[0].safety_ratings)
	    except Exception as e:
	        # Best-effort UI boundary: report the problem and keep the app alive.
	        st.write("An unexpected error occurred:", e)
	    return None

	# Function to generate a report based on questions
	def generate_report(questions):
	    """Ask every question through chat() and assemble a Markdown report.

	    Blank or whitespace-only entries are skipped, so empty lines in the
	    input do not trigger a model call or produce an empty heading.

	    Args:
	        questions: Iterable of question strings.

	    Returns:
	        Markdown text: a title heading followed by one ``##`` heading and
	        one answer paragraph per question.
	    """
	    # Title (Turkish): "YOUR QUESTIONS AND THEIR ANSWERS".
	    sections = ["\n## SORULARINIZ VE CEVAPLARI\n"]
	    for question in questions:
	        question = question.strip()
	        if not question:
	            continue
	        answer = chat(question)
	        if answer is None:
	            # chat() returns None on failure; avoid printing "None" in the
	            # report. Turkish for "No answer could be obtained."
	            answer = "Cevap alınamadı."
	        sections.append(f"\n## {question}\n")
	        sections.append(f"\n{answer}\n")
	    return "".join(sections)

	# Function to convert Markdown to HTML
	def convert_Markdown_to_HTML(report_text):
	    """Render Markdown source as HTML.

	    Args:
	        report_text: Markdown text of the report.

	    Returns:
	        The result of ``markdown.markdown`` for that text.
	    """
	    return markdown.markdown(report_text)

	# Function to add HTML to a Word document
	def add_html_to_word(html_text, doc):
	    """Append the top-level elements of an HTML fragment to a Word document.

	    Headings ``h1``-``h6`` become headings of the matching level, ``ul`` /
	    ``ol`` items become bulleted / numbered paragraphs, and every other
	    named tag becomes a plain paragraph. Nodes without a tag name are
	    ignored.

	    Args:
	        html_text: HTML markup to convert.
	        doc: Document object that receives the headings and paragraphs.
	    """
	    heading_levels = {f"h{n}": n for n in range(1, 7)}
	    list_styles = {"ul": "List Bullet", "ol": "List Number"}

	    for node in BeautifulSoup(html_text, 'html.parser'):
	        tag = node.name
	        if tag in heading_levels:
	            doc.add_heading(node.get_text(), level=heading_levels[tag])
	        elif tag in list_styles:
	            for item in node.find_all('li'):
	                doc.add_paragraph(item.get_text(), style=list_styles[tag])
	        elif tag:
	            # 'p' and any other named tag are written as plain paragraphs.
	            doc.add_paragraph(node.get_text())

	# Streamlit interface
	st.title("REPORT GENERATOR: ASK YOUR QUESTIONS TO A PDF FILE by @drmurataltun")
	st.write("Upload a PDF to ask questions and get the answers.")

	uploaded_file = st.file_uploader("Upload PDF", type="pdf")
	questions_input = st.text_area("Enter Questions", placeholder="Type your questions here, one per line.", height=150)

	# One question per line; blank lines are dropped so they are never sent to
	# the model as empty prompts.
	questions = [line.strip() for line in questions_input.splitlines() if line.strip()]

	if uploaded_file and questions:
	    temp_file_path = None
	    text_file = None
	    try:
	        # Keep the .pdf suffix on the temporary copy so the file type can be
	        # recognised from its name. getvalue() returns the whole upload
	        # regardless of the current read position.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
	            temp_file.write(uploaded_file.getvalue())
	            temp_file_path = temp_file.name

	        # The model receives the uploaded file itself, so the previously
	        # computed-but-unused local text extraction is no longer performed.
	        text_file = upload_file(temp_file_path)
	        build_model(text_file)

	        report_text = generate_report(questions)

	        html_text = convert_Markdown_to_HTML(report_text)
	        doc = Document()
	        add_html_to_word(html_text, doc)

	        # Name the report after the uploaded PDF. The old code derived it
	        # from the suffix-less temp file, so ".pdf" was never replaced and
	        # the download had no extension.
	        source_stem = os.path.splitext(os.path.basename(uploaded_file.name))[0]
	        doc_name = f"Rapor {source_stem}.docx"

	        # Serialise through an anonymous temp file instead of a fixed path in
	        # the working directory; it is removed automatically on close.
	        with tempfile.TemporaryFile() as buffer:
	            doc.save(buffer)
	            buffer.seek(0)
	            doc_bytes = buffer.read()

	        st.markdown(report_text)
	        st.write("Document generated successfully!")

	        st.download_button(label="Download Report", data=doc_bytes, file_name=doc_name)
	    finally:
	        # Always clean up, even when a step above failed.
	        if temp_file_path and os.path.exists(temp_file_path):
	            os.remove(temp_file_path)
	        if text_file is not None:
	            genai.delete_file(text_file.name)