# sabhascholar / app.py
# (Hugging Face Space page residue preserved as comments: "dindizz's picture",
#  "Update app.py", commit 4bb5912 verified)
import openai
import gradio as gr
import os
from pdf2image import convert_from_path
import pytesseract
from PIL import Image
import glob
# Access the OpenAI API key from environment variables (Hugging Face secret).
# NOTE(review): os.getenv returns None when the secret is unset — API calls
# would then fail at request time, not here.
openai.api_key = os.getenv('OPENAI_API_KEY')
# Directory where the PDF files are stored.
# NOTE(review): '/path_to_pdf_files' is a placeholder — must be changed to the
# actual dataset path for the app to find any PDFs.
pdf_directory = '/path_to_pdf_files' # Change this to your actual dataset path
def pdf_to_text(pdf_path):
    """Extract the text of every page of a PDF via OCR.

    Each page is rasterized to an image with pdf2image, then run through
    Tesseract OCR via pytesseract.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        str: The concatenated OCR text of all pages, with a newline
        appended after each page's text (matching the original output
        format exactly).
    """
    pages = convert_from_path(pdf_path)
    # Build the result with a single join instead of repeated += in a loop,
    # which is quadratic in the worst case; output is byte-identical.
    return "".join(pytesseract.image_to_string(page) + "\n" for page in pages)
def extract_info(query):
    """Answer *query* against the OCR'd text of all PDFs in pdf_directory.

    OCR is by far the most expensive step, so extracted text is cached per
    PDF path on the function object and reused across calls — the original
    re-OCR'd every PDF on every single query.

    Args:
        query: The user's natural-language question.

    Returns:
        str: The model's answer, stripped of surrounding whitespace.
    """
    # Per-path OCR cache, stored on the function itself so no module-level
    # state is needed. New or previously unseen PDFs are OCR'd on demand.
    cache = extract_info.__dict__.setdefault('_text_cache', {})
    all_texts = []
    # sorted() makes the prompt deterministic across runs — raw glob order
    # is filesystem-dependent.
    for pdf_path in sorted(glob.glob(f'{pdf_directory}/*.pdf')):
        if pdf_path not in cache:
            cache[pdf_path] = pdf_to_text(pdf_path)
        all_texts.append(cache[pdf_path])
    combined_text = "\n".join(all_texts)
    # Truncate to 2000 chars to keep the prompt within the model context.
    prompt = (
        f"Extract relevant information based on the following query: '{query}' "
        f"from the Madras Music Academy Souvenir archives: {combined_text[:2000]}"
    )
    # NOTE(review): openai.ChatCompletion is the legacy (<1.0) SDK API —
    # confirm the pinned openai package version still supports it.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an assistant that extracts information from PDF files using OCR."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=300,
    )
    # Legacy responses support dict-style indexing.
    answer = response['choices'][0]['message']['content']
    return answer.strip()
# Define the Gradio interface
def gradio_interface(query):
    """Gradio callback: forward the user's question to extract_info."""
    answer = extract_info(query)
    return answer
# Build and launch the Gradio app: a single text box in, a single text box
# out, wired to gradio_interface above.
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Sabha Scholar - Madras Music Academy AI Explorer",
    description="Ask questions about the Madras Music Academy Souvenirs."
)
# Blocks here serving HTTP until the process is stopped.
iface.launch()