# smartchatbot / app.py
# Gemini-powered document research & theme-identification chatbot
# (Hugging Face Space by tana45; initial commit 0c7f9a4)
import gradio as gr
import google.generativeai as genai
from PyPDF2 import PdfReader
from paddleocr import PaddleOCR
import os
# Step 1: Gemini API Key (must be set in Hugging Face Secrets)
# os.getenv returns None if GEMINI_API_KEY is unset; that will surface as an
# authentication error on the first generate_content() call, not here.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
model = genai.GenerativeModel('gemini-pro')
# Step 2: OCR Setup
# use_angle_cls=True enables text-angle classification (handles rotated text).
ocr_model = PaddleOCR(use_angle_cls=True, lang='en')
# In-memory document store: list of {"filename": str, "text": str} dicts,
# written by process_files() and read by answer_query().
documents = []
def extract_text(file):
    """Extract raw text from an uploaded file.

    Supports PDFs (via PyPDF2) and JPG/JPEG/PNG images (via PaddleOCR).
    Returns an empty string for unsupported extensions, image-only PDF
    pages, and images in which no text is detected.
    """
    ext = os.path.splitext(file.name)[1].lower()
    text = ""
    if ext == ".pdf":
        reader = PdfReader(file)
        for page in reader.pages:
            # extract_text() returns None for image-only pages; coalesce to ""
            text += page.extract_text() or ""
    elif ext in (".jpg", ".jpeg", ".png"):
        result = ocr_model.ocr(file.name)
        # PaddleOCR returns [None] when no text is found — guard before
        # iterating, otherwise this crashes with a TypeError on blank images.
        if result and result[0]:
            # each line is [box, (recognized_text, confidence)]
            text = " ".join(line[1][0] for line in result[0])
    return text
def process_files(files):
    """Extract text from each uploaded file and cache it in `documents`.

    Rebuilds the global `documents` store from scratch on every call and
    returns a short status string for the UI. Handles the case where the
    user clicks "Process" before uploading anything (files is None).
    """
    global documents
    documents = []
    if not files:
        # Gradio passes None when no files were selected; the original
        # code raised TypeError on len(None) here.
        return "No files uploaded. Please add documents first."
    for f in files:
        text = extract_text(f)
        # Store the basename rather than the full Gradio temp path so
        # citations in answers are readable.
        documents.append({"filename": os.path.basename(f.name), "text": text})
    return f"{len(documents)} files processed and stored."
def answer_query(query):
    """Assemble a multi-document prompt and ask Gemini to answer `query`.

    Each stored document contributes up to its first 2000 characters to
    keep the prompt within model limits. Returns the model's text reply,
    or a hint to upload files first when the store is empty.
    """
    if not documents:
        return "Please upload and process files first."
    parts = [
        "You are a research assistant. Analyze the following documents and answer the query.\n"
    ]
    for number, doc in enumerate(documents, start=1):
        excerpt = doc["text"][:2000]
        parts.append(f"\nDocument {number} ({doc['filename']}):\n{excerpt}\n")
    parts.append(
        f"\n\nQuestion: {query}\nAnswer with key themes and cite document numbers."
    )
    response = model.generate_content("".join(parts))
    return response.text
# Step 3: Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Gemini Document Research & Theme Identification Chatbot")
    with gr.Row():
        # ".jpeg" added so the file picker matches the extensions
        # extract_text() actually handles.
        file_input = gr.File(
            file_types=[".pdf", ".jpg", ".jpeg", ".png"],
            file_count="multiple",
            label="Upload Documents",
        )
        process_btn = gr.Button("Process Documents")
    process_output = gr.Textbox(label="Processing Status")
    with gr.Row():
        query_input = gr.Textbox(label="Ask a question")
        query_btn = gr.Button("Get Answer")
    answer_output = gr.Textbox(label="Answer with Themes and Citations", lines=10)
    # Wire buttons to handlers: process stores text, query asks Gemini.
    process_btn.click(fn=process_files, inputs=[file_input], outputs=[process_output])
    query_btn.click(fn=answer_query, inputs=[query_input], outputs=[answer_output])
demo.launch()