"""Gradio app: upload PDFs/images, extract their text, and query Gemini.

Uploaded PDFs are read with PyPDF2 and images are OCR'd with PaddleOCR.
The extracted text is kept in the module-level ``documents`` list and is
embedded (truncated per document) into the prompt sent to Gemini.
"""

import os

import google.generativeai as genai
import gradio as gr
from paddleocr import PaddleOCR
from PyPDF2 import PdfReader

# Step 1: Gemini API Key (must be set in Hugging Face Secrets)
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
model = genai.GenerativeModel('gemini-pro')

# Step 2: OCR Setup
ocr_model = PaddleOCR(use_angle_cls=True, lang='en')

# File extensions handled by extract_text(); the upload widget is built
# from the same tuples so the two can never drift apart.
PDF_EXTENSIONS = (".pdf",)
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Maximum number of characters of each document embedded in the prompt.
MAX_CHARS_PER_DOC = 2000

# Each entry is {"filename": <path of the uploaded file>, "text": <str>}.
documents = []


def _upload_path(file):
    """Return the filesystem path of an uploaded file.

    Accepts either a plain path (``str`` / ``os.PathLike``) or a file-like
    object exposing the path as ``.name``.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def extract_text(file):
    """Extract plain text from an uploaded PDF or image.

    Args:
        file: Uploaded file, given as a path or as an object with a
            ``.name`` attribute holding the path.

    Returns:
        The extracted text. Empty string when the extension is not
        supported or when no text could be extracted.
    """
    path = _upload_path(file)
    ext = os.path.splitext(path)[1].lower()

    if ext in PDF_EXTENSIONS:
        reader = PdfReader(path)
        # extract_text() may yield None/"" for pages without a text layer.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if ext in IMAGE_EXTENSIONS:
        result = ocr_model.ocr(path)
        # The per-image entry is None/empty when OCR detects no text;
        # iterating it unguarded would raise TypeError.
        lines = result[0] if result else None
        if not lines:
            return ""
        return " ".join(line[1][0] for line in lines)

    return ""


def process_files(files):
    """Extract text from every uploaded file and store it in ``documents``.

    Args:
        files: List of uploaded files from the Gradio file widget, or
            ``None`` when nothing was uploaded.

    Returns:
        A human-readable status message.
    """
    global documents

    if not files:
        documents = []
        return "No files uploaded. Please upload at least one document."

    # Build the new list first so a failure on one file does not leave
    # ``documents`` half-populated.
    processed = [
        {"filename": _upload_path(f), "text": extract_text(f)}
        for f in files
    ]
    documents = processed
    return f"{len(files)} files processed and stored."


def answer_query(query):
    """Ask Gemini to answer ``query`` using the processed documents.

    Args:
        query: The user's question.

    Returns:
        The model's answer text, or an explanatory message when there are
        no processed documents, the query is empty, or the model returned
        no usable text.
    """
    if not documents:
        return "Please upload and process files first."
    if not query or not query.strip():
        return "Please enter a question."

    parts = [
        "You are a research assistant. Analyze the following documents "
        "and answer the query.\n"
    ]
    for i, doc in enumerate(documents):
        parts.append(
            f"\nDocument {i+1} ({doc['filename']}):\n"
            f"{doc['text'][:MAX_CHARS_PER_DOC]}\n"
        )
    parts.append(
        f"\n\nQuestion: {query}\n"
        "Answer with key themes and cite document numbers."
    )
    prompt = "".join(parts)

    response = model.generate_content(prompt)
    try:
        return response.text
    except ValueError:
        # ``response.text`` raises ValueError when the response carries no
        # valid text part (e.g. blocked by safety filters).
        return (
            "The model did not return a text answer "
            "(the response may have been blocked or empty)."
        )


# Step 3: Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Gemini Document Research & Theme Identification Chatbot")

    with gr.Row():
        file_input = gr.File(
            file_types=list(PDF_EXTENSIONS + IMAGE_EXTENSIONS),
            file_count="multiple",
            label="Upload Documents",
        )
        process_btn = gr.Button("Process Documents")
    process_output = gr.Textbox(label="Processing Status")

    with gr.Row():
        query_input = gr.Textbox(label="Ask a question")
        query_btn = gr.Button("Get Answer")
    answer_output = gr.Textbox(
        label="Answer with Themes and Citations", lines=10
    )

    process_btn.click(
        fn=process_files, inputs=[file_input], outputs=[process_output]
    )
    query_btn.click(
        fn=answer_query, inputs=[query_input], outputs=[answer_output]
    )

demo.launch()