Spaces:

tana45
/

smartchatbot

Sleeping

App Files Files Community

Create app.py

by tanya17 - opened Jun 12, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+66

-0

Files changed (1) hide show

app.py +66 -0

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import gradio as gr
+import google.generativeai as genai
+from PyPDF2 import PdfReader
+from paddleocr import PaddleOCR
+import os
+# Step 1: Gemini API Key (must be set in Hugging Face Secrets)
+# os.getenv returns None when GEMINI_API_KEY is unset; that value is passed
+# to configure() unchanged, so a missing secret is not detected here.
+genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
+# NOTE(review): confirm 'gemini-pro' is still a model name the API serves.
+model = genai.GenerativeModel('gemini-pro')
+# Step 2: OCR Setup
+# One shared OCR engine, constructed at import time and reused by
+# extract_text() for every image upload.
+ocr_model = PaddleOCR(use_angle_cls=True, lang='en')
+# Module-level store of processed uploads: process_files() replaces it with
+# {"filename": ..., "text": ...} entries and answer_query() reads it.
+documents = []
+def extract_text(file):
+    ext = os.path.splitext(file.name)[1].lower()
+    text = ""
+    if ext == ".pdf":
+        reader = PdfReader(file)
+        for page in reader.pages:
+            text += page.extract_text() or ""
+    elif ext in [".jpg", ".jpeg", ".png"]:
+        result = ocr_model.ocr(file.name)
+        text = " ".join([line[1][0] for line in result[0]])
+    return text
+def process_files(files):
+    global documents
+    documents = []
+    for f in files:
+        text = extract_text(f)
+        documents.append({"filename": f.name, "text": text})
+    return f"{len(files)} files processed and stored."
+def answer_query(query):
+    if not documents:
+        return "Please upload and process files first."
+    prompt = "You are a research assistant. Analyze the following documents and answer the query.\n"
+    for i, doc in enumerate(documents):
+        prompt += f"\nDocument {i+1} ({doc['filename']}):\n{doc['text'][:2000]}\n"
+    prompt += f"\n\nQuestion: {query}\nAnswer with key themes and cite document numbers."
+    response = model.generate_content(prompt)
+    return response.text
+# Step 3: Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("# 📄 Gemini Document Research & Theme Identification Chatbot")
+    with gr.Row():
+        file_input = gr.File(file_types=[".pdf", ".jpg", ".png"], file_count="multiple", label="Upload Documents")
+        process_btn = gr.Button("Process Documents")
+    process_output = gr.Textbox(label="Processing Status")
+    with gr.Row():
+        query_input = gr.Textbox(label="Ask a question")
+        query_btn = gr.Button("Get Answer")
+    answer_output = gr.Textbox(label="Answer with Themes and Citations", lines=10)
+    process_btn.click(fn=process_files, inputs=[file_input], outputs=[process_output])
+    query_btn.click(fn=answer_query, inputs=[query_input], outputs=[answer_output])
+demo.launch()