Spaces:

tanya17
/

smartchabot

Sleeping

tanya17 committed on Jun 12, 2025

Commit

e314452

verified ·

1 Parent(s): 7cb4c71

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,14 +1,16 @@
 import gradio as gr
-import google.generativeai as genai
 from PyPDF2 import PdfReader
 from paddleocr import PaddleOCR
 import os
-# Step 1: Gemini API Key (must be set in Hugging Face Secrets)
-genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
-model = genai.GenerativeModel('gemini-pro')
-# Step 2: OCR Setup
 ocr_model = PaddleOCR(use_angle_cls=True, lang='en')
 documents = []
@@ -36,17 +38,20 @@ def answer_query(query):
     if not documents:
         return "Please upload and process files first."
-    prompt = "You are a research assistant. Analyze the following documents and answer the query.\n"
     for i, doc in enumerate(documents):
         prompt += f"\nDocument {i+1} ({doc['filename']}):\n{doc['text'][:2000]}\n"
-    prompt += f"\n\nQuestion: {query}\nAnswer with key themes and cite document numbers."
-    response = model.generate_content(prompt)
-    return response.text
-# Step 3: Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# 📄 Gemini Document Research & Theme Identification Chatbot")
     with gr.Row():
         file_input = gr.File(file_types=[".pdf", ".jpg", ".png"], file_count="multiple", label="Upload Documents")

 import gradio as gr
 from PyPDF2 import PdfReader
 from paddleocr import PaddleOCR
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import os
+# Load Local Model (No API)
+model_name = "google/flan-t5-base"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+local_llm = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
+# OCR Setup
 ocr_model = PaddleOCR(use_angle_cls=True, lang='en')
 documents = []
     if not documents:
         return "Please upload and process files first."
+    prompt = "Analyze the following documents and answer the query:\n"
     for i, doc in enumerate(documents):
         prompt += f"\nDocument {i+1} ({doc['filename']}):\n{doc['text'][:2000]}\n"
+    prompt += f"\n\nQuestion: {query}\nAnswer with themes and citations."
+    try:
+        response = local_llm(prompt, max_length=256, do_sample=True, temperature=0.7)
+        return response[0]['generated_text']
+    except Exception as e:
+        return f"❌ Error: {str(e)}"
+# Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("# 📄 Document Theme Identification Chatbot (Offline Hugging Face Model)")
     with gr.Row():
         file_input = gr.File(file_types=[".pdf", ".jpg", ".png"], file_count="multiple", label="Upload Documents")