tanya17 committed on
Commit
e314452
·
verified ·
1 Parent(s): 7cb4c71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -1,14 +1,16 @@
1
  import gradio as gr
2
- import google.generativeai as genai
3
  from PyPDF2 import PdfReader
4
  from paddleocr import PaddleOCR
 
5
  import os
6
 
7
- # Step 1: Gemini API Key (must be set in Hugging Face Secrets)
8
- genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
9
- model = genai.GenerativeModel('gemini-pro')
 
 
10
 
11
- # Step 2: OCR Setup
12
  ocr_model = PaddleOCR(use_angle_cls=True, lang='en')
13
  documents = []
14
 
@@ -36,17 +38,20 @@ def answer_query(query):
36
  if not documents:
37
  return "Please upload and process files first."
38
 
39
- prompt = "You are a research assistant. Analyze the following documents and answer the query.\n"
40
  for i, doc in enumerate(documents):
41
  prompt += f"\nDocument {i+1} ({doc['filename']}):\n{doc['text'][:2000]}\n"
42
- prompt += f"\n\nQuestion: {query}\nAnswer with key themes and cite document numbers."
43
 
44
- response = model.generate_content(prompt)
45
- return response.text
 
 
 
46
 
47
- # Step 3: Gradio Interface
48
  with gr.Blocks() as demo:
49
- gr.Markdown("# 📄 Gemini Document Research & Theme Identification Chatbot")
50
 
51
  with gr.Row():
52
  file_input = gr.File(file_types=[".pdf", ".jpg", ".png"], file_count="multiple", label="Upload Documents")
 
1
  import gradio as gr
 
2
  from PyPDF2 import PdfReader
3
  from paddleocr import PaddleOCR
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
  import os
6
 
7
+ # Load Local Model (No API)
8
+ model_name = "google/flan-t5-base"
9
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
10
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
11
+ local_llm = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
12
 
13
+ # OCR Setup
14
  ocr_model = PaddleOCR(use_angle_cls=True, lang='en')
15
  documents = []
16
 
 
38
  if not documents:
39
  return "Please upload and process files first."
40
 
41
+ prompt = "Analyze the following documents and answer the query:\n"
42
  for i, doc in enumerate(documents):
43
  prompt += f"\nDocument {i+1} ({doc['filename']}):\n{doc['text'][:2000]}\n"
44
+ prompt += f"\n\nQuestion: {query}\nAnswer with themes and citations."
45
 
46
+ try:
47
+ response = local_llm(prompt, max_length=256, do_sample=True, temperature=0.7)
48
+ return response[0]['generated_text']
49
+ except Exception as e:
50
+ return f"❌ Error: {str(e)}"
51
 
52
+ # Gradio Interface
53
  with gr.Blocks() as demo:
54
+ gr.Markdown("# 📄 Document Theme Identification Chatbot (Offline Hugging Face Model)")
55
 
56
  with gr.Row():
57
  file_input = gr.File(file_types=[".pdf", ".jpg", ".png"], file_count="multiple", label="Upload Documents")