gmustafa413 committed on
Commit
4e0ad31
·
verified ·
1 Parent(s): 436d2a3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz
3
+ import numpy as np
4
+ import requests
5
+ import faiss
6
+ import re
7
+ import json
8
+ import pandas as pd
9
+ from docx import Document
10
+ from pptx import Presentation
11
+ from sentence_transformers import SentenceTransformer
12
+ from concurrent.futures import ThreadPoolExecutor
13
+ import os
14
+
15
# Configuration - Get API key from environment variables.
# NOTE: os.environ.get returns None when GEMINI_API_KEY is not set; nothing
# in this section validates it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
# Sentence-transformers model identifier passed to SentenceTransformer below.
MODEL_NAME = "all-MiniLM-L6-v2"
# NOTE(review): units (characters vs. tokens) are not visible in this file.
CHUNK_SIZE = 1024
MAX_TOKENS = 4096
# Thread count for DocumentProcessor's ThreadPoolExecutor.
WORKERS = 8

# Initialize the embedding model once at import time. Any failure is
# re-raised as RuntimeError, chained to the original exception so the
# underlying cause stays visible in the traceback.
try:
    MODEL = SentenceTransformer(MODEL_NAME, device='cpu')
except Exception as e:
    raise RuntimeError(f"Failed to initialize model: {str(e)}") from e
27
+
28
class DocumentProcessor:
    """State container for document ingestion and retrieval.

    Holds a FAISS inner-product index sized to the embedding model, the list
    of stored chunks, and a thread pool with ``WORKERS`` threads.

    NOTE(review): other code in this file calls ``process_documents`` and
    ``query`` on an instance of this class, but neither method is defined
    here -- only placeholder comments remain. They must be restored from the
    previous version before the application can run.
    """

    def __init__(self):
        # Index dimensionality must match the vectors produced by MODEL.
        embedding_dim = MODEL.get_sentence_embedding_dimension()
        self.index = faiss.IndexFlatIP(embedding_dim)
        self.chunks = []
        self.processor_pool = ThreadPoolExecutor(max_workers=WORKERS)

    # TODO: restore the document processing methods from the previous version
    # (placeholder left by the original commit):
    # [Keep all the original document processing methods unchanged]
    # [Include the complete DocumentProcessor class implementation]
39
+
40
# Initialize processor
processor = DocumentProcessor()

# Upload size cap in bytes (500 MiB). Gradio 4+ does not accept a size-limit
# argument on gr.File (an unknown keyword raises TypeError there), so the cap
# is applied through launch() instead.
MAX_UPLOAD_BYTES = 500 * 1024 * 1024

# Gradio interface: upload/processing controls on the left, chat on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="Document Chatbot") as app:
    gr.Markdown("## 📚 Multi-Format Document Chatbot")

    with gr.Row():
        with gr.Column(scale=2):
            files = gr.File(
                file_count="multiple",
                file_types=[".pdf", ".docx", ".txt", ".pptx", ".xls", ".xlsx", ".csv"],
                label="Upload Documents",
            )
            process_btn = gr.Button("Process Documents", variant="primary")
            status = gr.Textbox(label="Processing Status")

        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=500, label="Chat History")
            question = gr.Textbox(
                label="Ask a question",
                placeholder="Type your question here...",
                max_lines=3,
            )
            with gr.Row():
                ask_btn = gr.Button("Ask", variant="primary")
                clear_btn = gr.Button("Clear Chat")

    # The uploaded files go to the processor; its return value is shown in
    # the status box.
    process_btn.click(
        fn=processor.process_documents,
        inputs=files,
        outputs=status,
    )

    def ask_question(question, chat_history):
        """Append the answer for ``question`` to the chat history.

        Args:
            question: Text from the question box; may be empty or None.
            chat_history: Current chatbot value, a list of
                ``(user_message, bot_message)`` pairs; None is treated as
                an empty history.

        Returns:
            A new history list with one ``(user_message, bot_message)`` pair
            appended. The input list is not mutated.
        """
        history = list(chat_history or [])
        if not question or not question.strip():
            return history + [("", "Please enter a valid question")]
        # query() returns an (answer, success) pair; the answer text is shown
        # either way, so the flag is intentionally unused here.
        answer, _success = processor.query(question)
        return history + [(question, answer)]

    # After the answer is rendered, clear the question box.
    ask_btn.click(
        fn=ask_question,
        inputs=[question, chatbot],
        outputs=chatbot,
    ).then(lambda: "", None, question)

    clear_btn.click(
        fn=lambda: [],
        inputs=None,
        outputs=chatbot,
    )

if __name__ == "__main__":
    import inspect

    launch_kwargs = {"debug": True}
    # Older Gradio releases have no `max_file_size` launch option; pass the
    # upload cap only when the installed version supports it.
    if "max_file_size" in inspect.signature(app.launch).parameters:
        launch_kwargs["max_file_size"] = MAX_UPLOAD_BYTES
    app.launch(**launch_kwargs)