Rohitface committed on
Commit
92d3f3d
·
verified ·
1 Parent(s): d79387d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py
#
# Gradio app for chatting with a .txt document via a small RAG pipeline:
# FAISS nearest-neighbour retrieval over sentence-transformer embeddings,
# with a FLAN-T5 seq2seq model generating the final answer.

import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# --- Backend Logic ---

# Step 1: Load the necessary models once at import time, so every request
# reuses the same in-memory models instead of reloading them per call.
# UPGRADED: The generator model is now 'google/flan-t5-large' for better responses.
print("Loading models... This may take a moment, especially the first time.")
# Seq2seq generator used to answer questions from retrieved context.
generator = pipeline("text2text-generation", model="google/flan-t5-large")
# Sentence embedder used for both document chunks and user questions.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("Models loaded successfully!")
def chunk_text(text, chunk_size=256, overlap=32):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Args:
        text: Raw document text.
        chunk_size: Maximum number of words per chunk.
        overlap: Number of words shared between consecutive chunks.

    Returns:
        A list of space-joined chunks; an empty list for empty/blank text.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            in ``[0, chunk_size)`` — otherwise the stride would be zero
            (opaque ``range`` error) or negative (silently no chunks).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    stride = chunk_size - overlap
    # Each chunk starts `stride` words after the previous one, so adjacent
    # chunks share the last `overlap` words — preserves context at borders.
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), stride)
    ]
def process_chat_request(user_question, chat_history, state_data):
    """Answer *user_question* from the indexed document (RAG pipeline).

    Args:
        user_question: Question text typed by the user.
        chat_history: List of (user, bot) message pairs shown in the Chatbot.
        state_data: Per-session dict holding "index" (FAISS index) and
            "chunks" (list of text chunks), built when the file is processed.

    Returns:
        A ("", updated_chat_history) pair: the empty string clears the
        input textbox, the list refreshes the Chatbot component.

    Raises:
        gr.Error: If no file has been indexed, the question is blank, or
            retrieval/generation fails.
    """
    index = state_data.get("index")
    chunks = state_data.get("chunks")

    # Explicit checks instead of all([index, chunks]): truthiness of a FAISS
    # index object is not a meaningful test, and an empty chunk list means
    # there is nothing to retrieve from.
    if index is None or not chunks:
        raise gr.Error("File index is missing. Please restart by uploading a file.")
    # .strip() also rejects whitespace-only input, not just the empty string.
    if not user_question or not user_question.strip():
        raise gr.Error("Please enter a question.")

    try:
        # 1. RETRIEVE: embed the question and fetch the closest chunks.
        question_embedding = embedder.encode([user_question])
        _, top_k_indices = index.search(question_embedding, k=3)  # Retrieve top 3 chunks
        context = " ".join(chunks[i] for i in top_k_indices[0])

        # 2. GENERATE: build a grounded prompt and run the seq2seq model.
        prompt = f"""
Based on the following context, provide a detailed answer to the user's question.

CONTEXT:
---
{context}
---

QUESTION: {user_question}

ANSWER:
"""

        result = generator(
            prompt,
            max_length=512,
            num_beams=4,
            # NOTE(review): temperature has no effect without do_sample=True
            # (beam search here is deterministic); kept to preserve the
            # original call signature — confirm intent before removing.
            temperature=0.1
        )
        bot_response = result[0]['generated_text']

    except Exception as e:
        # Surface any retrieval/generation failure to the UI as a toast.
        raise gr.Error(f"An error occurred during processing: {e}")

    chat_history.append((user_question, bot_response))
    # First output clears the textbox, second updates the chat display.
    return "", chat_history
# --- Gradio UI Definition ---

# Two-page layout inside one Blocks app: a welcome/upload page and a chat
# page; visibility is toggled once a file has been processed.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="teal"), title="Text File Analyzer") as demo:
    # Per-session store for the FAISS index and text chunks; populated by
    # go_to_chat below and consumed by process_chat_request.
    app_state = gr.State({})

    # Landing page: title, decorative GIF, and the upload controls.
    with gr.Column(visible=True) as welcome_page:
        gr.Markdown(
            """
<div style='text-align: center; font-family: "Garamond", serif; padding-top: 30px;'>
<h1 style='font-size: 3.5em;'>Efficient Text File Analyzer</h1>
<p style='font-size: 1.5em; color: #555;'>Chat with any .txt document using an efficient RAG pipeline.</p>
</div>
"""
        )
        gr.HTML(
            """
<div style='text-align: center; padding: 20px;'>
<img src='https://media.giphy.com/media/v1.Y2lkPTc5MGI3NjExd2Vjb3M2eGZzN2FkNWZpZzZ0bWl0c2JqZzZlMHVwZ2l4b2t0eXFpcyZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/YWjDA4k2n6d5Ew42zC/giphy.gif'
style='max-width: 350px; margin: auto; border-radius: 20px; box-shadow: 0 8px 16px rgba(0,0,0,0.1);' />
</div>
"""
        )
        # NOTE(review): horizontal_alignment is not a gr.Column parameter in
        # many Gradio releases — confirm against the pinned Gradio version.
        with gr.Column(horizontal_alignment="center"):
            gr.Markdown("### Upload Your Text File")
            chat_file_upload = gr.File(label="Upload any .txt file", file_types=[".txt"])
            lets_chat_button = gr.Button("💬 Process File and Start Chatting 💬", variant="primary")

    # Chat page: hidden until a file has been indexed.
    with gr.Column(visible=False) as chat_page:
        gr.Markdown("<h1 style='text-align: center;'>Chat with your Document</h1>")
        # NOTE(review): bubble_full_width is deprecated/removed in newer
        # Gradio versions — confirm against the pinned version.
        chatbot_ui = gr.Chatbot(height=600, bubble_full_width=False)
        with gr.Row():
            user_input_box = gr.Textbox(placeholder="Ask a question about your file...", scale=5)
            submit_button = gr.Button("Send", variant="primary", scale=1)

    def go_to_chat(current_state, chat_file, progress=gr.Progress()):
        """Read the uploaded file, build the FAISS index, and switch pages.

        Returns the new state dict plus visibility updates that hide the
        welcome page and show the chat page.
        """
        # `current_state` is received but not read; the state is rebuilt
        # from scratch for each uploaded file.
        if chat_file is None:
            raise gr.Error("A file must be uploaded.")

        progress(0, desc="Reading file...")
        # NOTE(review): assumes the upload is UTF-8 text — a non-UTF-8 file
        # will raise UnicodeDecodeError here; confirm whether that is intended.
        with open(chat_file.name, 'r', encoding='utf-8') as f:
            content = f.read()

        progress(0.2, desc="Chunking text...")
        chunks = chunk_text(content)

        progress(0.5, desc="Creating embeddings... (This might take a moment)")
        embeddings = embedder.encode(chunks, show_progress_bar=True)

        progress(0.8, desc="Building search index...")
        # Flat L2 index: exact nearest-neighbour search over the embeddings.
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings).astype('float32'))

        new_state = {
            "index": index,
            "chunks": chunks
        }

        progress(1, desc="Done!")
        # Returning gr.Column(visible=...) updates the visibility of the
        # corresponding output components (welcome_page / chat_page).
        return (
            new_state,
            gr.Column(visible=False),
            gr.Column(visible=True)
        )

    # Wire the upload button to the indexing routine.
    lets_chat_button.click(
        fn=go_to_chat,
        inputs=[app_state, chat_file_upload],
        outputs=[app_state, welcome_page, chat_page]
    )
    # Both the Send button and pressing Enter in the textbox submit a question.
    submit_button.click(
        fn=process_chat_request,
        inputs=[user_input_box, chatbot_ui, app_state],
        outputs=[user_input_box, chatbot_ui]
    )
    user_input_box.submit(
        fn=process_chat_request,
        inputs=[user_input_box, chatbot_ui, app_state],
        outputs=[user_input_box, chatbot_ui]
    )
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()