File size: 13,282 Bytes
64fbadd
 
7c4f775
 
64fbadd
 
 
 
 
 
 
7c4f775
64fbadd
942596e
7c4f775
64fbadd
 
7c4f775
64fbadd
 
 
 
 
 
 
 
 
7c4f775
64fbadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4f775
64fbadd
 
 
7c4f775
64fbadd
 
 
 
 
 
 
 
7c4f775
64fbadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4f775
64fbadd
 
 
 
 
7c4f775
64fbadd
 
7c4f775
64fbadd
7c4f775
64fbadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4f775
 
64fbadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4f775
64fbadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4f775
64fbadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c4f775
64fbadd
7c4f775
64fbadd
e580bee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# app.py

import gradio as gr
import torch
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
import os
import io

# Import the logic functions from src
import pipeline

# --- Global Objects & Setup ---
# (Most setup code remains here as it's needed globally for the app)
# Order matters: the embeddings/text splitter must exist before the default
# vector store is built, and the LLM must exist before the UI wires handlers.

print("--- Starting App Setup ---")
# 1. Download Model File
# NOTE(review): os.system + wget assumes a POSIX host with wget installed and
# ignores the exit code — a failed or partial download only surfaces later
# when LlamaCpp tries to open the file. TODO confirm deployment host has wget.
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
model_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
if not os.path.exists(model_name):
    print("Downloading model...")
    os.system(f"wget {model_url}")
else:
    print("Model already downloaded.")

# 2. Prepare Default Sample Data & Example Batch
# Individual reviews are separated by "---" lines; the list below is split on
# that delimiter and re-joined the same way when building the vector store.
print("Loading default reviews...")
default_reviews_text = """
This laptop is a beast! The M3 chip is incredibly fast, and the battery lasts a solid 10 hours of heavy use... (rest of laptop reviews) ...dongle life is real.
---
I'm a student, and the battery life is a lifesaver... Highly recommend for college.
---
The keyboard is a dream to type on... Bluetooth connection dropping...
---
Video editing on this machine is flawless... price is very expensive...
---
I bought this for travel... battery easily gets me through a 6-hour flight...
---
Don't buy this if you need a lot of ports... only two USB-C ports...
"""
default_reviews_list = [r.strip() for r in default_reviews_text.strip().split('---') if r.strip()]

# Pre-filled example for the Batch Analyzer tab's gr.Examples widget.
example_batch = """
I'm absolutely blown away by the "NovaBlend Pro" blender!... (rest of blender example)... save your money.
"""

# 3. Load Embedding Model, Text Splitter
print("Loading embedding model and text splitter...")
# Embeddings run on GPU when available; the LLM below is CPU-only regardless
# (n_gpu_layers=0), so these two choices are independent.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=model_kwargs
)
# Small chunks with overlap so individual review sentences stay retrievable.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=40)

# 4. Create Default Vector Store
print("Creating default FAISS vector store...")
# pipeline.create_vector_store_from_content returns None on failure, which is
# fatal here: the chatbot cannot start without a default knowledge base.
default_vector_store = pipeline.create_vector_store_from_content(
    "\n---\n".join(default_reviews_list), text_splitter, embeddings
)
if default_vector_store is None:
    raise ValueError("Failed to create default vector store!")
print("Default vector store created successfully.")

# Global variable to hold the CURRENT vector store for the chatbot
# NOTE: Using a global like this works for simple Gradio apps but isn't
# robust for multiple users. Gradio state or session management is better
# for multi-user scenarios, but this keeps it simpler for now.
current_chatbot_vector_store = default_vector_store
current_context_source = "Default Laptop Reviews"

# 5. Load the LLM
print("Loading LLM (Mistral-7B GGUF)...")
# CPU inference (n_gpu_layers=0); temperature 0.0 for deterministic answers;
# stop sequences prevent the model from continuing past its turn.
llm = LlamaCpp(
    model_path=model_name, n_gpu_layers=0, n_batch=512, n_ctx=4096,
    f16_kv=True, temperature=0.0, max_tokens=512, verbose=False,
    stop=["[/INST]", "User:", "Assistant:"]
)

# 6. Define All Prompts
# All templates use Mistral's [INST] ... [/INST] instruction format to match
# the model loaded above; "[/INST]" is also one of the LLM's stop sequences.
print("Defining all prompts...")
# -- Phase 1 -- (batch analysis: summary / pros-cons / sentiment, each taking
# the full pasted review text via the single {reviews} variable)
summary_template = """[INST] You are a helpful assistant... Reviews:\n{reviews} [/INST]\nConcise Summary:"""
summary_prompt = PromptTemplate(template=summary_template, input_variables=["reviews"])
aspect_template = """[INST] You are a helpful product analyst... Reviews:\n{reviews} [/INST]\nKey Pros and Cons:"""
aspect_prompt = PromptTemplate(template=aspect_template, input_variables=["reviews"])
sentiment_template = """[INST] You are a helpful sentiment analyst... Reviews:\n{reviews} [/INST]\nOverall Sentiment (Score 1-10):"""
sentiment_prompt = PromptTemplate(template=sentiment_template, input_variables=["reviews"])
# -- Phase 2 -- (retrieval chatbot: rewrite the follow-up into a standalone
# question, then answer it strictly from the retrieved review context)
condense_question_template = """[INST] Given the following conversation... Follow Up Input: {question} [/INST]\nStandalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(condense_question_template)
qa_system_prompt = """[INST]
You are a factual assistant that answers only using the provided product reviews.
If the reviews include partial or uncertain information, summarize what they say.
If there is no information at all about the user’s question, respond with:
"I'm sorry, there isn't enough information in the reviews to answer that."

Do not use or infer information about price, comparisons to other brands, or availability unless they are directly mentioned in the reviews.
Always include a short "Evidence:" sentence if you found relevant mentions.

Context:
{context}

User question:
{question}
[/INST]
"""
# NOTE(review): {context} and {question} appear in both the system message and
# the human message below, so the retrieved context is fed to the model twice
# per call — presumably intentional reinforcement; verify token budget (n_ctx=4096).
qa_prompt = ChatPromptTemplate.from_messages([SystemMessagePromptTemplate.from_template(qa_system_prompt), HumanMessagePromptTemplate.from_template("Context:\n{context}\n\nQuestion:\n{question}\n\nHelpful Answer:")])
# Intent gate: few-shot binary classifier that routes off-topic queries away
# from retrieval. Downstream code presumably matches the literal strings
# "Product" / "Off-Topic" — confirm against pipeline.get_chatbot_response.
intent_template = """
[INST]
**CRITICAL INSTRUCTION:** Classify the user's query into ONLY ONE of two categories: "Product" or "Off-Topic".
Your response MUST be EXACTLY "Product" or EXACTLY "Off-Topic".

**EXAMPLES:**
Query: How is the battery life?
Classification: Product
Query: What are the complaints about the screen?
Classification: Product
Query: Does it come in blue?
Classification: Product
Query: What is the capital of France?
Classification: Off-Topic
Query: Hello there
Classification: Off-Topic
Query: Who are you?
Classification: Off-Topic

**NOW CLASSIFY THIS QUERY:**
Query: {query}
[/INST]
Classification:"""
intent_prompt = PromptTemplate(template=intent_template, input_variables=["query"])

# 7. Global Memory Object
# Shared across all users/sessions (same caveat as the global vector store);
# output_key='answer' matches the key returned by the conversational chain.
chat_memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')

print("--- App Setup Complete ---")


# --- Gradio Helper Functions (Wrappers around pipeline logic) ---

def analyze_reviews_gradio_wrapper(review_text, review_file):
    """Gradio wrapper for Phase 1 (batch) analysis.

    Resolves the review content from either an uploaded file or the pasted
    text box (the file takes precedence), then delegates to
    ``pipeline.analyze_reviews_logic`` with the module-level LLM and prompts.

    Args:
        review_text: Raw review text pasted into the textbox (may be empty).
        review_file: Gradio upload — either an object exposing a ``.name``
            filesystem path, the raw file bytes, or ``None``.

    Returns:
        A 3-tuple of strings ``(summary, aspects, sentiment)``. On input
        errors the first element carries the error message and the other
        two are empty strings.
    """
    content = ""
    if review_file is not None:
        try:
            if hasattr(review_file, 'name'):
                # Gradio usually hands us a tempfile wrapper; read its path.
                # Context manager guarantees the handle is closed even if
                # read() raises (the original leaked the handle on error).
                with open(review_file.name, 'rb') as f:
                    byte_content = f.read()
            else:
                # Some Gradio versions pass the raw bytes directly.
                byte_content = review_file
            try:
                content = byte_content.decode('utf-8')
            except UnicodeDecodeError:
                # latin-1 maps every byte, so this fallback cannot fail.
                content = byte_content.decode('latin-1')
        except Exception as e:
            return f"Error reading file: {e}", "", ""
        if not content:
            return "Error: File empty", "", ""
    elif review_text:
        content = review_text
    else:
        return "Please paste reviews or upload a file.", "", ""

    # Call the core logic function (uses module-level llm and prompts).
    return pipeline.analyze_reviews_logic(
        content, llm, summary_prompt, aspect_prompt, sentiment_prompt
    )

def update_chatbot_context_gradio_wrapper(chatbot_file_upload):
    """Gradio wrapper to update the chatbot's knowledge base.

    Reads the uploaded file, builds a fresh vector store from its text, and
    swaps it in as the active chatbot context. On any failure (unreadable
    file, empty content, store-creation error) the previous context is kept
    and the returned status message explains why.
    """
    global current_chatbot_vector_store, current_context_source  # Modify globals

    if chatbot_file_upload is None:
        return f"No file uploaded. Chatbot context remains: **{current_context_source}**."

    print("Processing chatbot context file via Gradio...")
    content = ""
    file_name = "Uploaded File"
    try:
        if hasattr(chatbot_file_upload, 'name'):
            # Tempfile wrapper from Gradio: read the bytes from its path.
            upload_path = chatbot_file_upload.name
            file_name = os.path.basename(upload_path)
            with open(upload_path, 'rb') as handle:
                raw_bytes = handle.read()
        else:
            # Raw bytes handed over directly.
            raw_bytes = chatbot_file_upload
        try:
            content = raw_bytes.decode('utf-8')
        except UnicodeDecodeError:
            # latin-1 accepts any byte sequence, so this cannot fail.
            content = raw_bytes.decode('latin-1')
    except Exception as e:
        return f"Error reading file: {e}. Context not updated."

    if not content:
        return "File empty. Context not updated."

    # Build a replacement store from the new content.
    new_vector_store = pipeline.create_vector_store_from_content(content, text_splitter, embeddings)

    if not new_vector_store:
        # Store creation failed — leave the existing context in place.
        status_message = f"Error creating context from {file_name}. Chatbot context remains: **{current_context_source}**."
        print(status_message)
        return status_message

    current_chatbot_vector_store = new_vector_store  # Update global store
    current_context_source = f"File: {file_name}"
    status_message = f"Chatbot context updated using **{file_name}**."
    print(status_message)
    return status_message


def chat_responder_gradio_wrapper(message, chat_history):
    """Gradio wrapper for the chatbot response logic.

    ``chat_history`` is supplied by gr.ChatInterface but is intentionally
    unused here: conversational state lives in the module-level
    ``chat_memory`` object instead.
    """
    # Hand the module-level resources (memory, current store, LLM, prompts)
    # to the core pipeline function and return its answer unchanged.
    return pipeline.get_chatbot_response(
        message=message,
        chat_memory=chat_memory,
        vector_store=current_chatbot_vector_store,  # Use the current global store
        llm=llm,
        intent_prompt=intent_prompt,
        condense_prompt=CONDENSE_QUESTION_PROMPT,
        qa_prompt=qa_prompt,
    )

def clear_chat_memory_gradio_wrapper():
    """Wipe the shared conversation memory (Reset Chat Memory button).

    Returns:
        An empty list, which Gradio uses to blank the ChatInterface display.
    """
    print("Clearing chat memory via Gradio button...")
    # chat_memory is shared across all turns; clearing it starts a fresh
    # conversation for the condense-question step.
    chat_memory.clear()
    print("Chat memory cleared.")
    return []

def reset_context_to_default_gradio_wrapper():
    """Restore the chatbot's knowledge base to the built-in laptop reviews.

    Returns:
        A markdown status string for the context-status display.
    """
    # Rebind the module-level context to the store built at startup.
    # (Also normalizes this function's indentation to the file's 4-space style.)
    global current_chatbot_vector_store, current_context_source
    print("Resetting context via Gradio button...")
    current_chatbot_vector_store = default_vector_store
    current_context_source = "Default Laptop Reviews"
    status_msg = f"Chatbot context reset to **{current_context_source}**."
    print(status_msg)
    return status_msg


# --- Gradio UI Definition ---
# Two-tab layout: Tab 1 is stateless batch analysis; Tab 2 is the retrieval
# chatbot whose context can be swapped via file upload. All event handlers
# are the wrapper functions defined above.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Product Review Intelligence Center")
    gr.Markdown("Analyze product reviews using Mistral-7B (Tab 1) or chat about reviews with customizable context (Tab 2).")

    with gr.Tabs():
        # --- TAB 1: BATCH ANALYZER ---
        with gr.TabItem("Batch Analyzer"):
            gr.Markdown("Paste reviews OR upload a file (.txt, .csv) to analyze them.")
            gr.Markdown("**Note:** This analysis does *not* affect the chatbot's context in Tab 2.")
            with gr.Row():
                # Left column (wider): inputs; right column: three read-only outputs.
                with gr.Column(scale=2):
                     review_input_text_tab1 = gr.Textbox(lines=15, placeholder="Paste reviews here...", label="Reviews Text Input")
                     review_input_file_tab1 = gr.File(label="Upload Reviews File (.txt, .csv)", file_types=[".txt", ".csv"])
                with gr.Column(scale=1):
                    summary_output_tab1 = gr.Textbox(label="Overall Summary", lines=5, interactive=False)
                    aspect_output_tab1 = gr.Textbox(label="Key Aspects (Pros/Cons)", lines=5, interactive=False)
                    sentiment_output_tab1 = gr.Textbox(label="Sentiment Analysis", lines=5, interactive=False)
            analyze_button_tab1 = gr.Button("Analyze Reviews")
            # cache_examples=False: running the example invokes the real LLM, so no precomputation.
            gr.Examples(examples=[[example_batch, None]], inputs=[review_input_text_tab1, review_input_file_tab1], outputs=[summary_output_tab1, aspect_output_tab1, sentiment_output_tab1], fn=analyze_reviews_gradio_wrapper, cache_examples=False) # Use wrapper
            analyze_button_tab1.click(fn=analyze_reviews_gradio_wrapper, inputs=[review_input_text_tab1, review_input_file_tab1], outputs=[summary_output_tab1, aspect_output_tab1, sentiment_output_tab1]) # Use wrapper

        # --- TAB 2: CHAT ABOUT REVIEWS ---
        with gr.TabItem("Ask a Question (Chatbot)"):
            gr.Markdown("Ask specific questions about product reviews. Upload a file below to change the chatbot's knowledge base.")
            # Status banner reflects current_context_source at build time; the
            # click handlers below return updated markdown to refresh it.
            chatbot_status_display = gr.Markdown(f"Chatbot is currently using: **{current_context_source}**")
            with gr.Row():
                chatbot_context_file = gr.File(label="Upload Chatbot Context File (.txt, .csv)", file_types=[".txt", ".csv"], scale=3)
                update_context_button = gr.Button("Update Chatbot Context", scale=1)
            chatbot_interface = gr.ChatInterface(
                fn=chat_responder_gradio_wrapper, # Use wrapper
                examples=["How is the battery life?", "What about the screen?", "What are the complaints about connectivity?", "What is the capital of France?"],
                title="Review Chatbot"
            )
            with gr.Row():
                reset_memory_button = gr.Button("🔄 Reset Chat Memory")
                reset_context_button = gr.Button("🔄 Reset Chatbot Context to Default")
            # Link actions to wrapper functions
            update_context_button.click(fn=update_chatbot_context_gradio_wrapper, inputs=[chatbot_context_file], outputs=[chatbot_status_display])
            # Resetting memory also clears the visible chat transcript.
            reset_memory_button.click(fn=clear_chat_memory_gradio_wrapper, inputs=None, outputs=[chatbot_interface])
            reset_context_button.click(fn=reset_context_to_default_gradio_wrapper, inputs=None, outputs=[chatbot_status_display])

# --- Launch Command ---
if __name__ == "__main__":
    chat_memory.clear() # Clear memory each time app starts
    # debug=True surfaces handler tracebacks in the browser/console —
    # fine for development; disable for a public deployment.
    demo.launch(debug=True)