Spaces:
Sleeping
Sleeping
File size: 13,282 Bytes
64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 942596e 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd 7c4f775 64fbadd e580bee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 |
# app.py
import gradio as gr
import torch
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
import os
import io
# Import the logic functions from src
import pipeline
# --- Global Objects & Setup ---
# (Most setup code remains here as it's needed globally for the app)
print("--- Starting App Setup ---")

# 1. Download Model File
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
model_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
if not os.path.exists(model_name):
    print("Downloading model...")
    # FIX: was `os.system(f"wget {model_url}")` — a shell-string command is
    # injection-prone and assumes wget exists on the host. urllib is stdlib,
    # portable, and saves straight to the expected filename.
    import urllib.request
    urllib.request.urlretrieve(model_url, model_name)
else:
    print("Model already downloaded.")

# 2. Prepare Default Sample Data & Example Batch
print("Loading default reviews...")
default_reviews_text = """
This laptop is a beast! The M3 chip is incredibly fast, and the battery lasts a solid 10 hours of heavy use... (rest of laptop reviews) ...dongle life is real.
---
I'm a student, and the battery life is a lifesaver... Highly recommend for college.
---
The keyboard is a dream to type on... Bluetooth connection dropping...
---
Video editing on this machine is flawless... price is very expensive...
---
I bought this for travel... battery easily gets me through a 6-hour flight...
---
Don't buy this if you need a lot of ports... only two USB-C ports...
"""
# Split the seed corpus on the `---` separators, dropping empty fragments.
default_reviews_list = [r.strip() for r in default_reviews_text.strip().split('---') if r.strip()]
example_batch = """
I'm absolutely blown away by the "NovaBlend Pro" blender!... (rest of blender example)... save your money.
"""

# 3. Load Embedding Model, Text Splitter
print("Loading embedding model and text splitter...")
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=model_kwargs
)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=40)

# 4. Create Default Vector Store
print("Creating default FAISS vector store...")
default_vector_store = pipeline.create_vector_store_from_content(
    "\n---\n".join(default_reviews_list), text_splitter, embeddings
)
if default_vector_store is None:
    raise ValueError("Failed to create default vector store!")
print("Default vector store created successfully.")

# Global variable to hold the CURRENT vector store for the chatbot
# NOTE: Using a global like this works for simple Gradio apps but isn't
# robust for multiple users. Gradio state or session management is better
# for multi-user scenarios, but this keeps it simpler for now.
current_chatbot_vector_store = default_vector_store
current_context_source = "Default Laptop Reviews"

# 5. Load the LLM
print("Loading LLM (Mistral-7B GGUF)...")
llm = LlamaCpp(
    model_path=model_name, n_gpu_layers=0, n_batch=512, n_ctx=4096,
    f16_kv=True, temperature=0.0, max_tokens=512, verbose=False,
    stop=["[/INST]", "User:", "Assistant:"]
)

# 6. Define All Prompts
print("Defining all prompts...")
# -- Phase 1 --
summary_template = """[INST] You are a helpful assistant... Reviews:\n{reviews} [/INST]\nConcise Summary:"""
summary_prompt = PromptTemplate(template=summary_template, input_variables=["reviews"])
aspect_template = """[INST] You are a helpful product analyst... Reviews:\n{reviews} [/INST]\nKey Pros and Cons:"""
aspect_prompt = PromptTemplate(template=aspect_template, input_variables=["reviews"])
sentiment_template = """[INST] You are a helpful sentiment analyst... Reviews:\n{reviews} [/INST]\nOverall Sentiment (Score 1-10):"""
sentiment_prompt = PromptTemplate(template=sentiment_template, input_variables=["reviews"])
# -- Phase 2 --
condense_question_template = """[INST] Given the following conversation... Follow Up Input: {question} [/INST]\nStandalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(condense_question_template)
qa_system_prompt = """[INST]
You are a factual assistant that answers only using the provided product reviews.
If the reviews include partial or uncertain information, summarize what they say.
If there is no information at all about the user’s question, respond with:
"I'm sorry, there isn't enough information in the reviews to answer that."
Do not use or infer information about price, comparisons to other brands, or availability unless they are directly mentioned in the reviews.
Always include a short "Evidence:" sentence if you found relevant mentions.
Context:
{context}
User question:
{question}
[/INST]
"""
qa_prompt = ChatPromptTemplate.from_messages([SystemMessagePromptTemplate.from_template(qa_system_prompt), HumanMessagePromptTemplate.from_template("Context:\n{context}\n\nQuestion:\n{question}\n\nHelpful Answer:")])
intent_template = """
[INST]
**CRITICAL INSTRUCTION:** Classify the user's query into ONLY ONE of two categories: "Product" or "Off-Topic".
Your response MUST be EXACTLY "Product" or EXACTLY "Off-Topic".
**EXAMPLES:**
Query: How is the battery life?
Classification: Product
Query: What are the complaints about the screen?
Classification: Product
Query: Does it come in blue?
Classification: Product
Query: What is the capital of France?
Classification: Off-Topic
Query: Hello there
Classification: Off-Topic
Query: Who are you?
Classification: Off-Topic
**NOW CLASSIFY THIS QUERY:**
Query: {query}
[/INST]
Classification:"""
intent_prompt = PromptTemplate(template=intent_template, input_variables=["query"])

# 7. Global Memory Object
chat_memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
print("--- App Setup Complete ---")
# --- Gradio Helper Functions (Wrappers around pipeline logic) ---
def analyze_reviews_gradio_wrapper(review_text, review_file):
    """Gradio wrapper for Phase 1 analysis.

    Reads review content from an uploaded file (takes precedence) or the
    pasted text box, then delegates to pipeline.analyze_reviews_logic.

    Returns:
        A (summary, aspects, sentiment) 3-tuple of strings. On input errors
        the first element carries the message and the others are empty.
    """
    content = ""
    if review_file is not None:
        try:
            if hasattr(review_file, 'name'):
                # Gradio file components hand us a tempfile-like object;
                # FIX: use a context manager so the handle is closed even
                # if read() raises (original open/read/close leaked then).
                with open(review_file.name, 'rb') as f:
                    byte_content = f.read()
            else:
                # presumably raw bytes from a binary upload — TODO confirm
                byte_content = review_file
            try:
                content = byte_content.decode('utf-8')
            except UnicodeDecodeError:
                # latin-1 maps every byte, so this fallback cannot fail.
                content = byte_content.decode('latin-1')
        except Exception as e:
            return f"Error reading file: {e}", "", ""
        if not content:
            return "Error: File empty", "", ""
    elif review_text:
        content = review_text
    else:
        return "Please paste reviews or upload a file.", "", ""
    # Call the core logic function
    return pipeline.analyze_reviews_logic(
        content, llm, summary_prompt, aspect_prompt, sentiment_prompt
    )
def update_chatbot_context_gradio_wrapper(chatbot_file_upload):
    """Gradio wrapper to update chatbot context.

    Rebuilds the chatbot's vector store from an uploaded file and swaps it
    into the module-level ``current_chatbot_vector_store``. On any failure
    the previous store is kept.

    Returns:
        A markdown status string for the UI status display.
    """
    global current_chatbot_vector_store, current_context_source  # Modify globals
    if chatbot_file_upload is None:
        return f"No file uploaded. Chatbot context remains: **{current_context_source}**."
    print("Processing chatbot context file via Gradio...")
    content = ""
    file_name = "Uploaded File"
    try:
        if hasattr(chatbot_file_upload, 'name'):
            file_path = chatbot_file_upload.name
            file_name = os.path.basename(file_path)
            with open(file_path, 'rb') as f:
                byte_content = f.read()
        else:
            # presumably raw bytes from a binary upload — TODO confirm
            byte_content = chatbot_file_upload
        try:
            content = byte_content.decode('utf-8')
        except UnicodeDecodeError:
            # latin-1 maps every byte, so this fallback cannot fail.
            content = byte_content.decode('latin-1')
    except Exception as e:
        return f"Error reading file: {e}. Context not updated."
    if not content:
        return "File empty. Context not updated."
    # Call the core logic function to create the store
    new_vector_store = pipeline.create_vector_store_from_content(content, text_splitter, embeddings)
    if new_vector_store:
        current_chatbot_vector_store = new_vector_store  # Update global store
        current_context_source = f"File: {file_name}"
        status_message = f"Chatbot context updated using **{file_name}**."
        print(status_message)
        return status_message
    else:
        # If store creation failed, keep the old one
        status_message = f"Error creating context from {file_name}. Chatbot context remains: **{current_context_source}**."
        print(status_message)
        return status_message
def chat_responder_gradio_wrapper(message, chat_history):
    """Gradio wrapper for the chatbot response logic.

    `chat_history` is supplied by gr.ChatInterface but unused here: the
    conversation state lives in the global LangChain `chat_memory` instead.
    """
    # Pass necessary global objects to the core logic function
    response = pipeline.get_chatbot_response(
        message=message,
        chat_memory=chat_memory,
        vector_store=current_chatbot_vector_store,  # Use the current global store
        llm=llm,
        intent_prompt=intent_prompt,
        condense_prompt=CONDENSE_QUESTION_PROMPT,
        qa_prompt=qa_prompt
    )
    return response
def clear_chat_memory_gradio_wrapper():
    """Gradio wrapper to clear memory.

    Empties the global LangChain conversation buffer and returns an empty
    list so the ChatInterface display is cleared too.
    """
    print("Clearing chat memory via Gradio button...")
    chat_memory.clear()
    print("Chat memory cleared.")
    return []  # Return empty list to clear ChatInterface display
def reset_context_to_default_gradio_wrapper():
    """Gradio wrapper to reset context to default.

    Restores the module-level vector store and context label to the default
    laptop-review corpus built at startup, and returns a status string.
    """
    global current_chatbot_vector_store, current_context_source
    print("Resetting context via Gradio button...")
    current_chatbot_vector_store = default_vector_store
    current_context_source = "Default Laptop Reviews"
    status_msg = f"Chatbot context reset to **{current_context_source}**."
    print(status_msg)
    return status_msg
# --- Gradio UI Definition ---
# Two tabs: a one-shot batch analyzer and a RAG chatbot with swappable context.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Product Review Intelligence Center")
    gr.Markdown("Analyze product reviews using Mistral-7B (Tab 1) or chat about reviews with customizable context (Tab 2).")
    with gr.Tabs():
        # --- TAB 1: BATCH ANALYZER ---
        with gr.TabItem("Batch Analyzer"):
            gr.Markdown("Paste reviews OR upload a file (.txt, .csv) to analyze them.")
            gr.Markdown("**Note:** This analysis does *not* affect the chatbot's context in Tab 2.")
            with gr.Row():
                with gr.Column(scale=2):
                    review_input_text_tab1 = gr.Textbox(lines=15, placeholder="Paste reviews here...", label="Reviews Text Input")
                    review_input_file_tab1 = gr.File(label="Upload Reviews File (.txt, .csv)", file_types=[".txt", ".csv"])
                with gr.Column(scale=1):
                    summary_output_tab1 = gr.Textbox(label="Overall Summary", lines=5, interactive=False)
                    aspect_output_tab1 = gr.Textbox(label="Key Aspects (Pros/Cons)", lines=5, interactive=False)
                    sentiment_output_tab1 = gr.Textbox(label="Sentiment Analysis", lines=5, interactive=False)
            analyze_button_tab1 = gr.Button("Analyze Reviews")
            gr.Examples(examples=[[example_batch, None]], inputs=[review_input_text_tab1, review_input_file_tab1], outputs=[summary_output_tab1, aspect_output_tab1, sentiment_output_tab1], fn=analyze_reviews_gradio_wrapper, cache_examples=False)  # Use wrapper
            analyze_button_tab1.click(fn=analyze_reviews_gradio_wrapper, inputs=[review_input_text_tab1, review_input_file_tab1], outputs=[summary_output_tab1, aspect_output_tab1, sentiment_output_tab1])  # Use wrapper
        # --- TAB 2: CHAT ABOUT REVIEWS ---
        with gr.TabItem("Ask a Question (Chatbot)"):
            gr.Markdown("Ask specific questions about product reviews. Upload a file below to change the chatbot's knowledge base.")
            chatbot_status_display = gr.Markdown(f"Chatbot is currently using: **{current_context_source}**")
            with gr.Row():
                chatbot_context_file = gr.File(label="Upload Chatbot Context File (.txt, .csv)", file_types=[".txt", ".csv"], scale=3)
                update_context_button = gr.Button("Update Chatbot Context", scale=1)
            chatbot_interface = gr.ChatInterface(
                fn=chat_responder_gradio_wrapper,  # Use wrapper
                examples=["How is the battery life?", "What about the screen?", "What are the complaints about connectivity?", "What is the capital of France?"],
                title="Review Chatbot"
            )
            with gr.Row():
                reset_memory_button = gr.Button("🔄 Reset Chat Memory")
                reset_context_button = gr.Button("🔄 Reset Chatbot Context to Default")
            # Link actions to wrapper functions
            update_context_button.click(fn=update_chatbot_context_gradio_wrapper, inputs=[chatbot_context_file], outputs=[chatbot_status_display])
            reset_memory_button.click(fn=clear_chat_memory_gradio_wrapper, inputs=None, outputs=[chatbot_interface])
            reset_context_button.click(fn=reset_context_to_default_gradio_wrapper, inputs=None, outputs=[chatbot_status_display])
# --- Launch Command ---
if __name__ == "__main__":
    chat_memory.clear()  # Clear memory each time app starts
    demo.launch(debug=True)
|