Yordann committed
Commit 00b9c1a · verified · 1 Parent(s): 6900841

Update app.py

Files changed (1)
  1. app.py +68 -0
app.py CHANGED
@@ -11,11 +11,73 @@ from bs4 import BeautifulSoup
 import cv2
 from io import BytesIO
 import torch
+from sentence_transformers import SentenceTransformer
+import numpy as np
+import faiss
+
 
 login(token=os.getenv("chatbot"))
 generator = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")
 bg_to_en = pipeline("translation", model="Helsinki-NLP/opus-mt-bg-en")
 en_to_bg = pipeline("translation", model="Helsinki-NLP/opus-mt-en-bg")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+def load_chunks(path, chunk_size=300):
+    with open(path, "r", encoding="utf-8") as f:
+        text = f.read()
+
+    sentences = text.split(". ")
+    chunks, chunk = [], ""
+
+    for sentence in sentences:
+        if len(chunk.split()) + len(sentence.split()) < chunk_size:
+            chunk += sentence + ". "
+        else:
+            chunks.append(chunk.strip())
+            chunk = sentence + ". "
+
+    if chunk:
+        chunks.append(chunk.strip())
+
+    return chunks
+
+# Load your document chunks
+chunks = load_chunks("MasterBrand Explanation.txt")
+
+# Create embeddings
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+embeddings = embedding_model.encode(chunks)
+
+# Build FAISS index
+dimension = embeddings[0].shape[0]
+index = faiss.IndexFlatL2(dimension)
+index.add(np.array(embeddings))
+
+def search_similar_chunks(query, k=3):
+    query_embedding = embedding_model.encode([query])
+    distances, indices = index.search(np.array(query_embedding), k)
+    return [chunks[i] for i in indices[0]]
+
+def generate_answer_with_context(question):
+    top_chunks = search_similar_chunks(question)
+    context = "\n\n".join(top_chunks)
+
+    prompt = f"""<s>
+You are a helpful assistant trained on e-commerce and branding content.
+
+Use the context below to answer the question.
+
+Context:
+{context}
+
+Question: {question}
+Answer:"""
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(**inputs, max_new_tokens=300)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    return response.replace(prompt, "").strip()
 
 # Load BLIP for image captioning
 blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -60,6 +122,12 @@ def generate_response(user_input, top_p, temperature, chat_counter, chatbot, his
 
     prompt = ""
 
+    top_chunks = search_similar_chunks(user_input_translated)
+    rag_context = "\n\n".join(top_chunks)
+
+    prompt += f"[Context from your e-commerce training document]:\n{rag_context}\n\n"
+
+
     # Multimodal additions
     if image is not None:
         try:
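
Note on the new RAG code: it calls AutoTokenizer.from_pretrained(model_id) and model.generate(...), but neither model_id nor model (nor an AutoTokenizer import) appears in the hunks above, so they must already be defined elsewhere in app.py for this commit to run. Below is a minimal sketch of the assumed setup, using the same mistralai/Mistral-7B-Instruct-v0.1 checkpoint as the existing text-generation pipeline; the checkpoint choice, dtype, and device placement are assumptions, not part of the commit.

# Sketch only: definitions the new functions appear to rely on.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "mistralai/Mistral-7B-Instruct-v0.1"  # assumed to match the pipeline above
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # assumption: half precision to fit a 7B model on one GPU
    device_map="auto",
)

# With these in place, the retrieval-augmented helper added in this commit can be called:
#   answer = generate_answer_with_context("What does the MasterBrand document cover?")

Because the index is an IndexFlatL2 built from all-MiniLM-L6-v2 embeddings, queries must be encoded with that same embedding model for the distances to be meaningful, which is what the new search_similar_chunks function does before the retrieved chunks are spliced into the prompt in generate_response.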