Otium_testing

Sleeping

App Files Files Community

vivianoh committed on Aug 12, 2025

Commit

e6cc2e1

verified ·

1 Parent(s): e20ac9d

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -1

app.py CHANGED Viewed

@@ -2,9 +2,60 @@ import gradio as gr
 import random
 from huggingface_hub import InferenceClient
 # import lines go at the top: any libraries I need to import go up here ^^
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(message, history):
     messages = [{"role": "system", "content": "You are a friendly chatbot."}]
@@ -27,4 +78,7 @@ def yes_or_no(message, history):
 chatbot = gr.ChatInterface(respond, type = "messages")
 # defining my chatbot so that the user can interact and see their conversation history and send new messages
 chatbot.launch()

 import random
 from huggingface_hub import InferenceClient
 # import lines go at the top: any libraries I need to import go up here ^^
+from sentence_transformers import SentenceTransformer
+import torch
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# Step 1: Load the knowledge base
+with open("Untitled document.txt", "r", encoding="utf-8") as f:
+    skincare_text = f.read()
+# Step 2: Preprocess text into sentence chunks
+def preprocess_text(text):
+    cleaned_text = text.strip()
+    chunks = cleaned_text.split(".")
+    cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
+    print(f"Sample chunks: {cleaned_chunks[:3]}")
+    print(f"There are {len(cleaned_chunks)} chunks.")
+    return cleaned_chunks
+cleaned_chunks = preprocess_text(skincare_text)
+# Step 3: Convert chunks into embeddings
+from sentence_transformers import SentenceTransformer
+import torch
+model = SentenceTransformer('all-MiniLM-L6-v2')
+def create_embeddings(text_chunks):
+    chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True)
+    print(f"Embeddings shape: {chunk_embeddings.shape}")
+    return chunk_embeddings
+chunk_embeddings = create_embeddings(cleaned_chunks)
+# Step 4: Retrieve top matching chunks
+def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    query_norm = query_embedding / query_embedding.norm()
+    chunks_norm = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+    similarities = torch.matmul(chunks_norm, query_norm)
+    top_indices = torch.topk(similarities, k=top_k).indices
+    return [text_chunks[i] for i in top_indices]
+# Step 5: Test the workflow with sample queries
+queries = [
+    "Consistent skincare routine",
+    "Applying sunscreen daily",
+    "Choosing products that match your skin type"
+]
+for q in queries:
+    print(f"\nQuery: {q}")
+    results = get_top_chunks(q, chunk_embeddings, cleaned_chunks)
+    for idx, res in enumerate(results, 1):
+        print(f"Result {idx}: {res}")
 def respond(message, history):
     messages = [{"role": "system", "content": "You are a friendly chatbot."}]
 chatbot = gr.ChatInterface(respond, type = "messages")
 # defining my chatbot so that the user can interact and see their conversation history and send new messages
+top_results = get_top_chunks(question, chunk_embeddings, cleaned_chunks)
+print(top_results)
 chatbot.launch()