# CapstoneProject / app.py
# NOTE(review): the lines below are residue scraped from the Hugging Face
# "raw / history / blame" web view — kept as comments so the file parses:
#   mingbaer's picture / Update app.py / bb2aed8 verified / 1.78 kB
#
# Setup (run in a terminal, NOT inside this Python file):
#   pip install sentence-transformers
# FRONTEND: Python library that makes it super easy to build simple user interfaces (UIs)
import gradio as gr
# BACKEND: tool from Hugging Face library to send messages to AI models and get answers back
from huggingface_hub import InferenceClient
# Helpful commentary from ChatGPT:
# Gradio is the face and mouth — it lets people talk to the robot.
# InferenceClient is the brain connector — it lets your robot talk to a super-smart brain (the Hugging Face model) and get answers.
from sentence_transformers import SentenceTransformer
# a Python library that allows you to turn sentences into numerical vector embeddings
import torch
# a machine learning library that performs cosine similarity calculations
import numpy as np
# Load the knowledge base (reused from the sentiment analysis lab).
with open("essay_writing.txt", "r", encoding="utf-8") as file:
    essay_writing = file.read()

# Split the text into newline-delimited chunks, dropping blank/whitespace-only
# lines so we never embed empty strings.
cleaned_text = essay_writing.strip()
chunks = cleaned_text.split("\n")
# BUG FIX: the original comprehension filtered on an undefined name
# `stripped_chunk`, which raised NameError at startup; filter on the
# stripped chunk itself instead.
cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]

# Load an embedding model once at startup and pre-compute embeddings for
# every chunk, so each query only needs a single encode at request time.
model = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
def pull_relevant_info(query):
    """Return the knowledge-base chunks most similar to `query`.

    Embeds the query, scores it against the pre-computed chunk embeddings
    with cosine similarity, and returns the top (up to 3) matching chunk
    texts joined by newlines.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Normalize both sides so the dot product below equals cosine similarity.
    query_embedding_normalized = query_embedding / query_embedding.norm()
    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
    # Robustness: don't request more results than there are chunks
    # (torch.topk raises if k exceeds the tensor size).
    k = min(3, similarities.shape[0])
    top_indices = torch.topk(similarities, k=k).indices.cpu().numpy()
    # BUG FIX: index into cleaned_chunks — the list the embeddings were
    # built from. The original indexed `chunks`, whose positions diverge
    # from cleaned_chunks once blank lines are filtered out, so it could
    # return the wrong (or empty) text.
    relevant_info = "\n".join(cleaned_chunks[i] for i in top_indices)
    return relevant_info