from typing import Optional, Sequence

from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load pre-trained model and tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)


def generate_sentence_embedding(sentence: str) -> np.ndarray:
    """Generate an embedding for the entire sentence.

    The sentence is tokenized (with truncation enabled), run through the
    model without gradient tracking, and the hidden state of the first
    token (the CLS position) is returned as a NumPy array.
    """
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Get the CLS token embedding (position 0 of the last hidden state).
    # NOTE(review): the model card for this checkpoint appears to recommend
    # mean pooling over tokens; CLS pooling is kept here so existing results
    # for this function do not change -- confirm which one is intended.
    embeddings = outputs.last_hidden_state[:, 0, :].squeeze().numpy()
    return embeddings


def _find_subsequence(sequence: Sequence[int], pattern: Sequence[int]) -> int:
    """Return the start index of the first contiguous match of *pattern*, or -1."""
    pattern = list(pattern)
    width = len(pattern)
    if width == 0:
        return -1
    for start in range(len(sequence) - width + 1):
        if list(sequence[start:start + width]) == pattern:
            return start
    return -1


def find_word_embedding(sentence: str, word: str) -> Optional[np.ndarray]:
    """Locate the embedding vector for a specific word in a sentence.

    The word is tokenized on its own and its subword ids are searched for as
    a *contiguous* run inside the tokenized sentence, so a word split into
    several subwords is only matched where those pieces appear together and
    in order. The embeddings of the matched subwords are averaged.

    Only the first occurrence of the word is used. Because the sentence is
    tokenized with ``truncation=True``, occurrences beyond the tokenizer's
    truncation limit cannot be found.

    Returns:
        The averaged embedding as a NumPy array, or ``None`` when the word
        (or an empty word) does not occur in the tokenized sentence.
    """
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
    word_subword_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(word))

    sentence_ids = inputs["input_ids"][0].tolist()
    start = _find_subsequence(sentence_ids, word_subword_ids)
    if start < 0:
        # Word absent: skip the (comparatively expensive) model forward pass.
        return None

    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state[0]  # Shape: (seq_length, hidden_size)

    # Combine subword embeddings by averaging them.
    end = start + len(word_subword_ids)
    word_embedding = embeddings[start:end].mean(dim=0)
    return word_embedding.numpy()


# Corpus about Albert Einstein.
# Each literal is a separate list entry: the trailing commas matter, because
# adjacent string literals without a comma are concatenated by Python into a
# single string, which would leave the list with just one (very long) entry.
corpus = [
    "Albert Einstein was born in Ulm, Germany, in 1879. Einstein is best known for his theory of relativity. ",
    "His equation, E=mc^2, is one of the most famous formulas in physics. Einstein received the Nobel Prize in Physics in 1921. ",
    "He was awarded the prize for his explanation of the photoelectric effect. Einstein's work revolutionized our understanding of space and time. ",
    "As a child, Einstein had a slow development in speaking. He showed a deep curiosity for science and mathematics from a young age. ",
    "Einstein’s general theory of relativity was published in 1915. He completed his PhD at the University of Zurich in 1905. ",
    "The year 1905 is known as Einstein's 'Annus Mirabilis' or 'Miracle Year.' During that year, Einstein published four groundbreaking papers. ",
    "Einstein’s discoveries laid the foundation for modern physics. He worked at the Swiss Patent Office while developing his theories. ",
    "Einstein’s love for science was inspired by a compass he received as a child. His general relativity theory predicted the bending of light by gravity. ",
    "In 1919, an eclipse confirmed Einstein's predictions, making him famous. Einstein was a pacifist and spoke against war and violence. ",
    "He immigrated to the United States in 1933 to escape Nazi Germany. Einstein joined the Institute for Advanced Study in Princeton, New Jersey. ",
    "He became a U.S. citizen in 1940. Einstein advocated for civil rights and equality. He wrote a letter to President Roosevelt in 1939 about the potential of atomic energy. ",
    "Einstein’s letter indirectly led to the Manhattan Project. Despite his role, Einstein opposed the use of nuclear weapons. He was a strong advocate for global peace and disarmament. ",
    "Einstein believed in the power of education and intellectual freedom. He supported the establishment of the Hebrew University of Jerusalem. ",
    "Einstein’s passion for music matched his love for science. He played the violin and often said music helped him think. He admired Mozart and was influenced by his compositions. ",
    "Einstein’s hairstyle became an iconic part of his image. He often wore simple clothes and disliked formal attire. Einstein once said, 'Imagination is more important than knowledge.' ",
    "He published more than 300 scientific papers in his lifetime. Einstein also wrote about philosophy, politics, and human rights. His theories inspired advancements in quantum mechanics. ",
    "Einstein worked with Niels Bohr on quantum theory debates. He coined the phrase 'spooky action at a distance' about quantum entanglement. ",
    "Einstein was married twice, first to Mileva Marić and later to Elsa Einstein. His first wife, Mileva, was a physicist and mathematician. ",
    "Einstein had three children: Hans Albert, Eduard, and Lieserl. Eduard Einstein suffered from schizophrenia later in life. Einstein loved sailing, even though he wasn’t a strong swimmer. ",
    "He was known for his quirky sense of humor and wit. Einstein was offered the presidency of Israel in 1952 but declined. He wrote extensively on the relationship between science and religion. ",
    "Einstein once remarked, 'God does not play dice with the universe.' He worked tirelessly to develop a unified field theory. ",
    "His attempts to unify gravity and electromagnetism were unsuccessful. Einstein corresponded with many influential figures of his time. ",
    "He had a lifelong friendship with Indian physicist Satyendra Nath Bose. The Bose-Einstein condensate is named after their collaborative work. ",
    "Einstein was fascinated by light and its dual wave-particle nature. He faced opposition from some physicists for challenging Newtonian mechanics. ",
    "Einstein’s theories paved the way for advancements in GPS technology. His face is one of the most recognizable symbols of genius. ",
    "Einstein believed in the moral responsibility of scientists. He once said, 'Life is like riding a bicycle. To keep your balance, you must keep moving.' ",
    "Einstein’s brain was preserved for scientific study after his death. His brain was found to have unusual patterns of neuron density. ",
    "Einstein inspired countless scientists, artists, and thinkers. He was known for his kind and approachable demeanor. Einstein supported efforts to combat racism in the United States. ",
    "He often spoke at universities and public events to inspire young minds. Einstein’s work influenced modern cosmology and black hole theory. ",
    "He had a deep admiration for Mahatma Gandhi and his philosophy of nonviolence. Einstein’s contributions extended beyond science to social and political issues. ",
    "He wrote letters to prominent leaders advocating for world peace. Einstein believed that curiosity was the key to learning and progress. ",
    "He often reflected on the mysteries of the universe and human existence. Einstein was deeply influenced by the works of Isaac Newton. ",
    "He admired the simplicity and elegance of scientific theories. Einstein’s famous photograph sticking out his tongue was taken in 1951. ",
    "He joked that his fame made life more complicated than he desired. Einstein’s death occurred on April 18, 1955, in Princeton, New Jersey. ",
    "He refused surgery in his final days, saying he wanted to live naturally. Einstein’s ashes were scattered at an undisclosed location. ",
    "He believed in the interconnectedness of all things in the universe. Einstein was deeply philosophical about the role of science in society. ",
    "He wrote essays questioning the ethics of technological advancements. Einstein’s curiosity was boundless, driving his relentless pursuit of knowledge. ",
    "His contributions to theoretical physics continue to shape modern science. Einstein’s general relativity reshaped our understanding of gravity. ",
    "He had a profound influence on the development of space exploration. Einstein’s ideas are taught in schools and universities worldwide. ",
    "He left behind a legacy of intellectual courage and moral conviction. Einstein’s theories sparked a scientific revolution in the 20th century. ",
    "He was a pioneer in using thought experiments to explore complex ideas. Einstein’s legacy continues to inspire research in physics and beyond. ",
    "He often expressed gratitude for the simplicity of nature’s laws. Einstein believed that science should serve humanity's greatest good. ",
    "He viewed science and art as complementary expressions of human creativity. Einstein valued individuality and free thought above conformity. ",
    "His insights have been applied in technology, medicine, and engineering. Einstein’s theoretical predictions have been confirmed through experiments. ",
    "His work on relativity contributed to our understanding of time dilation. Einstein’s letters and manuscripts are preserved in archives around the world. ",
    "He believed that humility was essential for the pursuit of truth. Albert Einstein remains a timeless symbol of human intellect and curiosity.",
]

# Calculate embeddings for "Einstein" in each corpus entry, skipping entries
# in which the word is not found.
jeff_embeddings = []
for sentence in corpus:
    embedding = find_word_embedding(sentence, "Einstein")
    if embedding is not None:
        jeff_embeddings.append(embedding)

# Averaging an empty list would yield NaN and a confusing failure further
# down, so fail early with an explicit message instead.
if not jeff_embeddings:
    raise ValueError("The word 'Einstein' was not found in any corpus entry.")

# Calculate the average embedding for "Einstein"
average_jeff_embedding = np.mean(jeff_embeddings, axis=0)

# Generate the embedding of a definition of "person" to compare against
person_human_embedding = generate_sentence_embedding(
    "A person is defined as an individual human being distinguished by characteristics such as consciousness, individuality, and the ability to think, feel, and act. "
    "It typically refers to someone with legal rights, responsibilities, and distinct identity within society.."
)

# Calculate similarity score
similarity = cosine_similarity([average_jeff_embedding], [person_human_embedding])[0][0]

print("Average Embedding for 'Einstein':", average_jeff_embedding)
print("Embedding for 'Person is a human':", person_human_embedding)
print("Similarity Score between 'Einstein' and 'Person is a human':", similarity)