Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import google.generativeai as genai | |
| import numpy as np | |
# Authenticate the Gemini client using the key kept in Streamlit secrets.
gemini_api_key = st.secrets["GEMINI_API_KEY"]
genai.configure(api_key=gemini_api_key)

# Page heading shown at the top of the app.
st.title("Text Embedding Similarity Test")
def split_into_chunks(text, chunk_size=500):
    """Split text into consecutive chunks of at most ``chunk_size`` characters.

    The split is purely positional: chunk boundaries fall every
    ``chunk_size`` characters regardless of word or sentence boundaries.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk. Must be positive.

    Returns:
        A list of substrings in their original order. Every chunk except
        possibly the last has exactly ``chunk_size`` characters. An empty
        ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    if chunk_size <= 0:
        # A negative step makes range() empty, which would silently drop the
        # whole text; a zero step raises an unrelated-looking range() error.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
def get_embedding(text):
    """Return the embedding for a single piece of text.

    Passes ``text`` to ``genai.embed_content`` using the
    ``models/text-embedding-004`` model and returns the value stored under
    the ``'embedding'`` key of the response.
    """
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=text,
    )
    return response['embedding']
def cosine_similarity(vec1, vec2):
    """Compute the cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence or array accepted by NumPy).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms. If either vector has zero norm the similarity is
        undefined, and ``0.0`` is returned instead of NaN.
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # Avoid 0/0 -> NaN (plus a RuntimeWarning); NaN would also corrupt
        # any later max()/index() ranking over the scores.
        return 0.0
    return np.dot(vec1, vec2) / norm_product
# Two side-by-side text boxes: the text to be chunked (left) and the text
# it is compared against (right).
left_col, right_col = st.columns(2)
with left_col:
    input_text1 = st.text_area(
        "Enter your first text:",
        height=200,
        placeholder="Type or paste your first text here...",
    )
with right_col:
    input_text2 = st.text_area(
        "Enter text to compare:",
        height=200,
        placeholder="Type or paste text to compare...",
    )

if st.button("Run Similarity Test"):
    if input_text1.strip() and input_text2.strip():
        with st.spinner("Analyzing texts..."):
            try:
                # Break the first text into fixed-size pieces.
                pieces = split_into_chunks(input_text1)
                piece_count = len(pieces)
                if piece_count > 1:
                    st.info(f"Split first text into {piece_count} chunks")

                # Embed every piece first, then the comparison text.
                piece_vectors = [get_embedding(piece) for piece in pieces]
                reference_vector = get_embedding(input_text2)

                # Score each piece against the comparison text and find the best one.
                scores = [
                    cosine_similarity(vector, reference_vector)
                    for vector in piece_vectors
                ]
                best_score = max(scores)
                best_position = scores.index(best_score)

                # Report the best match, then the score of every piece.
                st.subheader("π Similarity Results")
                st.write(f"**Highest similarity score:** {best_score:.4f}")
                st.subheader("π§© Most Similar Chunk")
                st.write(pieces[best_position])
                st.subheader("π All Chunk Similarities")
                for number, (piece, score) in enumerate(zip(pieces, scores), start=1):
                    st.write(f"Chunk {number} ({len(piece)} chars): {score:.4f}")
                    st.expander(f"View chunk {number}").write(piece)
            except Exception as exc:
                # Top-level UI boundary: show any failure to the user.
                st.error(f"Error processing texts: {exc!s}")
    else:
        st.warning("Please enter text in both input fields.")