File size: 1,126 Bytes
5798c9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import os

# Read the entire handshape book into memory as a single UTF-8 string.
with open("data/handshape_book.txt", encoding="utf-8") as book_file:
    book_text = book_file.read()

# Paragraphs are separated by blank lines; keep only those whose stripped
# text is longer than 100 characters.
chunks = [
    paragraph
    for paragraph in map(str.strip, book_text.split("\n\n"))
    if len(paragraph) > 100
]

# Embed every retained paragraph once at startup so that each query only
# needs to encode the question itself.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
corpus_embeddings = model.encode(chunks, convert_to_tensor=True)

# Search function
def ask_question(question):
    """Return the book passages most semantically similar to *question*.

    The question is embedded with the module-level ``model`` and compared
    against ``corpus_embeddings``; the text of the top three matching entries
    of ``chunks`` is returned.

    Args:
        question: Free-text query typed by the user. ``None``, empty and
            whitespace-only values are treated as "no question".

    Returns:
        The matching passages joined by a ``---`` separator line, or a short
        prompt message when no question was given.
    """
    query = (question or "").strip()
    # An empty query still produces an embedding, so without this guard the
    # search would return three arbitrary passages as if they were matches.
    if not query:
        return "Please enter a question about handshapes."

    question_embedding = model.encode(query, convert_to_tensor=True)
    # semantic_search returns one hit list per query; only one query is sent.
    hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=3)[0]
    return "\n\n---\n\n".join(chunks[hit["corpus_id"]] for hit in hits)

# Gradio UI: a two-line question box wired to ask_question, with the result
# shown as plain text.
question_box = gr.Textbox(lines=2, placeholder="Ask about handshapes...")
iface = gr.Interface(
    fn=ask_question,
    inputs=question_box,
    outputs="text",
    title="ASL Handshape RAG",
    description="Semantic search over American Sign Language Handshape Dictionary.",
)

# Launch the interface as soon as this module is executed.
iface.launch()