"""Steve Jobs FAQ assistant with spoken answers.

Loads a question/answer dataset, embeds every question with a
sentence-transformer model, indexes the embeddings in FAISS, and serves a
Gradio UI that answers a user's question with the closest dataset answer
rendered to speech by gTTS.
"""

import tempfile
from typing import Optional

import faiss
import gradio as gr
import pandas as pd
from datasets import load_dataset
from gtts import gTTS
from sentence_transformers import SentenceTransformer

# Load dataset
dataset = load_dataset("RomainPct/steve-jobs-question-and-answers", split="train")
df = pd.DataFrame(dataset)

# Load embedding model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Encode questions
question_embeddings = model.encode(df['instruction'].tolist(), convert_to_numpy=True)

# Build FAISS index (exact L2 search over the question embeddings)
dimension = question_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(question_embeddings)


# Define search function with audio output
def search_faq(user_query: str, k: int = 1) -> Optional[str]:
    """Answer *user_query* with the closest dataset answer, read aloud.

    The query is embedded with the module-level ``model``, the nearest
    question is looked up in the module-level FAISS ``index``, and the
    matching ``'output'`` text from ``df`` is converted to speech.

    Args:
        user_query: Free-text question typed by the user.
        k: Number of neighbours to request from the index. Only the top
            hit is used; values below 1 are treated as 1.

    Returns:
        Path to a temporary ``.mp3`` file containing the spoken answer, or
        ``None`` when the query is blank, no neighbour is found, or the
        matched answer has no text to speak.
    """
    # A blank query has no meaningful nearest neighbour; produce no audio
    # instead of reading out an arbitrary answer.
    if not user_query or not user_query.strip():
        return None

    # The top hit is read below, so at least one neighbour must be requested.
    k = max(1, int(k))

    query_embedding = model.encode([user_query], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding, k)

    best_idx = int(indices[0][0])
    # FAISS pads missing results with -1; iloc[-1] would silently pick the
    # last row, so treat it as "no match".
    if best_idx < 0:
        return None

    answer = df.iloc[best_idx]['output']
    # gTTS refuses empty text, and a missing value would not be a string.
    if not isinstance(answer, str) or not answer.strip():
        return None

    # Convert answer to speech and save to temp file.
    # The handle is closed before gTTS writes to the path: a named temporary
    # file cannot be reopened by name on Windows while it is still open.
    # NOTE: delete=False means these files are never removed by this script.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        audio_path = fp.name
    tts = gTTS(text=answer)
    tts.save(audio_path)
    return audio_path


# Gradio UI
demo = gr.Interface(
    fn=search_faq,
    inputs=gr.Textbox(
        label="Ask about Steve Jobs",
        placeholder="e.g., What did Steve say about innovation?",
    ),
    outputs=gr.Audio(type="filepath"),
    title="Steve Jobs FAQ Assistant (Audio)",
    description="Ask questions based on Steve Jobs interviews. Response is read aloud.",
    theme="soft",
)

# Launched at module level, as in the original script.
demo.launch()