Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import faiss | |
| import gradio as gr | |
| from datasets import load_dataset | |
| from sentence_transformers import SentenceTransformer | |
| from gtts import gTTS | |
| import tempfile | |
# Fetch the "train" split of the Steve Jobs Q&A dataset and expose it as a
# DataFrame so rows can be looked up by position later.
dataset = load_dataset(
    "RomainPct/steve-jobs-question-and-answers",
    split="train",
)
df = pd.DataFrame(dataset)

# Sentence-embedding model shared by indexing (below) and query-time search.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Embed every stored question (the 'instruction' column) as a NumPy array.
questions = df['instruction'].tolist()
question_embeddings = model.encode(questions, convert_to_numpy=True)

# Build a flat L2 FAISS index sized to the embedding width and add all
# question vectors to it; row i of the index corresponds to row i of df.
dimension = question_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(question_embeddings)
# Define search function with audio output
def search_faq(user_query, k=1):
    """Answer a question with the closest FAQ entry, rendered as speech.

    The query is embedded with the module-level ``model``, the nearest stored
    question is looked up in the FAISS ``index``, and the matching ``output``
    text from ``df`` is synthesized to an MP3 file with gTTS.

    Args:
        user_query: Free-text question typed by the user.
        k: Number of neighbours requested from the index. Only the closest
            hit is used to pick the answer.

    Returns:
        Path of a temporary ``.mp3`` file holding the spoken answer, or
        ``None`` when the query is blank or the index returns no match.
        The file is created with ``delete=False`` and is not removed here.

    Raises:
        Whatever ``gTTS`` raises while synthesizing or saving; the partially
        created temp file is removed before the exception propagates.
    """
    import os  # local import: only needed to clean up after a failed save

    # A blank query has nothing to match; return no audio instead of
    # speaking whichever answer happens to sit nearest the empty string.
    if not user_query or not user_query.strip():
        return None

    query_embedding = model.encode([user_query], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding, k)
    best_idx = indices[0][0]
    # FAISS fills missing results with -1; iloc[-1] would silently return
    # the last row, so treat a negative label as "no match".
    if best_idx < 0:
        return None

    answer = df.iloc[best_idx]['output']

    # Convert answer to speech and save to temp file
    tts = gTTS(text=answer)
    # Reserve a unique path, then close our handle before gTTS opens the
    # file by name so two handles never point at it at once.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        audio_path = fp.name
    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty orphan file behind when synthesis fails.
        os.unlink(audio_path)
        raise
    return audio_path
# Gradio UI: one text input for the question, one audio player for the
# synthesized answer file returned by search_faq.
question_box = gr.Textbox(
    label="Ask about Steve Jobs",
    placeholder="e.g., What did Steve say about innovation?",
)
spoken_answer = gr.Audio(type="filepath")

demo = gr.Interface(
    fn=search_faq,
    inputs=question_box,
    outputs=spoken_answer,
    title="Steve Jobs FAQ Assistant (Audio)",
    description="Ask questions based on Steve Jobs interviews. Response is read aloud.",
    theme="soft",
)

# Start the web app as soon as the script runs.
demo.launch()