# faissssss / app.py
# aridepai17's picture
# Update app.py
# 23184d5 verified
import pandas as pd
import faiss
import gradio as gr
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from gtts import gTTS
import tempfile
# --- One-time startup: data, encoder, and vector index ---

# Load the "train" split of the question/answer dataset into a DataFrame.
dataset = load_dataset(
    "RomainPct/steve-jobs-question-and-answers",
    split="train",
)
df = pd.DataFrame(dataset)

# Sentence encoder; the same model is reused by search_faq for user queries.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# One embedding per entry of the "instruction" column, as a NumPy array.
question_embeddings = model.encode(
    df["instruction"].tolist(),
    convert_to_numpy=True,
)

# Flat L2 index sized to the embedding width, filled with every question.
dimension = question_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(question_embeddings)
# Define search function with audio output
def search_faq(user_query, k=1):
    """Answer a question with the closest FAQ entry, rendered as speech.

    The query is embedded with the module-level ``model``, the nearest stored
    question is looked up in the module-level FAISS ``index``, and the
    matching ``output`` text from ``df`` is synthesized to an MP3 via gTTS.

    Args:
        user_query: Free-text question typed by the user.
        k: Number of neighbours requested from the index. Only the closest
            hit is used for the answer.

    Returns:
        Path to a temporary ``.mp3`` file containing the spoken answer, or
        ``None`` when the query is empty/blank or no usable answer exists.
        The file is created with ``delete=False``; it is not removed here.
    """
    # A blank query has nothing to match; return before invoking the encoder
    # or the TTS service instead of reading out an arbitrary nearest entry.
    if not user_query or not user_query.strip():
        return None

    query_embedding = model.encode([user_query], convert_to_numpy=True)
    _, indices = index.search(query_embedding, k)

    best_idx = int(indices[0][0])
    # FAISS reports -1 when it has no neighbour for a slot; passing that to
    # iloc would silently select the last row rather than fail.
    if best_idx < 0:
        return None

    answer = df.iloc[best_idx]["output"]
    # Guard against missing/blank answers, which gTTS cannot synthesize.
    if not isinstance(answer, str) or not answer.strip():
        return None

    # Convert answer to speech and save to temp file
    tts = gTTS(text=answer)
    # Reserve a unique path, then close the handle before gTTS reopens the
    # file by name: an open NamedTemporaryFile cannot be opened a second time
    # on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        audio_path = fp.name
    tts.save(audio_path)
    return audio_path
# Gradio UI: one text input wired to search_faq, one audio player as output.
question_box = gr.Textbox(
    label="Ask about Steve Jobs",
    placeholder="e.g., What did Steve say about innovation?",
)
# search_faq returns a file path, hence type="filepath".
spoken_answer = gr.Audio(type="filepath")

demo = gr.Interface(
    fn=search_faq,
    inputs=question_box,
    outputs=spoken_answer,
    title="Steve Jobs FAQ Assistant (Audio)",
    description="Ask questions based on Steve Jobs interviews. Response is read aloud.",
    theme="soft",
)
# Start the web app as soon as the script is executed.
demo.launch()