| | import os |
| | import pandas as pd |
| | import numpy as np |
| | import gradio as gr |
| | import faiss |
| | from datasets import load_dataset |
| | from sentence_transformers import SentenceTransformer |
| | from gtts import gTTS |
| | import tempfile |
| |
|
| | |
# Hugging Face access token read from the environment; None when HF_TOKEN
# is not set.
auth_token = os.environ.get("HF_TOKEN")

# Load the "train" split of the Steve Jobs question/answer dataset.
# `token` is the supported spelling of the authentication argument:
# `use_auth_token` was deprecated in datasets 2.14 and removed in 3.0.
ds = load_dataset(
    "RomainPct/steve-jobs-question-and-answers",
    split="train",
    token=auth_token,
)
| |
|
| | |
# Two-column lookup table: the dataset's "instruction" field becomes the
# question and its "output" field becomes the answer.
data = pd.DataFrame(
    dict(
        question=ds["instruction"],
        answer=ds["output"],
    )
)
| |
|
| | |
# Sentence-embedding model shared by index construction and query lookup.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every stored question once at start-up.
faq_embeddings = model.encode(data["question"].to_list())

# Flat L2 FAISS index sized to the embedding width (second axis of the
# embedding matrix), then populated with all question vectors.
faq_index = faiss.IndexFlatL2(faq_embeddings.shape[1])
faq_index.add(faq_embeddings)
| |
|
| | |
def get_answer_with_audio(query):
    """Return the best-matching stored answer and a spoken version of it.

    The query is embedded with the module-level ``model``, the single
    nearest stored question is looked up in ``faq_index``, and the answer
    on the same row of ``data`` is synthesised to an MP3 file with gTTS.

    Args:
        query: The user's question as text. ``None``, empty, or
            whitespace-only input is rejected without touching the index.

    Returns:
        A ``(answer, audio_path)`` tuple. ``audio_path`` is the path of a
        temporary ``.mp3`` file, or ``None`` when no question was given.

    Raises:
        Exception: Whatever gTTS raises when synthesis or saving fails; the
            temporary file is removed before the error propagates.
    """
    # Guard clause: an empty query would otherwise be embedded and matched
    # to an arbitrary nearest neighbour.
    if not query or not query.strip():
        return "Please enter a question.", None

    # FAISS expects float32 input; coerce explicitly rather than relying on
    # the encoder's default output dtype.
    query_embedding = np.asarray(model.encode([query]), dtype="float32")
    _, indices = faq_index.search(query_embedding, k=1)
    answer = data.iloc[int(indices[0][0])]['answer']

    # Reserve a unique path and close the handle right away: the original
    # left the handle open forever (descriptor leak), and an open handle can
    # prevent gTTS from writing to the same path on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        audio_path = temp_file.name

    try:
        gTTS(text=answer).save(audio_path)
    except Exception:
        # Do not leave an empty or partial .mp3 behind when synthesis fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise

    return answer, audio_path
| |
|
| | |
# Input widget: a single free-text question box.
question_box = gr.Textbox(
    label="Your Question",
    placeholder="Ask a question about Steve Jobs...",
)

# Output widgets, in the order get_answer_with_audio returns its values:
# the answer text first, then the path of the generated audio file.
answer_text = gr.Text(label="Answer")
answer_audio = gr.Audio(type="filepath", label="Spoken Answer")

# Wire the lookup function to the widgets.
iface = gr.Interface(
    title="🧠 Steve Jobs FAQ Chatbot",
    description="Ask anything about Steve Jobs. This chatbot answers in text and speech.",
    fn=get_answer_with_audio,
    inputs=question_box,
    outputs=[answer_text, answer_audio],
)

# Start the web app.
iface.launch()
| |
|