Spaces:

Ayesha931
/

ParentHug

Sleeping

App Files Files Community

ParentHug / app.py

Ayesha931

Update app.py

1f14237 verified over 1 year ago

raw

history blame contribute delete

3.28 kB

	import os
	import tempfile

	import faiss
	import fitz # PyMuPDF
	import numpy as np
	import speech_recognition as sr
	import streamlit as st
	import torch
	from gtts import gTTS
	from pydub import AudioSegment
	from transformers import AutoTokenizer, AutoModel

	# Function to convert audio file to text
	def audio_to_text(audio_file):
	    """Transcribe a recorded audio file and return the recognised text.

	    Args:
	        audio_file: Audio source accepted by ``sr.AudioFile`` (the caller in
	            this file passes the path of a WAV file).

	    Returns:
	        The transcript produced by ``recognize_google``. When recognition
	        fails, a human-readable apology string is returned instead of an
	        exception being raised, so callers always receive a ``str``.
	    """
	    engine = sr.Recognizer()

	    # Read the whole clip into memory before asking for a transcription.
	    with sr.AudioFile(audio_file) as clip:
	        recording = engine.record(clip)

	    try:
	        transcript = engine.recognize_google(recording)
	    except sr.UnknownValueError:
	        # The recogniser could not make sense of the speech.
	        return "Sorry, I did not understand the audio"
	    except sr.RequestError:
	        # The recognition request itself failed.
	        return "Sorry, there was a problem with the request"
	    return transcript

	# Function to convert audio to WAV format
	def convert_to_wav(audio_file_path, wav_path="temp_audio.wav"):
	    """Re-encode an audio file as WAV and return the path of the WAV copy.

	    Args:
	        audio_file_path: Path of the source audio file; it is opened with
	            ``AudioSegment.from_file``.
	        wav_path: Destination path for the WAV output. Defaults to
	            ``"temp_audio.wav"`` so existing one-argument calls behave as
	            before; pass a unique path to avoid clobbering another
	            conversion that is using the default.

	    Returns:
	        ``wav_path``, the location the WAV data was written to.
	    """
	    audio = AudioSegment.from_file(audio_file_path)
	    # export() hands back the output file handle; close it explicitly so the
	    # WAV file is released before the caller reopens it.
	    audio.export(wav_path, format="wav").close()
	    return wav_path

	# Function to extract text from a PDF file
	def extract_text_from_pdf(pdf_file):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        pdf_file: Path (or other source accepted by ``fitz.open``) of the PDF.

	    Returns:
	        A single string made of each page's ``get_text()`` output joined
	        with no separator; an empty string when the document has no pages.
	    """
	    # The context manager closes the document even if a page fails to parse,
	    # so the underlying file handle is not leaked.
	    with fitz.open(pdf_file) as pdf_document:
	        return "".join(page.get_text() for page in pdf_document)

	# Function to embed text using a transformer model
	def embed_text(texts, model, tokenizer):
	    """Embed each text as the mean of its token vectors.

	    Args:
	        texts: List of strings to embed.
	        model: Callable that accepts the tokenizer output as keyword
	            arguments and returns an object with ``last_hidden_state``
	            (assumed to be shaped ``(batch, tokens, hidden)``).
	        tokenizer: Callable that turns ``texts`` into PyTorch tensors; it is
	            invoked with truncation and padding enabled.

	    Returns:
	        A NumPy array with one row per input text.
	    """
	    inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
	    with torch.no_grad():
	        hidden = model(**inputs).last_hidden_state
	        if "attention_mask" in inputs:
	            # Padding positions must not take part in the average; otherwise
	            # a text's embedding depends on what it was batched with.
	            weights = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
	            token_counts = weights.sum(dim=1).clamp(min=1)
	            pooled = (hidden * weights).sum(dim=1) / token_counts
	        else:
	            # No mask available: fall back to a plain mean over all positions.
	            pooled = hidden.mean(dim=1)
	    return pooled.numpy()

	# Function to convert text to speech
	def text_to_speech(text, output_file):
	    """Synthesise English speech for ``text`` and save it to ``output_file``.

	    Args:
	        text: The text to be spoken.
	        output_file: Path the generated audio is saved to.

	    Returns:
	        ``output_file``, unchanged, so the call can be used inline.
	    """
	    gTTS(text=text, lang='en').save(output_file)
	    return output_file

	# Initialize model and tokenizer
	@st.cache_resource
	def _load_encoder(model_name):
	    """Load the tokenizer/model pair for ``model_name`` once and reuse it.

	    Streamlit re-executes this script on every interaction, so without the
	    cache the weights would be reloaded on each rerun.
	    """
	    return (
	        AutoTokenizer.from_pretrained(model_name),
	        AutoModel.from_pretrained(model_name),
	    )


	@st.cache_resource
	def _build_pdf_index(folder, dim):
	    """Read every PDF in ``folder``, embed it and index the embeddings.

	    Returns:
	        ``(paths, docs, vectors, idx)``: the PDF paths in sorted order, their
	        extracted texts, the embedding matrix (one row per PDF) and the
	        FAISS index holding those rows. All four are empty when the folder
	        is missing or contains no PDF.
	    """
	    names = os.listdir(folder) if os.path.isdir(folder) else []
	    # Sort for a stable document order and accept upper-case extensions.
	    paths = sorted(
	        os.path.join(folder, name)
	        for name in names
	        if name.lower().endswith('.pdf')
	    )
	    docs = [extract_text_from_pdf(path) for path in paths]

	    idx = faiss.IndexFlatL2(dim)
	    if not docs:
	        return paths, docs, np.empty((0, dim), dtype=np.float32), idx

	    # Embed one document per call: nothing is padded to the longest text and
	    # only one document's activations are held in memory at a time.
	    vectors = np.vstack([embed_text([doc], model, tokenizer) for doc in docs])
	    idx.add(vectors)
	    return paths, docs, vectors, idx


	# Initialize model and tokenizer
	tokenizer, model = _load_encoder("bert-base-uncased")

	# Initialize vector database
	dimension = 768 # Size of BERT embeddings

	# Folder path containing PDFs
	pdf_folder_path = "pdfsforRAG"

	# Read, embed and index all PDF files from the specified folder
	pdf_paths, texts, embeddings, index = _build_pdf_index(pdf_folder_path, dimension)

	if not texts:
	    st.warning(f"No PDF files were found in '{pdf_folder_path}'; no advice can be retrieved.")

	# Streamlit application
	st.title("Parenting Guide App")

	# Upload an audio file
	audio_file = st.file_uploader("Record and upload your audio file (WAV/MP3)", type=["wav", "mp3"])

	# Strings audio_to_text returns in place of a transcript when recognition
	# fails; they must never be used as a search query.
	_TRANSCRIPTION_FAILURES = (
	    "Sorry, I did not understand the audio",
	    "Sorry, there was a problem with the request",
	)

	if audio_file:
	    st.write("Processing...")

	    # Save the upload under its real extension in a private directory, so a
	    # WAV is not mislabelled as MP3 and sessions do not share the input file.
	    suffix = os.path.splitext(audio_file.name)[1].lower() or ".mp3"
	    with tempfile.TemporaryDirectory() as upload_dir:
	        upload_path = os.path.join(upload_dir, "upload" + suffix)
	        with open(upload_path, "wb") as f:
	            f.write(audio_file.getbuffer())

	        # Convert audio to WAV format, then to text
	        wav_path = convert_to_wav(upload_path)
	        text = audio_to_text(wav_path)

	    st.write("Voice command:", text)

	    if text in _TRANSCRIPTION_FAILURES:
	        st.warning("The recording could not be transcribed, so no advice was looked up. Please try again.")
	    elif index.ntotal == 0:
	        st.error("No reference documents are indexed, so no advice can be retrieved.")
	    else:
	        # Find relevant advice: nearest indexed document to the spoken query
	        query_embedding = embed_text([text], model, tokenizer)
	        _distances, neighbours = index.search(query_embedding, k=1)
	        best = int(neighbours[0][0])

	        if best < 0:
	            # A negative label means the search produced no neighbour;
	            # indexing texts with it would silently pick the last document.
	            st.error("No matching advice was found.")
	        else:
	            closest_text = texts[best]
	            st.write("Advice:", closest_text)

	            # Convert advice to speech in a private directory and hand the
	            # bytes to the player, so nothing is shared between sessions.
	            with tempfile.TemporaryDirectory() as speech_dir:
	                output_path = text_to_speech(closest_text, os.path.join(speech_dir, "advice.mp3"))
	                with open(output_path, "rb") as speech:
	                    st.audio(speech.read(), format="audio/mpeg")