Spaces:
Running
Running
"""NoteBot - Streamlit app for querying PDF and YouTube content.

Extracts text from uploaded PDFs and/or a YouTube video, chunks it, builds a
FAISS similarity index over sentence-transformer embeddings, retrieves the
chunks nearest to a user query, and asks a Hugging Face-hosted Mistral model
to answer based on those chunks. The answer can be downloaded as a PDF or
read aloud.
"""
import os

import streamlit as st
from sentence_transformers import SentenceTransformer

from faiss_indexing import get_embeddings, create_faiss_index, query_faiss_index
from pdf_generator import generate_pdf
from pdf_processing import extract_text_from_pdf
from text_to_speech import speak_text
from utils import load_environment_variables, query_huggingface_api, chunk_text
from youtube_processing import extract_text_from_youtube

# Load environment variables; the Hugging Face token is required for API calls.
hf_token = load_environment_variables()
if not hf_token:
    st.error("Hugging Face API token is missing. Please add it to your .env file.")
    st.stop()

# Hugging Face Inference API endpoint for the instruct model.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
headers = {
    "Authorization": f"Bearer {hf_token}"
}

# Sentence-transformer model used to embed both document chunks and queries.
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

# --- Streamlit UI ---
st.title("NoteBot - Notes Retrieval System")
st.write("By - Aditya Goyal")
st.write("Upload PDFs or provide YouTube links to ask questions about their content.")

uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
youtube_url = st.text_input("Enter YouTube video URL:")

all_chunks = []

# Process PDF files: persist each upload to a temp dir, extract text, chunk it.
if uploaded_files:
    os.makedirs("temp", exist_ok=True)  # create once, not per file
    for uploaded_file in uploaded_files:
        pdf_path = os.path.join("temp", uploaded_file.name)
        with open(pdf_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        text = extract_text_from_pdf(pdf_path)
        all_chunks.extend(chunk_text(text))

# Process the YouTube video transcript, if a URL was supplied.
if youtube_url:
    yt_text = extract_text_from_youtube(youtube_url)
    all_chunks.extend(chunk_text(yt_text))

if all_chunks:
    # Embed every chunk and build the FAISS similarity index over them.
    embeddings = get_embeddings(all_chunks, model)
    faiss_index = create_faiss_index(embeddings)

    query_text = st.text_input("Enter your query:")
    if query_text:
        # Retrieve the indexed chunks nearest to the query embedding.
        query_embedding = get_embeddings([query_text], model)
        distances, indices = query_faiss_index(faiss_index, query_embedding)
        similar_chunks = [all_chunks[i] for i in indices[0]]

        # Cap the prompt context at a manageable number of chunks.
        num_chunks_to_use = min(5, len(similar_chunks))
        selected_chunks = similar_chunks[:num_chunks_to_use]

        template = """Based on the following chunks: {similar_chunks}
Question: {question}
Answer:"""
        prompt_text = template.format(similar_chunks="\n".join(selected_chunks), question=query_text)

        # Generate a response from the Hugging Face Inference API.
        response = query_huggingface_api(prompt_text, API_URL, headers)
        # NOTE(review): error detection relies on the helper returning a string
        # containing the literal substring "Error" on failure — confirm against
        # utils.query_huggingface_api.
        if "Error" not in response:
            st.write("**Answer:**", response)

            # Offer the answer as a downloadable PDF.
            if st.button("Download Response as PDF"):
                # BUGFIX: temp/ is only created in the PDF-upload branch above;
                # with a YouTube-only input it would not exist yet.
                os.makedirs("temp", exist_ok=True)
                pdf_path = os.path.join("temp", "response.pdf")
                generate_pdf(response, pdf_path)
                with open(pdf_path, "rb") as f:
                    st.download_button(label="Download PDF", data=f, file_name="response.pdf")

            # Read the answer aloud via text-to-speech.
            if st.button("Speak Response"):
                speak_text(response)
        else:
            st.error(response)