ARBAJSSHAIKH committed on
Commit adf6185 · verified · 1 Parent(s): 67f8901

Create app.py

Files changed (1)
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
+ import streamlit as st
+ import os
+ import json
+ import faiss
+ import numpy as np
+ from vosk import Model, KaldiRecognizer
+ import wave
+ from pydub import AudioSegment
+ from sentence_transformers import SentenceTransformer
+ from transformers import pipeline
+
+ # ------------------------------
+ # 1. LOAD MODELS OFFLINE
+ # ------------------------------
+
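+ # st.cache_resource keeps each model in memory across Streamlit reruns,
+ # so the heavy objects below are loaded from disk only once per session.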
+ @st.cache_resource
+ def load_stt_model():
+     # folder containing a locally downloaded Vosk model
+     model_path = "vosk-model-small-en-us-0.15"
+     return Model(model_path)
+
+ @st.cache_resource
+ def load_embedding_model():
+     return SentenceTransformer("all-MiniLM-L6-v2")
+
+ @st.cache_resource
+ def load_qa_model():
+     return pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+ stt_model = load_stt_model()
+ embedder = load_embedding_model()
+ qa_model = load_qa_model()
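+
+ # NOTE: sentence-transformers and transformers fetch weights from the
+ # Hugging Face Hub on first use; for a fully offline run these models
+ # must already be in the local cache, and the Vosk folder on disk.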
+
+ # ------------------------------
+ # 2. FUNCTIONS
+ # ------------------------------
+
+ def transcribe_audio(file_path):
+     wf = wave.open(file_path, "rb")
+     rec = KaldiRecognizer(stt_model, wf.getframerate())
+     rec.SetWords(True)
+
+     text_result = ""
+
+     # Stream the WAV to Vosk in chunks; each finalized utterance comes
+     # back from Result() as a JSON string like {"text": "..."}
+     while True:
+         data = wf.readframes(4000)
+         if len(data) == 0:
+             break
+         if rec.AcceptWaveform(data):
+             res = rec.Result()
+             text_result += json.loads(res)["text"] + " "
+
+     final_res = rec.FinalResult()
+     text_result += json.loads(final_res)["text"]
+     wf.close()
+
+     return text_result.strip()
+
+
+ def convert_to_wav(uploaded_file):
+     audio = AudioSegment.from_file(uploaded_file)
+     # Vosk expects mono 16-bit PCM, so normalize before exporting
+     audio = audio.set_channels(1).set_sample_width(2)
+     output_path = "temp.wav"
+     audio.export(output_path, format="wav")
+     return output_path
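+
+ # NOTE: pydub relies on a system ffmpeg install to decode mp3/m4a
+ # uploads; plain WAV files are handled without it.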
+
+
+ def save_text(text):
+     os.makedirs("transcripts", exist_ok=True)
+     with open("transcripts/data.txt", "a", encoding="utf-8") as f:
+         f.write(text + "\n")
+
+
+ def build_vector_db():
+     with open("transcripts/data.txt", "r", encoding="utf-8") as f:
+         docs = [line.strip() for line in f if line.strip()]
+
+     # one embedding per transcript line, exact (brute-force) L2 search
+     embeddings = embedder.encode(docs)
+     index = faiss.IndexFlatL2(embeddings.shape[1])
+     index.add(np.array(embeddings).astype("float32"))
+     return docs, index
+
+
+ def retrieve(query, docs, index, top_k=3):
+     q_emb = embedder.encode([query])
+     # search returns distances (D) and row indices (I); clamp top_k so a
+     # tiny knowledge base never produces out-of-range indices
+     D, I = index.search(np.array(q_emb).astype("float32"), min(top_k, len(docs)))
+     results = [docs[i] for i in I[0]]
+     return " ".join(results)
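+
+ # NOTE: the SQuAD-tuned DistilBERT used below is extractive - it can
+ # only quote a span from the retrieved transcript context, not
+ # generate new text.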
+
+
+ # ------------------------------
+ # 3. STREAMLIT UI
+ # ------------------------------
+
+ st.title("🔴 Offline GenAI RAG from Audio (No API • No Internet)")
+ st.write("🎤 Upload or record audio → 📄 Convert to text → 🤖 Ask questions offline")
+
+ menu = st.sidebar.radio("Navigation", ["Upload Audio", "Ask Questions"])
+
+ # -----------------------------------
+ # UPLOAD AUDIO PAGE
+ # -----------------------------------
+ if menu == "Upload Audio":
+     st.header("🎤 Upload or Record Audio")
+
+     audio_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "m4a"])
+
+     if audio_file:
+         st.success("File uploaded successfully")
+
+         wav_path = convert_to_wav(audio_file)
+
+         st.info("Transcribing offline... please wait")
+
+         text = transcribe_audio(wav_path)
+
+         st.subheader("📝 Transcribed Text")
+         st.write(text)
+
+         save_text(text)
+
+         st.success("Saved locally in transcripts/ folder")
+
+ # -----------------------------------
+ # ASK QUESTION PAGE
+ # -----------------------------------
+ if menu == "Ask Questions":
+     st.header("❓ Ask Questions From Your Audio Knowledge Base")
+
+     if not os.path.exists("transcripts/data.txt"):
+         st.warning("No transcripts yet - upload some audio first")
+         st.stop()
+
+     docs, index = build_vector_db()
+
+     user_q = st.text_input("Enter your question")
+
+     if st.button("Get Answer"):
+         context = retrieve(user_q, docs, index)
+         result = qa_model(question=user_q, context=context)
+
+         st.subheader("🧠 Answer")
+         st.write(result["answer"])
+
+         st.caption("Based only on your stored audio transcriptions")
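+
+ # To run locally (a sketch, assuming ffmpeg is installed, the
+ # "vosk-model-small-en-us-0.15" folder sits next to app.py, and the
+ # Hugging Face models are already cached): streamlit run app.py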