arihant18 committed on
Commit
2b6c457
·
1 Parent(s): d6a98f8

Built FastAPI APIs

Browse files
agents/__init__.py DELETED
File without changes
agents/retriever_agent.py CHANGED
@@ -25,9 +25,9 @@ def get_retriever_agent():
25
  name="retriever_agent",
26
  )
27
 
28
- retriever_agent = get_retriever_agent()
29
 
30
- result = retriever_agent.invoke({"messages": ["Latest news about Apple?"]})
31
 
32
- for i in result["messages"]:
33
- i.pretty_print()
 
25
  name="retriever_agent",
26
  )
27
 
28
+ # retriever_agent = get_retriever_agent()
29
 
30
+ # result = retriever_agent.invoke({"messages": ["Latest news about Apple?"]})
31
 
32
+ # for i in result["messages"]:
33
+ # i.pretty_print()
agents/voice_agent.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import speech_recognition as sr
2
+ from gtts import gTTS
3
+ from io import BytesIO
4
+ from pydub import AudioSegment
5
+
6
+
7
def convert_to_wav_bytes(file, format):
    """Decode an audio file-like object and re-encode it as in-memory WAV.

    *format* names the source codec understood by pydub (e.g. "mp3", "ogg").
    Returns a BytesIO rewound to the start of the WAV data.
    """
    segment = AudioSegment.from_file(file, format=format)
    buffer = BytesIO()
    segment.export(buffer, format="wav")
    buffer.seek(0)
    return buffer
13
+
14
def speech_to_text(audio_bytes_io):
    """Transcribe WAV audio to text using Google's free speech recognizer.

    Returns the recognized text, or None when no speech could be understood.
    NOTE(review): network failures (sr.RequestError) propagate to the caller.
    """
    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_bytes_io) as source:
            recorded = recognizer.record(source)
        return recognizer.recognize_google(recorded)
    except sr.UnknownValueError:
        return None
23
+
24
def text_to_speech(text, lang='en'):
    """Synthesize *text* with gTTS and return the MP3 bytes as a rewound BytesIO."""
    speech = gTTS(text=text, lang=lang)
    out = BytesIO()
    speech.write_to_fp(out)
    out.seek(0)
    return out
data_ingestion/__init__.py DELETED
File without changes
data_ingestion/faiss_index/index.faiss ADDED
Binary file (49.2 kB). View file
 
data_ingestion/faiss_index/index.pkl ADDED
Binary file (4.55 kB). View file
 
data_ingestion/get_data.py CHANGED
@@ -2,8 +2,13 @@ from langchain_community.document_loaders import WebBaseLoader
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
  from langchain_community.vectorstores import FAISS
4
  from pypdf import PdfReader
 
5
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
  import os
 
 
 
 
7
 
8
  def get_pdf_text(pdf):
9
  text=""
@@ -12,24 +17,44 @@ def get_pdf_text(pdf):
12
  text+= page.extract_text()
13
  return text
14
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def get_text_chunks(text):
17
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=50)
18
  chunks = text_splitter.split_text(text)
19
  return chunks
20
 
 
 
 
 
 
 
 
21
 
 
 
 
 
22
 
23
- def create_vector_store(text:str = "Hello world!"):
24
- chunks = get_text_chunks(text)
25
  embeddings = GoogleGenerativeAIEmbeddings(model = "models/gemini-embedding-exp-03-07")
26
  vector_store = FAISS.from_texts(chunks, embedding=embeddings)
27
- vector_store.save_local("faiss_index")
28
  return vector_store
29
 
30
  def get_vector_store():
31
  embeddings = GoogleGenerativeAIEmbeddings(model = "models/gemini-embedding-exp-03-07")
32
- if not os.path.exists("faiss_index"):
33
  return create_vector_store()
34
- vectorstore = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
35
  return vectorstore
 
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
  from langchain_community.vectorstores import FAISS
4
  from pypdf import PdfReader
5
+ from langchain_community.document_loaders import WebBaseLoader
6
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
  import os
8
+ import shutil
9
+
10
+ vectorstore_path = "data_ingestion/faiss_index"
11
+ embeddings = GoogleGenerativeAIEmbeddings(model = "models/gemini-embedding-exp-03-07")
12
 
13
  def get_pdf_text(pdf):
14
  text=""
 
17
  text+= page.extract_text()
18
  return text
19
 
20
def add_web_docs(urls: list[str]):
    """Fetch each URL, split the pages into chunks, and add them to the
    persistent FAISS index (creating a default index first if none exists).

    Returns True on success.
    """
    docs = [WebBaseLoader(url).load() for url in urls]
    docs_list = [item for sublist in docs for item in sublist]
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1024, chunk_overlap=64)
    doc_splits = text_splitter.split_documents(docs_list)
    # Ensure an index exists; the original returned here without ever
    # ingesting the fetched documents.
    if not os.path.exists(vectorstore_path):
        create_vector_store()
    vectorstore = FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)
    # Bug fix: the original called the *async* aadd_documents() without
    # awaiting it (documents never indexed) and never persisted the index.
    vectorstore.add_documents(doc_splits)
    vectorstore.save_local(vectorstore_path)
    return True
30
 
31
def get_text_chunks(text):
    """Split raw text into 1024-char chunks with 64-char overlap."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
    return splitter.split_text(text)
35
 
36
def add_to_vectore_store(text: str):
    """Chunk *text* and add it to the persistent FAISS index.

    Builds a brand-new index from the chunks when none exists yet (returning
    the new store, as before); otherwise returns True after adding.
    """
    chunks = get_text_chunks(text)
    if not os.path.exists(vectorstore_path):
        return create_vector_store(chunks)
    vector_store = FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)
    vector_store.add_texts(chunks)
    # Bug fix: persist the updated index — without save_local() the newly
    # added texts were lost when the process exited.
    vector_store.save_local(vectorstore_path)
    return True
43
 
44
def delete_vector_store():
    """Delete the persisted FAISS index directory, ignoring a missing one.

    Always returns True.
    """
    try:
        shutil.rmtree(vectorstore_path)
    except FileNotFoundError:
        pass
    return True
48
 
49
def create_vector_store(chunks: list[str] | None = None):
    """Build a fresh FAISS index from *chunks*, persist it, and return it.

    Defaults to a one-element placeholder corpus when called with no chunks.
    """
    # Bug fix: a mutable list default is shared across calls; use the
    # None-sentinel idiom instead (backward compatible for all callers).
    if chunks is None:
        chunks = ["Hello world!"]
    embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local(vectorstore_path)
    return vector_store
54
 
55
def get_vector_store():
    """Load the persisted FAISS index, creating a default one when absent."""
    embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")
    if not os.path.exists(vectorstore_path):
        return create_vector_store()
    return FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)
faiss_index/index.faiss DELETED
Binary file (12.3 kB)
 
faiss_index/index.pkl DELETED
Binary file (349 Bytes)
 
old_code.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pypdf import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ import os
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
+ import google.generativeai as genai
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langchain.chains.question_answering import load_qa_chain
10
+ from langchain.prompts import PromptTemplate
11
+ from dotenv import load_dotenv
12
+ import requests
13
+ from bs4 import BeautifulSoup
14
+ import io
15
+ from PIL import Image
16
+ import pytesseract
17
+ import speech_recognition as sr
18
+ from gtts import gTTS
19
+ import os
20
+ from pydub import AudioSegment
21
+ from io import BytesIO
22
+ from urllib.parse import urljoin
23
+ from audio_recorder_streamlit import audio_recorder
24
+ import shutil
25
+
26
+
27
+ load_dotenv()
28
+ os.getenv("GOOGLE_API_KEY")
29
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
30
+
31
def convert_to_wav_bytes(file, format):
    """Re-encode *file* (codec named by *format*) into an in-memory WAV buffer."""
    segment = AudioSegment.from_file(file, format=format)
    out = io.BytesIO()
    segment.export(out, format="wav")
    out.seek(0)
    return out
37
+
38
def speech_to_text(audio_bytes_io):
    """Transcribe a WAV buffer with Google's free recognizer.

    Returns None when the audio could not be understood; other recognizer
    errors propagate.
    """
    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_bytes_io) as source:
            recorded = recognizer.record(source)
        return recognizer.recognize_google(recorded)
    except sr.UnknownValueError:
        return None
47
+
48
+ # Function for the website made without streamlit
49
+
50
def text_to_speech(text, lang='en'):
    """Speak *text* in the Streamlit UI (autoplaying audio) and return the MP3 buffer."""
    speech = gTTS(text=text, lang=lang)
    buf = BytesIO()
    speech.write_to_fp(buf)
    buf.seek(0)
    st.audio(buf, format='audio/mp3', autoplay=True)
    return buf
57
+
58
+
59
def get_pdf_text(pdf):
    """Concatenate the extracted text of every page in the PDF *pdf*."""
    reader = PdfReader(pdf)
    return "".join(page.extract_text() for page in reader.pages)
65
+
66
+
67
+
68
def get_text_chunks(text):
    """Split text into 1000-char chunks with 300-char overlap."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)
    return splitter.split_text(text)
72
+
73
+
74
def get_vector_store(text_chunks):
    """Embed *text_chunks* into a FAISS index persisted at "faiss_index".

    Returns a boolean indicating a store object was produced (in practice
    FAISS.from_texts always returns one, so this is True).
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
    return True if vector_store else False
82
+
83
+
84
def get_conversational_chain():
    """Build the "stuff"-documents QA chain used to answer user questions.

    The prompt constrains the model to answer only from the retrieved
    context, keep answers to two lines or fewer, and persona-reply as
    "Smart-Chatbot" to greetings.  Returns the configured chain.
    """
    # NOTE: this template is runtime behaviour (sent to the model verbatim);
    # its text is deliberately left untouched.
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "The Question is not related to us.", don't provide the wrong answer these context can be from any site or such so answer accordingly
    the answer should be in just 2 or less lines.
    if the question is any thing like thanks and hii reply it in a mannar of a smart chat bot. and you name is Smart-Chatbot, if user asks any Question related to you, no need to tell who build you.\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    # Low temperature keeps answers grounded in the supplied context.
    model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3)

    prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

    return chain
103
+
104
def user_input(user_question):
    """Answer *user_question* from the local FAISS index and render the
    reply (text plus spoken audio) in the Streamlit UI.
    """
    # Consistency fix: get_vector_store() builds "faiss_index" with
    # "models/embedding-001"; querying it with a different embedding model
    # ("gemini-embedding-exp-03-07") mismatches the index vectors.
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    out = response["output_text"]

    st.write(f"Question : {user_question}")
    st.write("Reply: \n", out)
    text_to_speech(out, lang='en')
120
+
121
+
122
+
123
+
124
def extract_text_from_image(image):
    """OCR an uploaded image file-like object with Tesseract and return its text."""
    raw = image.read()
    pil_image = Image.open(io.BytesIO(raw))
    return pytesseract.image_to_string(pil_image)
129
+
130
def _get_web_text(url):
    """Fetch *url* and return its visible page text (best effort)."""
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.get_text(separator="\n", strip=True)


def main():
    """Streamlit entry point: chat over PDFs, text images and web pages."""
    st.set_page_config("MultiChat")
    st.header("Chat with PDF, Text-Images and Sites")
    col1, col2 = st.columns([8, 1])
    with col1:
        user_question = st.text_input("Ask a Question from the context provided")
    with col2:
        st.write('\n')
        st.write('\n')
        audio = audio_recorder(
            text="",
            icon_size="2x",
        )
        if audio:
            # Bug fix: convert_to_wav_bytes() requires the source format;
            # the original call omitted it (TypeError).  audio_recorder
            # returns WAV-encoded bytes — TODO confirm against the widget.
            wav_bytes_io = convert_to_wav_bytes(io.BytesIO(audio), "wav")
            user_question = speech_to_text(wav_bytes_io)

    if user_question:
        with st.spinner("Fetching the answer..."):
            user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        st.write("Use Website link:")
        if st.button("Clear existing data"):
            if os.path.exists("faiss_index"):
                shutil.rmtree("faiss_index")
                st.info("Cleared existing data.")
            else:
                st.info("No data to clear.")
        link = st.chat_input("Paste the web link here")
        if link:
            with st.spinner("Processing..."):
                # Bug fix: get_web_text() was never defined (NameError);
                # use the local requests/BeautifulSoup helper instead.
                raw_text = _get_web_text(link)
                if raw_text:
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")

        files = st.file_uploader("Upload your PDF Files or images here:", accept_multiple_files=True)
        if st.button("Submit & Process"):
            with st.spinner("Processing..."):
                for file in files:
                    if file.type == 'application/pdf':
                        raw_text = get_pdf_text(file)
                    elif file.type.split('/')[0] == 'image':
                        raw_text = extract_text_from_image(file)
                    else:
                        st.write("Invalid File Type")
                        return
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)

                st.success("Done")


if __name__ == "__main__":
    main()
190
+
191
+
orchestrator/main.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form
2
+ from fastapi.responses import StreamingResponse, JSONResponse
3
+ from orchestrator.supervisor import get_supervisor
4
+ from agents.api_agent import get_api_agent
5
+ from agents.retriever_agent import get_retriever_agent
6
+ from agents.scraping_agent import get_scraping_agent
7
+ from agents.voice_agent import *
8
+ from data_ingestion.get_data import *
9
+
10
+ app = FastAPI()
11
+
12
@app.post('/supervisor')
async def supervisor(Query: str):
    """Route *Query* through the supervisor agent and return its raw result."""
    agent = get_supervisor()
    return agent.invoke({'messages': [Query]})
17
+
18
@app.post('/agents/api_agent')
async def api_agent(Query: str):
    """Invoke the API agent directly with *Query* and return its raw result."""
    agent = get_api_agent()
    return agent.invoke({'messages': [Query]})
23
+
24
@app.post('/agents/retriever_agent')
async def retriever_agent(Query: str):
    """Invoke the retriever agent directly with *Query* and return its raw result."""
    agent = get_retriever_agent()
    return agent.invoke({'messages': [Query]})
29
+
30
@app.post('/agents/scraping_agent')
async def scraping_agent(Query: str):
    """Invoke the scraping agent directly with *Query* and return its raw result."""
    agent = get_scraping_agent()
    return agent.invoke({'messages': [Query]})
35
+
36
@app.post("/agents/voice-agent/stt")
async def speech_to_text_api(file: UploadFile = File(...), format: str = Form(...)):
    """Transcribe an uploaded audio file; the 'format' form field names its codec.

    Responds 400 when no speech could be recognized.
    """
    payload = await file.read()
    wav_io = convert_to_wav_bytes(BytesIO(payload), format)
    transcript = speech_to_text(wav_io)
    if transcript is None:
        return JSONResponse(status_code=400, content={"error": "Could not recognize speech"})
    return {"text": transcript}
44
+
45
@app.post("/agents/voice-agent/tts")
async def text_to_speech_api(text: str = Form(...), lang: str = Form(default='en')):
    """Synthesize *text* to speech and stream the MP3 audio back."""
    audio = text_to_speech(text, lang)
    return StreamingResponse(audio, media_type="audio/mpeg")
49
+
50
@app.post("/data_ingestion/pdf")
async def upload_pdf(file: UploadFile):
    """Ingest an uploaded PDF's text into the vector store.

    Returns {'success': status} for PDFs, {'error': ...} for anything else.
    """
    # Robustness: filename may be absent, and extensions may be upper-case;
    # the original crashed on a missing filename and rejected "FILE.PDF".
    filename = file.filename or ""
    if filename.lower().rsplit('.', 1)[-1] == 'pdf':
        raw_text = get_pdf_text(file.file)
    else:
        return {'error': 'Unsupported file type'}
    status = add_to_vectore_store(raw_text)
    return {'success': status}
58
+
59
@app.post("/data_ingestion/urls")
async def ingest_urls(urls: list[str]):
    """Ingest a list of web-page URLs into the vector store.

    Bug fix: the handler was itself named add_web_docs, shadowing the
    imported data_ingestion.get_data.add_web_docs, so the body called
    itself (producing an un-awaited coroutine) and never ingested anything.
    The route path is unchanged.
    """
    add_web_docs(urls)
    return {'success': True}
63
+
64
+
65
@app.get("/data_ingestion/delete_vectordb")
async def delete_vectordb():
    """Drop the persisted vector store and report success."""
    delete_vector_store()
    return {'success': True}
69
+
70
@app.get('/')
async def home():
    """Service landing route with a welcome message."""
    return {"message": "Welcome to the Multi-Source Multi-Agent Finance Assistant"}
requirements.txt CHANGED
@@ -9,4 +9,10 @@ langchain-google-genai
9
  langgraph_supervisor
10
  faiss-cpu
11
  pypdf
12
- streamlit
 
 
 
 
 
 
 
9
  langgraph_supervisor
10
  faiss-cpu
11
  pypdf
12
+ streamlit
13
+ SpeechRecognition
14
+ gtts
15
+ pydub
16
+ fastapi
17
+ uvicorn
18
+ python-multipart