Spaces:
Sleeping
Sleeping
UPDATE: YT Transcripts
Browse files- app.py +17 -1
- functions.py +9 -1
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
| 8 |
from langchain_community.document_loaders import UnstructuredURLLoader
|
| 9 |
|
| 10 |
|
|
|
|
| 11 |
app = FastAPI(title = "ConversAI", root_path = "/api/v1")
|
| 12 |
app.add_middleware(
|
| 13 |
CORSMiddleware,
|
|
@@ -159,4 +160,19 @@ async def getCount(vectorstore: str):
|
|
| 159 |
|
| 160 |
@app.post("/getYoutubeTranscript")
|
| 161 |
async def getYTTranscript(url: str):
|
| 162 |
-
return getTranscript(url = url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from langchain_community.document_loaders import UnstructuredURLLoader
|
| 9 |
|
| 10 |
|
| 11 |
+
|
| 12 |
app = FastAPI(title = "ConversAI", root_path = "/api/v1")
|
| 13 |
app.add_middleware(
|
| 14 |
CORSMiddleware,
|
|
|
|
| 160 |
|
| 161 |
@app.post("/getYoutubeTranscript")
|
| 162 |
async def getYTTranscript(url: str):
|
| 163 |
+
return getTranscript(url = url)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@app.post("/analyzeData")
|
| 167 |
+
async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
|
| 168 |
+
extension = file.filename.split(".")[-1]
|
| 169 |
+
if extension in ["xls", "xlsx", "xlsm", "xlsb"]:
|
| 170 |
+
df = pd.read_excel(io.BytesIO(await file.read()))
|
| 171 |
+
return analyzeData(query = query, dataframe = df)
|
| 172 |
+
elif extension == "csv":
|
| 173 |
+
df = pd.read_csv(io.BytesIO(await file.read()))
|
| 174 |
+
return analyzeData(query = query, dataframe = df)
|
| 175 |
+
else:
|
| 176 |
+
return {
|
| 177 |
+
"output": "INVALID FILE TYPE"
|
| 178 |
+
}
|
functions.py
CHANGED
|
@@ -8,6 +8,7 @@ from langchain_core.output_parsers import StrOutputParser
|
|
| 8 |
from langchain.retrievers import ParentDocumentRetriever
|
| 9 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
| 10 |
from langchain.memory import ChatMessageHistory
|
|
|
|
| 11 |
from langchain_core.chat_history import BaseChatMessageHistory
|
| 12 |
from langchain.storage import InMemoryStore
|
| 13 |
from langchain_community.document_loaders import YoutubeLoader
|
|
@@ -298,7 +299,7 @@ def getTextFromImagePDF(pdfBytes):
|
|
| 298 |
return text
|
| 299 |
|
| 300 |
|
| 301 |
-
def getTranscript(
|
| 302 |
loader = YoutubeLoader.from_youtube_url(
|
| 303 |
url, add_video_info=False
|
| 304 |
)
|
|
@@ -307,3 +308,10 @@ def getTranscript(url: str):
|
|
| 307 |
except:
|
| 308 |
doc = "ENGLISH TRANSCRIPT UNAVAILABLE"
|
| 309 |
return doc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from langchain.retrievers import ParentDocumentRetriever
|
| 9 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
| 10 |
from langchain.memory import ChatMessageHistory
|
| 11 |
+
from pandasai import SmartDataframe
|
| 12 |
from langchain_core.chat_history import BaseChatMessageHistory
|
| 13 |
from langchain.storage import InMemoryStore
|
| 14 |
from langchain_community.document_loaders import YoutubeLoader
|
|
|
|
| 299 |
return text
|
| 300 |
|
| 301 |
|
| 302 |
+
def getTranscript(url: str):
|
| 303 |
loader = YoutubeLoader.from_youtube_url(
|
| 304 |
url, add_video_info=False
|
| 305 |
)
|
|
|
|
| 308 |
except:
|
| 309 |
doc = "ENGLISH TRANSCRIPT UNAVAILABLE"
|
| 310 |
return doc
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def analyzeData(query, dataframe):
|
| 314 |
+
llm = ChatGroq(model = "gemma2-9b-it")
|
| 315 |
+
df = SmartDataframe(dataframe, config = {"llm": llm, "verbose": False})
|
| 316 |
+
response = df.chat(query)
|
| 317 |
+
return response
|
requirements.txt
CHANGED
|
@@ -16,6 +16,7 @@ numpy
|
|
| 16 |
PyPDF2
|
| 17 |
python-dotenv
|
| 18 |
pydantic
|
|
|
|
| 19 |
pandas
|
| 20 |
easyocr
|
| 21 |
youtube-transcript-api
|
|
|
|
| 16 |
PyPDF2
|
| 17 |
python-dotenv
|
| 18 |
pydantic
|
| 19 |
+
pandasai
|
| 20 |
pandas
|
| 21 |
easyocr
|
| 22 |
youtube-transcript-api
|