Spaces:
Build error
Build error
UPDATE: YT Transcripts
Browse files
- app.py +5 -1
- functions.py +13 -1
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -155,4 +155,8 @@ async def getCount(vectorstore: str):
|
|
| 155 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 156 |
return {
|
| 157 |
"currentCount": df[(df['username'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
|
| 158 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
| 156 |
return {
|
| 157 |
"currentCount": df[(df['username'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
@app.post("/getYoutubeTranscript")
async def getYTTranscript(url: str):
    """Serve the transcript for the YouTube video at ``url``.

    Forwards ``url`` to ``getTranscript`` and returns its result unchanged.
    """
    transcript = getTranscript(url=url)
    return transcript
|
functions.py
CHANGED
|
@@ -10,6 +10,7 @@ from langchain_core.runnables.history import RunnableWithMessageHistory
|
|
| 10 |
from langchain.memory import ChatMessageHistory
|
| 11 |
from langchain_core.chat_history import BaseChatMessageHistory
|
| 12 |
from langchain.storage import InMemoryStore
|
|
|
|
| 13 |
from langchain.docstore.document import Document
|
| 14 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 15 |
from langchain.retrievers import ContextualCompressionRetriever
|
|
@@ -294,4 +295,15 @@ def getTextFromImagePDF(pdfBytes):
|
|
| 294 |
allImages = convert_from_bytes(pdfBytes)
|
| 295 |
allImages = [np.array(image) for image in allImages]
|
| 296 |
text = "\n\n\n".join(["\n".join([text[1] for text in reader.readtext(image, paragraph=True)]) for image in allImages])
|
| 297 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from langchain.memory import ChatMessageHistory
|
| 11 |
from langchain_core.chat_history import BaseChatMessageHistory
|
| 12 |
from langchain.storage import InMemoryStore
|
| 13 |
+
from langchain_community.document_loaders import YoutubeLoader
|
| 14 |
from langchain.docstore.document import Document
|
| 15 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 16 |
from langchain.retrievers import ContextualCompressionRetriever
|
|
|
|
| 295 |
allImages = convert_from_bytes(pdfBytes)
|
| 296 |
allImages = [np.array(image) for image in allImages]
|
| 297 |
text = "\n\n\n".join(["\n".join([text[1] for text in reader.readtext(image, paragraph=True)]) for image in allImages])
|
| 298 |
+
return text
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
def getTranscript(url: str) -> str:
    """Return the transcript of a YouTube video as one string.

    Builds a ``YoutubeLoader`` for ``url`` (without requesting video info),
    loads the transcript documents and joins their ``page_content`` with
    single spaces.

    Args:
        url: URL of the YouTube video.

    Returns:
        The joined transcript text, or the literal string
        ``"ENGLISH TRANSCRIPT UNAVAILABLE"`` when loading the transcript
        fails.
    """
    import logging  # local import so this block does not rely on file-level changes

    # Deliberately left outside the ``try``, as before: any error raised
    # while building the loader still propagates to the caller.
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    try:
        return " ".join(page.page_content for page in loader.load())
    except Exception:
        # Previously a bare ``except:``, which also trapped KeyboardInterrupt
        # and SystemExit and hid the cause. Keep the best-effort fallback,
        # but only for ordinary errors, and record why it was taken.
        logging.getLogger(__name__).warning(
            "Could not load YouTube transcript for %r", url, exc_info=True
        )
        return "ENGLISH TRANSCRIPT UNAVAILABLE"
|
requirements.txt
CHANGED
|
@@ -18,6 +18,7 @@ python-dotenv
|
|
| 18 |
pydantic
|
| 19 |
pandas
|
| 20 |
easyocr
|
|
|
|
| 21 |
pdf2image
|
| 22 |
sentence-transformers
|
| 23 |
supabase
|
|
|
|
| 18 |
pydantic
|
| 19 |
pandas
|
| 20 |
easyocr
|
| 21 |
+
youtube-transcript-api
|
| 22 |
pdf2image
|
| 23 |
sentence-transformers
|
| 24 |
supabase
|