tts and doc update
Browse files- App/Embedding/EmbeddingRoutes.py +6 -3
- App/Embedding/utils/Initialize.py +47 -26
- App/TTS/Schemas.py +28 -0
- App/TTS/TTSRoutes.py +27 -0
- App/TTS/utils/Podcastle.py +140 -0
- App/TTS/utils/__init__.py +0 -0
- App/app.py +2 -1
App/Embedding/EmbeddingRoutes.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from fastapi import APIRouter
|
| 2 |
|
| 3 |
from .utils.Initialize import TextSearch, IdSearch
|
| 4 |
from .Schemas import SearchRequest, AddDocumentRequest
|
|
@@ -13,8 +13,11 @@ async def create_embeddings(req: AddDocumentRequest):
|
|
| 13 |
|
| 14 |
|
| 15 |
@embeddigs_router.post("/search_id")
|
| 16 |
-
async def search_id(
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
@embeddigs_router.post("/search_text")
|
|
|
|
| 1 |
+
from fastapi import APIRouter, BackgroundTasks
|
| 2 |
|
| 3 |
from .utils.Initialize import TextSearch, IdSearch
|
| 4 |
from .Schemas import SearchRequest, AddDocumentRequest
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
@embeddigs_router.post("/search_id")
async def search_id(
    req: SearchRequest,
    background_tasks: BackgroundTasks,
):
    """Return recommendations for the title identified by ``req.query``.

    ``IdSearch`` does blocking work (it issues a synchronous ``requests``
    call), so it is executed in the thread pool rather than directly on
    the event loop. ``background_tasks`` is passed through so ``IdSearch``
    can register follow-up work on it.
    """
    # Imported locally so this handler does not depend on edits to the
    # module's import block.
    from fastapi.concurrency import run_in_threadpool

    return await run_in_threadpool(
        IdSearch, query=req.query, background_task=background_tasks
    )
|
| 21 |
|
| 22 |
|
| 23 |
@embeddigs_router.post("/search_text")
|
App/Embedding/utils/Initialize.py
CHANGED
|
@@ -1,52 +1,73 @@
|
|
| 1 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 2 |
from langchain.docstore.document import Document
|
| 3 |
from langchain.vectorstores import Pinecone
|
| 4 |
-
import
|
| 5 |
-
import
|
|
|
|
| 6 |
from .Elastic import FetchDocuments
|
| 7 |
|
| 8 |
|
| 9 |
-
index_name =
|
| 10 |
model_name = "thenlper/gte-base"
|
| 11 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
| 12 |
|
| 13 |
-
TMDB_API=os.environ.get(
|
| 14 |
|
| 15 |
# get api key from app.pinecone.io
|
| 16 |
-
PINECONE_API_KEY = os.environ.get(
|
| 17 |
# find your environment next to the api key in pinecone console
|
| 18 |
-
PINECONE_ENV = os.environ.get(
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
pinecone.init(
|
| 21 |
-
api_key=PINECONE_API_KEY,
|
| 22 |
-
environment=PINECONE_ENV
|
| 23 |
-
)
|
| 24 |
|
| 25 |
docsearch = Pinecone.from_existing_index(index_name, embeddings)
|
| 26 |
|
| 27 |
-
def generate_text(doc):
|
| 28 |
-
if doc['tv_results']:
|
| 29 |
-
return pprint.pformat(doc['tv_results'][0]),doc['tv_results'][0]
|
| 30 |
-
return pprint.pformat(doc['movie_results'][0]),doc['movie_results'][0]
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
-
def IdSearch(query:str):
|
| 35 |
-
doc=requests.get(
|
|
|
|
|
|
|
| 36 |
try:
|
| 37 |
-
text,props=generate_text(doc)
|
| 38 |
except Exception as e:
|
| 39 |
print(e)
|
| 40 |
return []
|
| 41 |
-
|
| 42 |
-
|
| 43 |
|
| 44 |
|
| 45 |
-
def TextSearch(query: str,filter=None):
|
| 46 |
-
docs = docsearch.similarity_search(query,k=10,filter=filter)
|
| 47 |
-
keys= [
|
| 48 |
return FetchDocuments(keys)
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
| 1 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 2 |
from langchain.docstore.document import Document
|
| 3 |
from langchain.vectorstores import Pinecone
|
| 4 |
+
from fastapi import BackgroundTasks
|
| 5 |
+
import os, requests
|
| 6 |
+
import pinecone, pprint
|
| 7 |
from .Elastic import FetchDocuments
|
| 8 |
|
| 9 |
|
| 10 |
+
index_name = "movie-recommender-fast"
|
| 11 |
model_name = "thenlper/gte-base"
|
| 12 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
| 13 |
|
| 14 |
+
TMDB_API = os.environ.get("TMDB_API")
|
| 15 |
|
| 16 |
# get api key from app.pinecone.io
|
| 17 |
+
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
|
| 18 |
# find your environment next to the api key in pinecone console
|
| 19 |
+
PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
|
| 20 |
+
|
| 21 |
+
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
|
| 22 |
+
vector_index = pinecone.Index(index_name=index_name)
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
docsearch = Pinecone.from_existing_index(index_name, embeddings)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
def check_if_exists(imdb_id):
    """Report whether the vector index already holds an entry for *imdb_id*.

    The index is queried with a metadata filter on ``key``; the entry is
    considered present only when the response actually lists a match.

    Returns:
        ``True`` if at least one match was returned, ``False`` otherwise.
    """
    results = vector_index.query(filter={"key": {"$eq": imdb_id}}, top_k=1)
    # A response object is generally truthy regardless of its contents, so
    # look at its ``matches`` list instead of testing the response itself.
    # NOTE(review): Pinecone's query call normally expects a ``vector`` or
    # ``id`` next to the filter -- confirm a filter-only query is accepted.
    matches = getattr(results, "matches", None)
    if matches is None and isinstance(results, dict):
        matches = results.get("matches")
    return bool(matches)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def add_document(imdb_id, doc):
    """Add a title to the vector store unless it is already indexed.

    Args:
        imdb_id: Identifier stored under the ``key`` metadata field.
        doc: ``(text, metadata)`` pair; ``text`` becomes the document's page
            content and ``metadata`` is stored alongside it.
    """
    if check_if_exists(imdb_id=imdb_id):
        print("document exists")
        return
    text, props = doc
    # Build a new dict so the caller's metadata is not modified in place.
    metadata = {**props, "key": imdb_id}
    docsearch.add_documents([Document(page_content=text, metadata=metadata)])
    # Report success only after the write has actually been issued.
    print("document added")
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def generate_text(doc):
    """Pick the first result from a TMDB "find" payload and render it as text.

    Args:
        doc: Decoded response holding ``tv_results`` and/or ``movie_results``
            lists.

    Returns:
        ``(text, result)`` where ``result`` is the first TV result if there
        is one, otherwise the first movie result, and ``text`` is its
        ``pprint.pformat`` rendering.

    Raises:
        LookupError: If neither list contains a result. (The previous
            KeyError/IndexError failures were subclasses of this type.)
    """
    # TV results take precedence over movie results, as before.
    for field in ("tv_results", "movie_results"):
        hits = doc.get(field)
        if hits:
            first = hits[0]
            return pprint.pformat(first), first
    raise LookupError("no tv_results or movie_results in TMDB response")
|
| 55 |
|
| 56 |
|
| 57 |
+
def IdSearch(query: str, background_task: BackgroundTasks):
    """Recommend titles similar to the one identified by an IMDb id.

    The id is looked up through TMDB's ``find`` endpoint, the first result
    is rendered to text, and that text drives a similarity search that
    excludes the queried title itself. Indexing of the looked-up title is
    registered on ``background_task``.

    Args:
        query: IMDb id of the title to look up.
        background_task: Task collector used to schedule ``add_document``.

    Returns:
        The result of ``TextSearch``, or ``[]`` when TMDB returned no usable
        result.
    """
    from urllib.parse import quote

    response = requests.get(
        # Escape the id so it cannot alter the request path.
        f"https://api.themoviedb.org/3/find/{quote(query, safe='')}",
        # Let requests encode the query string instead of hand-building it.
        params={
            "external_source": "imdb_id",
            "language": "en",
            "api_key": TMDB_API,
        },
        # Without a timeout a stalled upstream would block this call forever.
        timeout=10,
    )
    doc = response.json()
    try:
        text, props = generate_text(doc)
    except Exception as e:
        # Best effort: any failure to extract a result yields no recommendations.
        print(e)
        return []
    background_task.add_task(add_document, imdb_id=query, doc=(text, props))
    return TextSearch(text, filter={"key": {"$ne": query}})
|
| 68 |
|
| 69 |
|
| 70 |
+
def TextSearch(query: str, filter=None):
    """Run a similarity search for *query* and fetch the matching documents.

    Up to 10 hits are requested from the vector store, optionally narrowed
    by the metadata ``filter``; each hit's ``key`` metadata value is then
    passed on to ``FetchDocuments``.
    """
    hits = docsearch.similarity_search(query, k=10, filter=filter)
    return FetchDocuments([hit.metadata["key"] for hit in hits])
|
|
|
|
|
|
|
|
|
|
|
|
App/TTS/Schemas.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel,Field
|
| 2 |
+
from typing import List,Optional
|
| 3 |
+
import uuid
|
| 4 |
+
|
| 5 |
+
class Speak(BaseModel):
    """One paragraph of a text-to-speech request.

    On construction ``text`` is wrapped in ``<speak>...</speak>`` unless it
    already contains a ``<speak>`` tag.
    """

    paragraphId: str = Field(default_factory=lambda: str(uuid.uuid4()))
    speaker: str
    text: str
    # NOTE(review): this alias makes ``voiceId`` read the same input key as
    # ``speaker``, so the default below looks unreachable -- confirm intent.
    voiceId: str = Field(default="c60166365edf46589657770d", alias="speaker")  # Default speaker value

    def __init__(self, **data):
        text = data.get("text")
        # Only wrap real strings. A missing or non-string value is passed
        # through untouched so pydantic reports a validation error instead
        # of this constructor failing with a TypeError.
        if isinstance(text, str) and "<speak>" not in text:
            data["text"] = f"<speak>{text}</speak>"
        super().__init__(**data)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _fresh_uuid() -> str:
    # Random UUID4 rendered as a string; used as a default id factory.
    return str(uuid.uuid4())


class TTSGenerateRequest(BaseModel):
    # Body of a TTS generation request: the paragraphs to synthesise plus
    # request/workspace ids that default to freshly generated UUID strings.
    paragraphs: List[Speak]
    requestId: str = Field(default_factory=_fresh_uuid)
    workspaceId: str = Field(default_factory=_fresh_uuid)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class StatusRequest(BaseModel):
    # Body of a status query: the id of the TTS request to look up.
    requestId: str
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class GetTranscriptions(BaseModel):
    # Request body carrying a single integer user id.
    userId: int
|
App/TTS/TTSRoutes.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
from .Schemas import StatusRequest, TTSGenerateRequest
|
| 5 |
+
from .utils.Podcastle import PodcastleAPI
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
tts_router = APIRouter(tags=["TTS"])
|
| 9 |
+
data = {"username": os.environ.get("USERNAME"), "password": os.environ.get("PASSWORD")}
|
| 10 |
+
tts = PodcastleAPI(**data)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
#
|
| 14 |
+
@tts_router.post("/generate_tts")
async def generate_voice(req: TTSGenerateRequest):
    """Submit a text-to-speech request and return the client's response."""
    # The leftover debug print on entry has been removed.
    return await tts.make_request(req)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@tts_router.post("/status")
async def search_id(req: StatusRequest):
    # Handler for POST /status: hands the request to the TTS client and
    # returns whatever payload its status check produces.
    status_payload = await tts.check_status(req)
    return status_payload
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# @tts_router.post("/search_text")
|
| 26 |
+
# async def search_text(req: SearchRequest):
|
| 27 |
+
# return TextSearch(query=req.query)
|
App/TTS/utils/Podcastle.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import aiohttp
|
| 2 |
+
import asyncio
|
| 3 |
+
from App.TTS.Schemas import TTSGenerateRequest,StatusRequest
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
|
| 6 |
+
class PodcastleAPI:
    """Small async client for the Podcastle text-to-speech HTTP endpoints.

    The HTTP session is created lazily, the access token is obtained on
    first use, and a request answered with HTTP 401 is retried once after
    signing in again. The instance can also be used as an async context
    manager, which closes the session on exit.
    """

    def __init__(self, username, password):
        self.base_url = "https://podcastle.ai/api"
        self.username = username
        self.password = password
        self.headers = {
            'authority': 'podcastle.ai',
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'cache-control': 'no-cache',
            'content-type': 'application/json',
            # Add your other headers here
        }
        self.session = None  # created on demand by create_session()
        self.access_token = None

    async def create_session(self):
        """Open a new HTTP session that sends the default headers."""
        self.session = aiohttp.ClientSession(headers=self.headers)

    async def close_session(self):
        """Close the HTTP session, if one is open."""
        if self.session:
            await self.session.close()
            # Drop the reference so later calls open a fresh session rather
            # than reusing one that is already closed.
            self.session = None

    async def _ensure_session(self):
        """Make sure an open session is available."""
        if self.session is None or self.session.closed:
            await self.create_session()

    async def signin(self):
        """Sign in with the stored credentials and remember the access token.

        Returns:
            The decoded sign-in response.

        Raises:
            KeyError: If the response does not contain an access token.
        """
        url = f"{self.base_url}/auth/signin"
        payload = {
            "username": self.username,
            "password": self.password
        }

        await self._ensure_session()

        async with self.session.post(url, json=payload) as response:
            status = response.status
            response_data = await response.json()

        auth = response_data.get('auth') if isinstance(response_data, dict) else None
        if not isinstance(auth, dict) or 'accessToken' not in auth:
            # Same exception type as before, but with a message that says
            # what actually went wrong.
            raise KeyError(
                f"accessToken missing from Podcastle sign-in response (HTTP {status})"
            )
        self.access_token = auth['accessToken']
        return response_data

    async def _request_with_auth(self, method, url, **kwargs):
        """Send an authorized request, re-authenticating once on HTTP 401.

        Returns the decoded JSON body of the final response.
        """
        await self._ensure_session()

        if not self.access_token:
            await self.signin()

        headers_with_auth = self.headers.copy()
        # First attempt, plus at most one retry with a refreshed token.
        for attempt in range(2):
            headers_with_auth['authorization'] = f"Bearer {self.access_token}"
            async with self.session.request(
                method, url, headers=headers_with_auth, **kwargs
            ) as response:
                if response.status == 401 and attempt == 0:
                    # Token rejected: sign in again, then retry.
                    await self.signin()
                    continue
                return await response.json()

    async def make_request(self, tts_request: TTSGenerateRequest):
        """Submit a text-to-speech generation request and return the JSON reply."""
        url = f"{self.base_url}/speech/text-to-speech"
        return await self._request_with_auth("POST", url, json=tts_request.dict())

    async def check_status(self, tts_status: StatusRequest):
        """Fetch the state of a previously submitted request by its ``requestId``."""
        url = f"{self.base_url}/speech/text-to-speech/{tts_status.requestId}"
        return await self._request_with_auth("GET", url)

    async def __aenter__(self):
        await self._ensure_session()
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        await self.close_session()
|
| 104 |
+
|
| 105 |
+
# Example usage:
|
| 106 |
+
if __name__ == "__main__":
    import os
    from typing import List

    # Stand-alone request models for this demo; they shadow the imported
    # schema classes while the script runs.
    class Speak(BaseModel):
        paragraphId: str
        text: str
        speaker: str

    class TTSGenerateRequest(BaseModel):
        paragraphs: List[Speak]  # a bare ``[Speak]`` is not a valid type annotation
        requestId: str
        workspaceId: str

    async def main():
        # Credentials are read from the environment (same variable names the
        # TTS routes use); account details must never be committed to source.
        username = os.environ["USERNAME"]
        password = os.environ["PASSWORD"]

        # Create a TTSGenerateRequest object
        tts_request = TTSGenerateRequest(
            paragraphs=[
                Speak(
                    paragraphId="6f05p",
                    text="<speak>Hey Daniel. Are you ok?. Manchester United almost lost yesterday </speak>",
                    speaker="c60166365edf46589657770d"
                )
            ],
            requestId="7d6018ae-9617-4d22-879f-5e67283fa140",
            workspaceId="f84fd58e-2899-4531-9f51-77c155c1e294"
        )

        async with PodcastleAPI(username, password) as podcastle_api:
            # Make the TTS request using the TTSGenerateRequest object
            response_text = await podcastle_api.make_request(tts_request)
            print(response_text)

    # asyncio.run creates and closes the event loop itself; fetching a loop
    # with get_event_loop() outside a running loop is deprecated.
    asyncio.run(main())
|
App/TTS/utils/__init__.py
ADDED
|
File without changes
|
App/app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
|
| 3 |
from fastapi.middleware.gzip import GZipMiddleware
|
| 4 |
-
|
| 5 |
|
| 6 |
from .Embedding.EmbeddingRoutes import embeddigs_router
|
| 7 |
|
|
@@ -39,3 +39,4 @@ async def landing_page():
|
|
| 39 |
|
| 40 |
|
| 41 |
app.include_router(embeddigs_router)
|
|
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
|
| 3 |
from fastapi.middleware.gzip import GZipMiddleware
|
| 4 |
+
from .TTS.TTSRoutes import tts_router
|
| 5 |
|
| 6 |
from .Embedding.EmbeddingRoutes import embeddigs_router
|
| 7 |
|
|
|
|
| 39 |
|
| 40 |
|
| 41 |
app.include_router(embeddigs_router)
|
| 42 |
+
app.include_router(tts_router)
|