Spaces:

Yakova
/

Embedding

Running

App Files Files Community

Mbonea committed on Oct 10, 2023

Commit

32989ce

1 Parent(s): 50d8f5b

tts and doc update

Browse files

Files changed (7) hide show

App/Embedding/EmbeddingRoutes.py +6 -3
App/Embedding/utils/Initialize.py +47 -26
App/TTS/Schemas.py +28 -0
App/TTS/TTSRoutes.py +27 -0
App/TTS/utils/Podcastle.py +140 -0
App/TTS/utils/__init__.py +0 -0
App/app.py +2 -1

App/Embedding/EmbeddingRoutes.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from fastapi import APIRouter
 from .utils.Initialize import TextSearch, IdSearch
 from .Schemas import SearchRequest, AddDocumentRequest
@@ -13,8 +13,11 @@ async def create_embeddings(req: AddDocumentRequest):
 @embeddigs_router.post("/search_id")
-async def search_id(req: SearchRequest):
-    return IdSearch(query=req.query)
 @embeddigs_router.post("/search_text")

+from fastapi import APIRouter, BackgroundTasks
 from .utils.Initialize import TextSearch, IdSearch
 from .Schemas import SearchRequest, AddDocumentRequest
 @embeddigs_router.post("/search_id")
+async def search_id(  # POST /search_id handler: delegates the lookup to IdSearch.
+    req: SearchRequest,  # request body; only `req.query` is read here
+    background_tasks: BackgroundTasks,  # task queue handed to IdSearch, which schedules deferred work on it
+):
+    return IdSearch(query=req.query, background_task=background_tasks)
 @embeddigs_router.post("/search_text")

App/Embedding/utils/Initialize.py CHANGED Viewed

@@ -1,52 +1,73 @@
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.docstore.document import Document
 from langchain.vectorstores import Pinecone
-import os,requests
-import pinecone,pprint
 from .Elastic import FetchDocuments
-index_name = 'movie-recommender-fast'
 model_name = "thenlper/gte-base"
 embeddings = HuggingFaceEmbeddings(model_name=model_name)
-TMDB_API=os.environ.get('TMDB_API')
 # get api key from app.pinecone.io
-PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
 # find your environment next to the api key in pinecone console
-PINECONE_ENV = os.environ.get('PINECONE_ENVIRONMENT')
-pinecone.init(
-    api_key=PINECONE_API_KEY,
-    environment=PINECONE_ENV
-)
 docsearch = Pinecone.from_existing_index(index_name, embeddings)
-def generate_text(doc):
-  if doc['tv_results']:
-    return pprint.pformat(doc['tv_results'][0]),doc['tv_results'][0]
-  return pprint.pformat(doc['movie_results'][0]),doc['movie_results'][0]
-def IdSearch(query:str):
-    doc=requests.get(f'https://api.themoviedb.org/3/find/{query}?external_source=imdb_id&language=en&api_key={TMDB_API}').json()
     try:
-        text,props=generate_text(doc)
     except Exception as e:
         print(e)
         return []
-    return TextSearch(text,filter={"key": {"$ne":query}})
-def TextSearch(query: str,filter=None):
-    docs = docsearch.similarity_search(query,k=10,filter=filter)
-    keys= [ doc.metadata['key'] for doc in docs ]
     return FetchDocuments(keys)

 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.docstore.document import Document
 from langchain.vectorstores import Pinecone
+from fastapi import BackgroundTasks
+import os, requests
+import pinecone, pprint
 from .Elastic import FetchDocuments
+index_name = "movie-recommender-fast"  # Pinecone index shared by vector_index and docsearch below
 model_name = "thenlper/gte-base"
 embeddings = HuggingFaceEmbeddings(model_name=model_name)
+TMDB_API = os.environ.get("TMDB_API")  # TMDB API key; None when the variable is unset
 # get api key from app.pinecone.io
+PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
 # find your environment next to the api key in pinecone console
+PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
+pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)  # must run before the index handles below are created
+vector_index = pinecone.Index(index_name=index_name)  # raw index handle, used by check_if_exists for metadata-filter queries
 docsearch = Pinecone.from_existing_index(index_name, embeddings)  # LangChain vector store over the same index
+def check_if_exists(imdb_id):
+    results = vector_index.query(filter={"key": {"$eq": imdb_id}}, top_k=1)
+    if results:
+        return True
+    else:
+        return False
+def add_document(imdb_id, doc):
+    response = check_if_exists(imdb_id=imdb_id)
+    if response:
+        print("document exists")
+        return
+    text, temp_doc = doc
+    temp_doc["key"] = imdb_id
+    temp = Document(
+        page_content=text,
+        metadata=temp_doc,
+    )
+    print("document added")
+    docsearch.add_documents([temp])
+def generate_text(doc):
+    if doc["tv_results"]:
+        return pprint.pformat(doc["tv_results"][0]), doc["tv_results"][0]
+    return pprint.pformat(doc["movie_results"][0]), doc["movie_results"][0]
+def IdSearch(query: str, background_task: BackgroundTasks):
+    doc = requests.get(
+        f"https://api.themoviedb.org/3/find/{query}?external_source=imdb_id&language=en&api_key={TMDB_API}"
+    ).json()
     try:
+        text, props = generate_text(doc)
     except Exception as e:
         print(e)
         return []
+    background_task.add_task(add_document, imdb_id=query, doc=(text, props))
+    return TextSearch(text, filter={"key": {"$ne": query}})
+def TextSearch(query: str, filter=None):
+    docs = docsearch.similarity_search(query, k=10, filter=filter)
+    keys = [doc.metadata["key"] for doc in docs]
     return FetchDocuments(keys)

App/TTS/Schemas.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from pydantic import BaseModel,Field
+from typing import List,Optional
+import uuid
+class Speak(BaseModel):
+    paragraphId: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    speaker: str
+    text: str
+    voiceId: str = Field(default="c60166365edf46589657770d", alias="speaker") # Default speaker value
+    def __init__(self, **data):
+        data["text"] = data.get('text') if  '<speak>' in data.get('text') else f"<speak>{data.get('text')}</speak>"
+        super().__init__(**data)
+class TTSGenerateRequest(BaseModel):  # Request body describing the paragraphs to turn into speech.
+    paragraphs: List[Speak]  # paragraphs to synthesize
+    requestId: str = Field(default_factory=lambda: str(uuid.uuid4()))  # random UUID4 string when omitted
+    workspaceId: str =Field(default_factory=lambda: str(uuid.uuid4()))  # random UUID4 string when omitted
+class StatusRequest(BaseModel):  # Request body naming the TTS request whose status is wanted.
+    requestId: str  # id of a previously submitted TTS request
+class GetTranscriptions(BaseModel):  # NOTE(review): not referenced by any route visible in this commit.
+    userId: int

App/TTS/TTSRoutes.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from fastapi import APIRouter
+from .Schemas import StatusRequest, TTSGenerateRequest
+from .utils.Podcastle import PodcastleAPI
+import os
+tts_router = APIRouter(tags=["TTS"])
+data = {"username": os.environ.get("USERNAME"), "password": os.environ.get("PASSWORD")}  # credentials from the environment; values are None when unset
+tts = PodcastleAPI(**data)  # module-level client shared by the handlers below
+#
+@tts_router.post("/generate_tts")
+async def generate_voice(req: TTSGenerateRequest):  # POST /generate_tts handler: forwards the request to the shared client.
+    print("here --entered!")  # debug trace
+    return await tts.make_request(req)  # the client's result is returned unchanged
+@tts_router.post("/status")
+async def search_id(req: StatusRequest):  # POST /status handler. NOTE(review): the name looks copied from the embedding router.
+    return await tts.check_status(req)  # status lookup for the request identified by req.requestId
+# @tts_router.post("/search_text")
+# async def search_text(req: SearchRequest):
+#     return TextSearch(query=req.query)

App/TTS/utils/Podcastle.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import aiohttp
+import asyncio
+from App.TTS.Schemas import TTSGenerateRequest,StatusRequest
+from pydantic import BaseModel
+class PodcastleAPI:
+    def __init__(self, username, password):
+        self.base_url = "https://podcastle.ai/api"
+        self.username = username
+        self.password = password
+        self.headers = {
+            'authority': 'podcastle.ai',
+            'accept': '*/*',
+            'accept-language': 'en-US,en;q=0.9',
+            'cache-control': 'no-cache',
+            'content-type': 'application/json',
+            # Add your other headers here
+        }
+        self.session = None  # Initialize the session in the constructor
+        self.access_token = None
+    async def create_session(self):
+        self.session = aiohttp.ClientSession(headers=self.headers)
+    async def close_session(self):
+        if self.session:
+            await self.session.close()
+    async def signin(self):
+        url = f"{self.base_url}/auth/signin"
+        payload = {
+            "username": self.username,
+            "password": self.password
+        }
+        if not self.session:
+            await self.create_session()
+        async with self.session.post(url, json=payload) as response:
+            response_data = await response.json()
+            self.access_token = response_data['auth']['accessToken']
+            return response_data
+    async def make_request(self, tts_request: TTSGenerateRequest):
+        if not self.session:
+            await self.create_session()
+        if not self.access_token:
+            await self.signin()
+        headers_with_auth = self.headers.copy()
+        headers_with_auth['authorization'] = f"Bearer {self.access_token}"
+        url = f"{self.base_url}/speech/text-to-speech"
+        async with self.session.post(url, json=tts_request.dict(), headers=headers_with_auth) as response:
+            if response.status == 401:
+                # If a 401 error is encountered, sign in again to update the access token
+                await self.signin()
+                # Retry the request with the updated access token
+                headers_with_auth['authorization'] = f"Bearer {self.access_token}"
+                async with self.session.post(url, json=tts_request.dict(), headers=headers_with_auth) as retry_response:
+                    response_text = await retry_response.json()
+                    return response_text
+            else:
+                response_text = await response.json()
+                return response_text
+    async def check_status(self, tts_status: StatusRequest):
+        if not self.session:
+            await self.create_session()
+        if not self.access_token:
+            await self.signin()
+        headers_with_auth = self.headers.copy()
+        headers_with_auth['authorization'] = f"Bearer {self.access_token}"
+        url = f"{self.base_url}/speech/text-to-speech/{tts_status.requestId}"
+        async with self.session.get(url, headers=headers_with_auth) as response:
+            if response.status == 401:
+                # If a 401 error is encountered, sign in again to update the access token
+                await self.signin()
+                # Retry the request with the updated access token
+                headers_with_auth['authorization'] = f"Bearer {self.access_token}"
+                async with self.session.get(url, headers=headers_with_auth) as retry_response:
+                    response_text = await retry_response.json()
+                    return response_text
+            else:
+                response_text = await response.json()
+                return response_text
+    async def __aenter__(self):
+        if not self.session:
+            await self.create_session()
+        return self
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        await self.close_session()
+# Example usage:
+if __name__ == "__main__":
+    class Speak(BaseModel):
+        paragraphId: str
+        text: str
+        speaker: str
+    class TTSGenerateRequest(BaseModel):
+        paragraphs: [Speak]
+        requestId: str
+        workspaceId: str
+    async def main():
+        username = "veyivib549@gronasu.com"
+        password = "k7bNvgmJUda3yEG"
+        # Create a TTSGenerateRequest object
+        tts_request = TTSGenerateRequest(
+            paragraphs=[
+                Speak(
+                    paragraphId="6f05p",
+                    text="<speak>Hey Daniel. Are you ok?. Manchester United almost lost yesterday  </speak>",
+                    speaker="c60166365edf46589657770d"
+                )
+            ],
+            requestId="7d6018ae-9617-4d22-879f-5e67283fa140",
+            workspaceId="f84fd58e-2899-4531-9f51-77c155c1e294"
+        )
+        async with PodcastleAPI(username, password) as podcastle_api:
+            # Make the TTS request using the TTSGenerateRequest object
+            response_text = await podcastle_api.make_request(tts_request)
+            print(response_text)
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(main())

App/TTS/utils/__init__.py ADDED Viewed

File without changes

App/app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.gzip import GZipMiddleware
 from .Embedding.EmbeddingRoutes import embeddigs_router
@@ -39,3 +39,4 @@ async def landing_page():
 app.include_router(embeddigs_router)

 from fastapi import FastAPI
 from fastapi.middleware.gzip import GZipMiddleware
+from .TTS.TTSRoutes import tts_router
 from .Embedding.EmbeddingRoutes import embeddigs_router
 app.include_router(embeddigs_router)
+app.include_router(tts_router)