from functools import wraps
from typing import Dict
from llama_index.core import StorageContext , VectorStoreIndex
from llama_index.core.base.base_retriever import BaseRetriever
from llama_index.core.schema import NodeRelationship , NodeWithScore
from chromadb import EphemeralClient
from llama_index.vector_stores.chroma import ChromaVectorStore
from nest_asyncio import apply as nest_asyncio_apply
from pickle import load as pickle_load ,loads as pickle_loads, dump as pickle_dump, dumps as pickle_dumps
from bs4 import BeautifulSoup
from edge_tts import Communicate
from uuid import uuid4
from os import remove , getenv
from pydub import AudioSegment
from json import loads , dumps
from io import BytesIO
from g4f.client import Client
from re import search , DOTALL
from flask_caching import Cache
from base64 import b64encode , b64decode
from flask import Flask, request, send_file , redirect , jsonify , render_template
from asyncio import run as run_asyncio
from requests import get as reqget
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from g4f.Provider import __providers__
from io import StringIO
import flask_monitoringdashboard as dashboard
import sys
# Patch asyncio so nested event loops are permitted — needed because
# asyncio.run() is invoked from inside Flask request handlers below
# (see tts_generate), where a loop may already be running.
nest_asyncio_apply()
class AIBook:
    """Question-answering "podcast" service over a vector-indexed book library.

    Everything is class-level state: the Flask app, response cache, embedding
    model, Chroma-backed vector index and retriever are built once at import
    time, and every method is a ``classmethod`` operating on that shared state.
    """

    # NOTE(review): hard-coded credential — should come from an env var.
    password = '123456'

    app = Flask(__name__, template_folder="")
    app.config['CACHE_TYPE'] = 'SimpleCache'
    app.config['CACHE_DEFAULT_TIMEOUT'] = 600  # seconds
    cache = Cache(app)

    embed_model: HuggingFaceEmbedding = HuggingFaceEmbedding("model")

    # SECURITY: pickle deserialization — only safe while 'allbook.book' is a
    # trusted local artifact. Fix: a context manager closes the handle (the
    # original `pickle_load(open(...))` leaked the file object).
    with open('allbook.book', 'rb') as _fh:
        _nodes = pickle_load(_fh)
    index: VectorStoreIndex = VectorStoreIndex(
        nodes=_nodes,
        embed_model=embed_model,
        storage_context=StorageContext.from_defaults(
            vector_store=ChromaVectorStore(
                chroma_collection=EphemeralClient().get_or_create_collection("bookAiLib")
            )
        ),
    )
    del _fh, _nodes  # keep the temporaries off the class namespace

    srcnum: int = 2  # how many source passages each query retrieves
    retriever: BaseRetriever = index.as_retriever(
        similarity_top_k=srcnum, vector_store_query_mode="default"
    )

    # Fix: context manager closes the prompt file (original handle leaked).
    with open("prompt.txt", encoding="utf-8") as _fh:
        system_prompt: str = _fh.read()
    del _fh

    # edge-tts voice ids for the two podcast speakers
    teachertts = "hi-IN-MadhurNeural"
    studenttts = "hi-IN-SwaraNeural"
    # canned apology audio, prepended when the LLM could not produce a script
    error_audio: AudioSegment = AudioSegment.from_file("default_error_script.mp3", format="mp3")
    LLMENABLE: bool = True
    URL: str = "https://" + getenv("SPACE_HOST")
    # provider name (lowercased) -> g4f provider class; "" maps to None (auto)
    llmprovider = {provider.__name__.lower(): provider for provider in __providers__}
    llmprovider[""] = None

    @classmethod
    def changesrcnum(cls, srcnum: int):
        """Set the retrieval fan-out and rebuild the retriever to match.

        Fix: dropped the original trailing ``cls.retriever.retrieve``
        expression, which only accessed the bound method and discarded it
        (a no-op).
        """
        cls.srcnum = srcnum
        cls.retriever = cls.index.as_retriever(
            similarity_top_k=cls.srcnum, vector_store_query_mode="default"
        )

    @classmethod
    def retrieve(cls, query: str) -> list[NodeWithScore]:
        """Return the top ``srcnum`` nodes most similar to *query*."""
        return cls.retriever.retrieve(query)

    @classmethod
    def nodes_to_guj(cls, nodes: list[NodeWithScore]) -> list[str]:
        """Extract the 'maintext' metadata (the original-script source text)
        from each node's related node (relationship kind "1")."""
        return [
            node.node.relationships[NodeRelationship("1")].metadata["maintext"]
            for node in nodes
        ]

    @classmethod
    def nodes_to_eng(cls, nodes: list[NodeWithScore]) -> list[str]:
        """Return the text stored directly on each retrieved node."""
        return [node.node.text for node in nodes]

    @classmethod
    def nodes_to_page_with_bookname(cls, nodes: list[NodeWithScore]) -> list[Dict]:
        """Return ``{"page": int, "bookname": str}`` per node, read from the
        related node's metadata (relationship kind "1")."""
        details = []
        for node in nodes:
            meta = node.node.relationships[NodeRelationship("1")].metadata
            details.append({"page": int(meta["page"]), "bookname": meta["book"]})
        return details

    @classmethod
    def translate_text(cls, text: str, src: str = "gu", tgt: str = "en", enabletransiliration=True) -> str:
        """Translate *text* from *src* to *tgt* via Google's public endpoints.

        When transliteration is enabled and the text contains Latin letters,
        it is first converted to the *src* script through Google Input Tools
        (e.g. "prabhu nu astitva" -> Gujarati script) before translating.
        """
        if enabletransiliration and search(r"[A-Za-z]", text):
            text = reqget(
                f"https://inputtools.google.com/request?itc={src}-t-i0-und&num=1&text={text}"
            ).json()[1][0][1][0]
        return "".join(
            segment[0]
            for segment in reqget(
                f'https://translate.googleapis.com/translate_a/single?client=gtx&sl={src}&tl={tgt}&dt=t&q={text}'
            ).json()[0]
        )

    @classmethod
    def tts_generate(cls, text: str, speaker: str) -> str:
        """Synthesize *text* to a uniquely-named temp wav file and return its
        path (the caller deletes it). ``"teacher"`` selects the teacher
        voice; any other value selects the student voice."""
        voice = cls.teachertts if speaker == "teacher" else cls.studenttts
        temp_filename = f"temp_{uuid4()}.wav"
        run_asyncio(Communicate(text, voice).save(temp_filename))
        return temp_filename

    @classmethod
    def combine_audio_files(cls, json_script: str) -> bytes:
        """Render a JSON podcast script to a single mp3, returned as bytes.

        The Hindi sentinel line below marks ``generate_script``'s failure
        fallback; in that case the canned error audio is prepended and only
        the script's last line is voiced. Temp files from ``tts_generate``
        are removed as they are consumed.
        """
        combined_audio = AudioSegment.empty()
        if "शिष्य चलो,आज का समय यही समाप्त होता है में अब आगे कुछ नहीं बोल सकता , मैं थक गया हूँ" not in json_script:
            for item in loads(json_script):
                clip_path = cls.tts_generate(item['line'], item['speaker'])
                combined_audio += AudioSegment.from_file(clip_path)
                remove(clip_path)
        else:
            page_audio = cls.tts_generate(loads(json_script)[-1]['line'], "student")
            combined_audio = cls.error_audio + AudioSegment.from_file(page_audio)
            remove(page_audio)
        return combined_audio.export(format="mp3").read()

    @classmethod
    def generate_script(cls, prompt: str, provider: str = "", model: str = "gpt-4o-mini", retry=1) -> str:
        """Ask the LLM for a teacher/student podcast script in JSON.

        Extracts the fenced ```json ...``` block from the reply and maps the
        Hindi keys/values to the English ones the pipeline expects. Retries
        up to 3 times when the reply has no JSON block; on failure (or when
        ``LLMENABLE`` is off) returns a canned fallback script whose last
        line is *prompt* translated to Hindi.
        """
        try:
            if cls.LLMENABLE and retry < 4:
                result = Client().chat.completions.create(
                    max_tokens=2024,
                    provider=cls.llmprovider[provider],
                    model=model,
                    messages=[{"role": "user", "content": "AGENT PROMPT:-\n" + cls.system_prompt + "\nUSER PROMPT:-\n" + prompt}],
                ).choices[0].message.content
                try:
                    # Fix: catch only AttributeError (search() returns None
                    # when there is no fenced JSON block) instead of a bare
                    # except that swallowed everything.
                    return search(r'```json(.*?)```', result, DOTALL).group(1).replace('"वक्ता"', '"speaker"').replace('"अध्यापक"', '"teacher"').replace('"विद्यार्थी"', '"student"').replace('"वाक्य"', '"line"')
                except AttributeError:
                    print(result)
                    return cls.generate_script(prompt, provider, model, retry + 1)
            else:
                raise Exception("LLM faced a problem")
        except Exception as e:
            print(e)
            # Fix: use cls instead of naming the class directly (consistent
            # with the rest of the classmethods; behavior unchanged).
            return dumps([{"speaker": "teacher","line": "शिष्य चलो,आज का समय यही समाप्त होता है में अब आगे कुछ नहीं बोल सकता , मैं थक गया हूँ"},{"speaker": "student","line": "कुछ नहीं गुरूजी आप मुझे वो शास्त्र के भावार्थ की बात बताने वाला पेज तो देते जाइए में खुद पढ़ लूंगा"},{"speaker": "teacher","line": "लो , इसमें लिखे हुए अक्षर थोड़े खराब है इसलिए तुम्हें दिक्कत हो सकती है"},{"speaker": "student","line": "ठीक है मैं पढता हूँ"},{"speaker": "student","line": cls.translate_text(prompt, 'en', 'hi', False)}])

    @classmethod
    def get_image_url(cls, bookid: str, page: str | int) -> str:
        """Scrape jainqq.org's explore page and return the page-image URL."""
        html = reqget(f"https://jainqq.org/explore/{bookid}/{page}").content
        return BeautifulSoup(html, "html.parser").find("img", class_="img-fluid").get("src")

    @classmethod
    def encode_string(cls, s: str) -> str:
        """UTF-8 string -> base64 text."""
        return b64encode(s.encode()).decode()

    @classmethod
    def decode_string(cls, s: str) -> str:
        """base64 text -> UTF-8 string (inverse of encode_string)."""
        return b64decode(s.encode()).decode()

    @classmethod
    def cachereqbyurl(cls, func):
        """Decorator: cache a Flask view's response for 600s, keyed by the
        full request URL.

        Fix: test the cached value with ``is not None`` — the original
        truthiness test re-ran the view whenever a falsy-but-valid response
        (e.g. an empty string body) had been cached.
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = request.url
            cached_result = cls.cache.get(cache_key)
            if cached_result is not None:
                return cached_result
            result = func(*args, **kwargs)
            cls.cache.set(cache_key, result, timeout=600)
            return result
        return wrapper
# Attach the flask_monitoringdashboard monitoring UI to the Flask app.
dashboard.bind(app=AIBook.app)
@AIBook.app.route("/")
@AIBook.cachereqbyurl
def function1():
return f"""Please provide a question as {AIBook.URL}/get_text?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ
for getting image of question {AIBook.URL}/question_to_img?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ
for get image by book and page {AIBook.URL}/getbookimage/023657/99
for get image redirect url by book and page {AIBook.URL}/getbookimage/023657/99?redirect=true
for knowing full data with page etc {AIBook.URL}/fulldetails?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ
Change srcnum as {AIBook.URL}/changesrcnum?srcnum=2
enable or disable llm through {AIBook.URL}/LLM_ENABLE?enable=true || {AIBook.URL}/LLM_ENABLE?enable=false
Get audio from podcast script as {AIBook.URL}/tts_podcast?json_script=json_script for get podcast audio
{AIBook.URL}/get_prompt?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ for get prompt
{AIBook.URL}/podcast?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ for get details
{AIBook.URL}/podcast_audio?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ for get audio
{AIBook.URL}/podcast_script?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ for get podcast script
you can try replace of "પ્રભુ નું અસ્તિત્વ છે કે નહિ" with "prabhu nu astitva chhe ke nahi"
{AIBook.URL}/download_embedding for downloading embedding book file
For uploading a 1 book file