# Extracted from a Hugging Face Space file view (file size: 15,246 bytes; revision 5f900bc).
# The Space status banner ("Sleeping") and the 1-253 line-number gutter shown on that page are not part of the program.
from functools import wraps
from typing import Dict
from llama_index.core import StorageContext , VectorStoreIndex
from llama_index.core.base.base_retriever import BaseRetriever
from llama_index.core.schema import NodeRelationship , NodeWithScore
from chromadb import EphemeralClient
from llama_index.vector_stores.chroma import ChromaVectorStore
from nest_asyncio import apply as nest_asyncio_apply
from pickle import load as pickle_load ,loads as pickle_loads, dump as pickle_dump, dumps as pickle_dumps
from bs4 import BeautifulSoup
from edge_tts import Communicate
from uuid import uuid4
from os import remove , getenv
from pydub import AudioSegment
from json import loads , dumps
from io import BytesIO
from g4f.client import Client
from re import search , DOTALL
from flask_caching import Cache
from base64 import b64encode , b64decode
from flask import Flask, request, send_file , redirect , jsonify , render_template
from asyncio import run as run_asyncio
from requests import get as reqget
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from g4f.Provider import __providers__
from io import StringIO
import flask_monitoringdashboard as dashboard
import sys
# Call nest_asyncio.apply() once at import time, before the AIBook class body runs.
# NOTE(review): presumably this is what lets AIBook.tts_generate call asyncio.run()
# even if an event loop is already running in the process -- confirm.
nest_asyncio_apply()
class AIBook:
    """Shared state and helpers for the book question-answering Flask service.

    Everything lives at class level and is initialised once, when this class
    body executes at import time: the Flask app and its cache, the embedding
    model, the vector index built from the pickled nodes in ``allbook.book``,
    the retriever, the LLM system prompt and the TTS voices.  In this file the
    class is used purely as a namespace (all helpers are classmethods).
    """

    # Password checked by the /compile endpoint.
    # NOTE(review): hard-coded default credential; consider loading it from
    # the environment instead.
    password = '123456'

    app = Flask(__name__, template_folder="")
    app.config['CACHE_TYPE'] = 'SimpleCache'
    app.config['CACHE_DEFAULT_TIMEOUT'] = 600  # seconds
    cache = Cache(app)

    embed_model: HuggingFaceEmbedding = HuggingFaceEmbedding("model")

    # Load the pickled node list with the file handle closed deterministically.
    # NOTE(review): pickle can execute arbitrary code while loading; this file
    # can be replaced through the upload endpoints, so treat it as untrusted.
    with open('allbook.book', 'rb') as _book_file:
        _initial_nodes = pickle_load(_book_file)
    index: VectorStoreIndex = VectorStoreIndex(
        nodes=_initial_nodes,
        embed_model=embed_model,
        storage_context=StorageContext.from_defaults(
            vector_store=ChromaVectorStore(
                chroma_collection=EphemeralClient().get_or_create_collection("bookAiLib")
            )
        ),
    )
    # Do not keep the loader temporaries around as class attributes.
    del _book_file, _initial_nodes

    # Number of nodes the retriever returns per query (similarity_top_k).
    srcnum: int = 2
    retriever: BaseRetriever = index.as_retriever(similarity_top_k=srcnum, vector_store_query_mode="default")

    with open("prompt.txt", encoding="utf-8") as _prompt_file:
        system_prompt: str = _prompt_file.read()
    del _prompt_file

    # edge-tts voice names for the two podcast speakers.
    teachertts = "hi-IN-MadhurNeural"
    studenttts = "hi-IN-SwaraNeural"

    # Pre-recorded audio used in place of the canned fallback dialogue.
    error_audio: AudioSegment = AudioSegment.from_file("default_error_script.mp3", format="mp3")

    # When False, generate_script skips the LLM and returns the fallback script.
    LLMENABLE: bool = True

    # Public base URL shown on the landing page.  SPACE_HOST may be unset
    # (e.g. when running outside a Space); concatenating None would raise at
    # import time, so fall back to the local address used by app.run below.
    _space_host = getenv("SPACE_HOST")
    URL: str = "https://" + _space_host if _space_host else "http://localhost:7860"
    del _space_host

    # Lower-cased provider name -> g4f provider class; "" selects no explicit provider.
    llmprovider = {provider.__name__.lower(): provider for provider in __providers__}
    llmprovider[""] = None

    # First (teacher) line of the canned fallback dialogue.  Its presence in a
    # script marks the script as the fallback one (see combine_audio_files).
    _fallback_teacher_line = "शिष्य चलो,आज का समय यही समाप्त होता है में अब आगे कुछ नहीं बोल सकता , मैं थक गया हूँ"

    @staticmethod
    def _source_metadata(node: NodeWithScore) -> Dict:
        """Return the metadata of the node's ``NodeRelationship("1")`` related node."""
        return node.node.relationships[NodeRelationship("1")].metadata

    @staticmethod
    def _discard_file(path: str) -> None:
        """Delete *path*; a file that was never created is not an error."""
        try:
            remove(path)
        except FileNotFoundError:
            # Nothing was written, so there is nothing to clean up.
            pass

    @classmethod
    def changesrcnum(cls, srcnum: int):
        """Set how many nodes are retrieved per query and rebuild the retriever."""
        cls.srcnum = srcnum
        cls.retriever = cls.index.as_retriever(similarity_top_k=cls.srcnum, vector_store_query_mode="default")

    @classmethod
    def retrieve(cls, query: str) -> list[NodeWithScore]:
        """Return the retriever's scored nodes for *query*."""
        return cls.retriever.retrieve(query)

    @classmethod
    def nodes_to_guj(cls, nodes: list[NodeWithScore]) -> list[str]:
        """Return the ``maintext`` metadata field of each node's related source node."""
        return [cls._source_metadata(node)["maintext"] for node in nodes]

    @classmethod
    def nodes_to_eng(cls, nodes: list[NodeWithScore]) -> list[str]:
        """Return the indexed text of each node."""
        return [node.node.text for node in nodes]

    @classmethod
    def nodes_to_page_with_bookname(cls, nodes: list[NodeWithScore]) -> list[Dict]:
        """Return ``{"page": int, "bookname": ...}`` for each node, read from its source metadata."""
        return [
            {"page": int(cls._source_metadata(node)["page"]), "bookname": cls._source_metadata(node)["book"]}
            for node in nodes
        ]

    @classmethod
    def translate_text(cls, text: str, src: str = "gu", tgt: str = "en", enabletransiliration=True) -> str:
        """Translate *text* from *src* to *tgt* through Google's web endpoints.

        When *enabletransiliration* is true and the text contains Latin
        letters, it is first sent to the input-tools endpoint and replaced by
        the first suggestion (so romanised input is converted to *src* script).

        The text is passed via ``params`` so that characters such as ``&``,
        ``#`` or ``+`` are URL-encoded instead of corrupting the query string.
        """
        if enabletransiliration and search(r"[A-Za-z]", text):
            text = reqget(
                "https://inputtools.google.com/request",
                params={"itc": f"{src}-t-i0-und", "num": 1, "text": text},
            ).json()[1][0][1][0]
        segments = reqget(
            "https://translate.googleapis.com/translate_a/single",
            params={"client": "gtx", "sl": src, "tl": tgt, "dt": "t", "q": text},
        ).json()[0]
        return "".join(segment[0] for segment in segments)

    @classmethod
    def tts_generate(cls, text: str, speaker: str) -> str:
        """Synthesize *text* with edge-tts and return the path of the temp file.

        ``speaker == "teacher"`` selects the teacher voice; anything else the
        student voice.  The caller owns the returned file and must delete it.
        If synthesis fails, any partially written file is removed first.
        """
        # NOTE(review): the file is named .wav; confirm which container
        # edge-tts actually writes.  Name kept unchanged for compatibility.
        temp_filename = f"temp_{uuid4()}.wav"
        voice = cls.teachertts if speaker == "teacher" else cls.studenttts
        try:
            run_asyncio(Communicate(text, voice).save(temp_filename))
        except Exception:
            cls._discard_file(temp_filename)
            raise
        return temp_filename

    @classmethod
    def combine_audio_files(cls, json_script: str) -> bytes:
        """Render a JSON podcast script to mp3 bytes.

        *json_script* is a JSON list of ``{"speaker": ..., "line": ...}``
        items.  Normally every line is synthesized and concatenated in order.
        If the script is the canned fallback dialogue, the pre-recorded
        ``error_audio`` is used instead, followed by the synthesized last line
        in the student voice.

        The fallback is detected on the *parsed* lines: the fallback script is
        produced with ``json.dumps`` (ASCII-escaped), so searching the raw JSON
        text for the Devanagari sentence would never match.
        """
        items = loads(json_script)
        is_fallback = any(cls._fallback_teacher_line in item['line'] for item in items)
        temp_files: list[str] = []
        try:
            if is_fallback:
                temp_files.append(cls.tts_generate(items[-1]['line'], "student"))
                combined_audio = cls.error_audio + AudioSegment.from_file(temp_files[-1])
            else:
                combined_audio = AudioSegment.empty()
                for item in items:
                    temp_files.append(cls.tts_generate(item['line'], item['speaker']))
                    combined_audio += AudioSegment.from_file(temp_files[-1])
        finally:
            # Always remove the temp files, even when a later line fails.
            for path in temp_files:
                cls._discard_file(path)
        with combined_audio.export(format="mp3") as exported:
            return exported.read()

    @classmethod
    def generate_script(cls, prompt: str, provider: str = "", model: str = "gpt-4o-mini", retry=1) -> str:
        """Ask the LLM for a podcast script and return it as a JSON string.

        The reply must contain a json-fenced block holding valid JSON; Hindi
        key/speaker names in it are mapped to ``speaker``/``line``/``teacher``/
        ``student``.  A reply without a usable block is printed and the request
        retried (attempts are numbered by *retry*; at most three are made).

        If the LLM is disabled, the attempts are exhausted, or anything raises,
        the error is printed and a canned fallback dialogue is returned whose
        last line is *prompt* translated from English to Hindi.
        """
        try:
            if not (cls.LLMENABLE and retry < 4):
                raise Exception("LLM faced a problem")
            result = Client().chat.completions.create(
                max_tokens=2024,
                provider=cls.llmprovider[provider],
                model=model,
                messages=[{"role": "user", "content": "AGENT PROMPT:-\n" + cls.system_prompt + "\nUSER PROMPT:-\n" + prompt}],
            ).choices[0].message.content
            fenced = search(r'```json(.*?)```', result, DOTALL) if isinstance(result, str) else None
            if fenced is not None:
                script = (
                    fenced.group(1)
                    .replace('"वक्ता"', '"speaker"')
                    .replace('"अध्यापक"', '"teacher"')
                    .replace('"विद्यार्थी"', '"student"')
                    .replace('"वाक्य"', '"line"')
                )
                try:
                    # Callers json-load the script, so only hand back valid JSON.
                    loads(script)
                except ValueError:
                    pass  # fall through to the retry below
                else:
                    return script
            print(result)
            return cls.generate_script(prompt, provider, model, retry + 1)
        except Exception as e:
            print(e)
            return dumps([
                {"speaker": "teacher", "line": cls._fallback_teacher_line},
                {"speaker": "student", "line": "कुछ नहीं गुरूजी आप मुझे वो शास्त्र के भावार्थ की बात बताने वाला पेज तो देते जाइए में खुद पढ़ लूंगा"},
                {"speaker": "teacher", "line": "लो , इसमें लिखे हुए अक्षर थोड़े खराब है इसलिए तुम्हें दिक्कत हो सकती है"},
                {"speaker": "student", "line": "ठीक है मैं पढता हूँ"},
                {"speaker": "student", "line": cls.translate_text(prompt, 'en', 'hi', False)},
            ])

    @classmethod
    def get_image_url(cls, bookid: str, page: str | int) -> str:
        """Return the ``src`` of the ``img.img-fluid`` element on the jainqq.org explore page.

        Raises AttributeError if the page has no such element.
        """
        html = reqget(f"https://jainqq.org/explore/{bookid}/{page}").content
        return BeautifulSoup(html, "html.parser").find("img", class_="img-fluid").get("src")

    @classmethod
    def encode_string(cls, s: str) -> str:
        """Return *s* base64-encoded, as text."""
        return b64encode(s.encode()).decode()

    @classmethod
    def decode_string(cls, s: str) -> str:
        """Return the text encoded by the base64 string *s*."""
        return b64decode(s.encode()).decode()

    @classmethod
    def cachereqbyurl(cls, func):
        """Decorator: cache a view's return value for 600 s, keyed by ``request.url``."""
        @wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = request.url
            cached_result = cls.cache.get(cache_key)
            # Compare against None so a cached falsy value still counts as a hit.
            if cached_result is not None:
                return cached_result
            result = func(*args, **kwargs)
            cls.cache.set(cache_key, result, timeout=600)
            return result
        return wrapper
# Attach flask_monitoringdashboard to the Flask app; the landing page below
# advertises it under /dashboard.
dashboard.bind(app=AIBook.app)
@AIBook.app.route("/")
@AIBook.cachereqbyurl
def function1():
    """Landing page: an HTML list of the service's endpoints with example URLs."""
    base = AIBook.URL
    sample = "પ્રભુ નું અસ્તિત્વ છે કે નહિ"
    # One entry per line of the page; entries are joined with <br>.
    # Trailing spaces inside some entries are part of the original output.
    entries = [
        f"Please provide a question as {base}/get_text?question={sample}",
        f"for getting image of question {base}/question_to_img?question={sample} ",
        f"for get image by book and page {base}/getbookimage/023657/99",
        f"for get image redirect url by book and page {base}/getbookimage/023657/99?redirect=true",
        f"for knowing full data with page etc {base}/fulldetails?question={sample} ",
        f"Change srcnum as {base}/changesrcnum?srcnum=2",
        f"enable or disable llm through {base}/LLM_ENABLE?enable=true || {base}/LLM_ENABLE?enable=false ",
        f"Get audio from podcast script as {base}/tts_podcast?json_script=json_script for get podcast audio",
        f"{base}/get_prompt?question={sample} for get prompt",
        f"{base}/podcast?question={sample} for get details",
        f"{base}/podcast_audio?question={sample} for get audio",
        f"{base}/podcast_script?question={sample} for get podcast script",
        f'you can try replace of "{sample}" with "prabhu nu astitva chhe ke nahi"',
        f"{base}/download_embedding for downloading embedding book file",
        "For uploading a 1 book file",
        '<form action="/insert_nodes" method="post" enctype="multipart/form-data"><input type="file" name="file" accept=".book" /><input type="submit" value="Upload Book File" /></form>',
        "for uploading allbook file",
        '<form action="/upload_embedding" method="post" enctype="multipart/form-data"><input type="file" name="file" accept=".book" /><input type="submit" value="Upload Book File" /></form>',
        f"Get python interactive shell as {base}/python ",
        f"change python authentication password {base}/change_password?password=123456",
        f"to monitor api usage {base}/dashboard with <b><i>admin</b></i> username and <b><i>admin</b></i> password",
    ]
    return "<br>".join(entries)
@AIBook.app.route("/tts_podcast")
@AIBook.cachereqbyurl
def function2():
    """Render the ``json_script`` query parameter to audio and send it as podcast.mp3."""
    script = request.args.get('json_script')
    mp3_bytes = AIBook.combine_audio_files(script)
    audio_stream = BytesIO(mp3_bytes)
    return send_file(audio_stream, download_name='podcast.mp3', mimetype="audio/mp3")
@AIBook.app.route("/changesrcnum")
def function3():
    """Change how many nodes the retriever returns, from the ``srcnum`` query parameter.

    Returns "srcnum changed" on success, or a 400 response when ``srcnum`` is
    missing, not an integer, or smaller than 1.
    """
    raw_value = request.args.get("srcnum")
    try:
        srcnum = int(raw_value)
    except (TypeError, ValueError):
        # TypeError: parameter missing (None); ValueError: not a number.
        return "srcnum must be an integer", 400
    if srcnum < 1:
        # Other routes index the first retrieved node, so at least one is needed.
        return "srcnum must be at least 1", 400
    AIBook.changesrcnum(srcnum)
    return "srcnum changed"
@AIBook.app.route("/insert_nodes",methods=["POST"])
def function4():
    """Add the pickled nodes from the uploaded ``file`` to the index and to allbook.book.

    Returns "inserted" on success.
    """
    # SECURITY NOTE(review): unpickling an uploaded file executes arbitrary
    # code chosen by the uploader, and this route has no authentication.
    objs = pickle_loads(request.files['file'].read())
    AIBook.index.insert_nodes(objs)
    # Read the current list completely before reopening the file for writing,
    # and close both handles deterministically.
    with open("allbook.book", 'rb') as book_file:
        existing = pickle_load(book_file)
    with open("allbook.book", 'wb') as book_file:
        pickle_dump(existing + objs, book_file)
    return "inserted"
@AIBook.app.route("/fulldetails")
@AIBook.cachereqbyurl
def function5():
    """Return the question, its translation and every retrieved node (text, score, source metadata) as JSON."""
    question = request.args.get("question")
    trans = AIBook.translate_text(question)
    contents = []
    for hit in AIBook.retrieve(trans):
        source = hit.node.relationships[NodeRelationship("1")]
        contents.append({"text": hit.get_text(), "score": hit.get_score(), "metadata": source.metadata})
    payload = {"question": question, "translatedquestion": trans, "contents": contents}
    return jsonify(payload)
@AIBook.app.route("/getbookimage/<bookid>/<page>")
@AIBook.cachereqbyurl
def function6(bookid,page):
    """Return the scanned page image for *bookid*/*page*.

    Redirects to the image URL unless the ``redirect`` query parameter is
    exactly "false", in which case the image is fetched and its bytes served.
    """
    # NOTE(review): the landing page describes ?redirect=true as the way to
    # get a redirect, but here redirecting is the default -- confirm intent.
    pagelink = AIBook.get_image_url(bookid,page)
    wants_proxy = request.args.get("redirect") == "false"
    if not wants_proxy:
        return redirect(pagelink)
    upstream = reqget(pagelink)
    return send_file(BytesIO(upstream.content), mimetype=upstream.headers['Content-Type'])
@AIBook.app.route("/question_to_img")
@AIBook.cachereqbyurl
def function7():
    """Serve the page image belonging to the best-matching node for the ``question`` parameter."""
    translated = AIBook.translate_text(request.args.get("question"))
    top_hit = AIBook.retrieve(translated)[0]
    meta = top_hit.node.relationships[NodeRelationship("1")].metadata
    image_url = AIBook.get_image_url(meta['bookid'],meta['page'])
    upstream = reqget(image_url)
    return send_file(BytesIO(upstream.content), mimetype=upstream.headers['Content-Type'])
@AIBook.app.route("/get_prompt")
@AIBook.cachereqbyurl
def function8():
    """Return, as JSON, the page image URL and the LLM prompt text built for the ``question`` parameter."""
    translatedquestion = AIBook.translate_text(request.args.get("question"))
    top_node = AIBook.retrieve(translatedquestion)[0].node
    meta = top_node.relationships[NodeRelationship("1")].metadata
    image_url = AIBook.get_image_url(meta['bookid'],meta['page'])
    prompt_text = f'Question:"{translatedquestion}"\nReferance book page content:\n"""{top_node.text}"""'
    return jsonify({"image": image_url, "text": prompt_text})
@AIBook.app.route("/podcast")
@AIBook.cachereqbyurl
def function9():
    """Return the full podcast for the ``question`` parameter as JSON.

    The response holds the page image URL, the readable dialogue, the source
    metadata and the base64-encoded mp3 audio.
    """
    translatedquestion = AIBook.translate_text(request.args.get("question"))
    top_node = AIBook.retrieve(translatedquestion)[0].node
    metadata = top_node.relationships[NodeRelationship("1")].metadata
    prompt_text = f'Question:"{translatedquestion}"\nReferance book page content:\n"""{top_node.text}"""'
    script = AIBook.generate_script(prompt_text)

    # Same evaluation order as the response fields: image, dialogue, audio.
    image_url = AIBook.get_image_url(metadata['bookid'],metadata['page'])
    dialogue = []
    for entry in loads(script):
        prefix = "गुरुदेव :- " if entry['speaker'] == 'teacher' else "शिष्य :- "
        dialogue.append(prefix + entry['line'])
    audio_b64 = b64encode(AIBook.combine_audio_files(script)).decode('utf-8')
    return jsonify({"image": image_url, "script": "\n".join(dialogue), "metadata": metadata, "audio": audio_b64})
@AIBook.app.route("/podcast_audio")
@AIBook.cachereqbyurl
def function10():
    """Generate a podcast for the ``question`` parameter and send only its mp3 audio."""
    translatedquestion = AIBook.translate_text(request.args.get("question"))
    page_text = AIBook.retrieve(translatedquestion)[0].node.text
    prompt_text = f'Question:"{translatedquestion}"\nReferance book page content:\n"""{page_text}"""'
    script = AIBook.generate_script(prompt_text)
    mp3_bytes = AIBook.combine_audio_files(script)
    return send_file(BytesIO(mp3_bytes), mimetype="audio/mp3")
@AIBook.app.route("/podcast_script")
@AIBook.cachereqbyurl
def function11():
    """Return the podcast dialogue as plain text, followed by book name, page and page-image link."""
    translatedquestion = AIBook.translate_text(request.args.get("question"))
    top_node = AIBook.retrieve(translatedquestion)[0].node
    metadata = top_node.relationships[NodeRelationship("1")].metadata
    prompt_text = f'Question:"{translatedquestion}"\nReferance book page content:\n"""{top_node.text}"""'
    script = AIBook.generate_script(prompt_text)

    dialogue = []
    for entry in loads(script):
        prefix = "गुरुदेव :- " if entry['speaker'] == 'teacher' else "शिष्य :- "
        dialogue.append(prefix + entry['line'])
    # str.join keeps the original requirement that every piece is a string.
    return "".join([
        "\n".join(dialogue),
        "\nBookname : ",
        metadata["book"],
        " page : ",
        str(metadata["page"]),
        "\nReferance page image link : ",
        AIBook.get_image_url(metadata['bookid'],metadata['page']),
    ])
@AIBook.app.route("/download_embedding")
def function12():
    """Send allbook.book to the client as a download."""
    book_path = "allbook.book"
    return send_file(book_path, as_attachment=True)
@AIBook.app.route("/upload_embedding", methods=['POST'])
def function13():
    """Replace allbook.book and the index contents with the uploaded ``file``.

    The upload is unpickled before anything is modified, so an unreadable
    file no longer leaves the index empty and allbook.book overwritten.
    Returns "Done" on success.
    """
    payload = request.files['file'].read()
    # SECURITY NOTE(review): unpickling an uploaded file executes arbitrary
    # code chosen by the uploader, and this route has no authentication.
    nodes = pickle_loads(payload)
    AIBook.index.vector_store.clear()
    with open("allbook.book", 'wb') as book_file:
        book_file.write(payload)
    AIBook.index.insert_nodes(nodes)
    return "Done"
@AIBook.app.route("/LLM_ENABLE")
def function14():
    """Enable or disable LLM use from the ``enable`` query parameter.

    "true" (any case, surrounding whitespace ignored) enables the LLM; any
    other value disables it.  Returns "Done", or a 400 response when the
    parameter is missing.
    """
    flag = request.args.get("enable")
    if flag is None:
        return "missing 'enable' query parameter", 400
    AIBook.LLMENABLE = flag.strip().lower() == "true"
    return "Done"
@AIBook.app.route("/get_text")
@AIBook.cachereqbyurl
def function15()->str:
    """Return the source ``maintext`` of every node retrieved for the ``question`` parameter, separated by blank lines."""
    translated = AIBook.translate_text(request.args.get("question"))
    hits = AIBook.retrieve(translated)
    passages = AIBook.nodes_to_guj(hits)
    return "\n\n".join(passages)
@AIBook.app.route('/python')
def function16():
    """Render index.html, the page the landing text calls the Python interactive shell."""
    page = render_template('index.html')
    return page
@AIBook.app.route('/compile', methods=['POST'])
def function17():
    """Execute the ``code`` field of the JSON body and return what it printed.

    The ``Authorization`` header must hold the base64-encoded password;
    otherwise ('Unauthorized', 401) is returned.  Any exception, including
    one raised by the executed code, is returned as its message text.
    """
    try:
        token = request.headers.get('Authorization')
        # A missing header is an authentication failure, not a server error.
        if token is None or AIBook.decode_string(token) != AIBook.password:
            return 'Unauthorized', 401
        original_stdout = sys.stdout
        captured = StringIO()
        sys.stdout = captured
        try:
            # SECURITY NOTE(review): runs arbitrary code inside the server
            # process, guarded only by the shared password.
            exec(compile(request.json['code'], '<string>', 'exec'))
        finally:
            # Restore stdout even when the code raises; otherwise all later
            # prints in the process would go to the abandoned buffer.
            sys.stdout = original_stdout
        return captured.getvalue()
    except Exception as e:
        return str(e)
@AIBook.app.route('/change_password')
def function18():
    """Set the /compile password from the ``password`` query parameter.

    Returns a confirmation containing the base64 form of the new password,
    or a 400 response when the parameter is missing or empty (previously the
    password was set to None before the request failed).
    """
    # SECURITY NOTE(review): this route requires no authentication, so anyone
    # who can reach it can replace the password protecting /compile.
    new_password = request.args.get('password')
    if not new_password:
        return "missing 'password' query parameter", 400
    AIBook.password = new_password
    return 'Password changed successfully. now password of python is <b>'+AIBook.encode_string(AIBook.password)+'</b>'
if __name__ == '__main__':
    # Serve on all interfaces, port 7860, with Flask debug mode off.
    AIBook.app.run("0.0.0.0",7860,debug=False)