File size: 15,246 Bytes
5f900bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
from functools import wraps
from typing import Dict
from llama_index.core import StorageContext , VectorStoreIndex
from llama_index.core.base.base_retriever import BaseRetriever
from llama_index.core.schema import NodeRelationship , NodeWithScore
from chromadb import EphemeralClient
from llama_index.vector_stores.chroma import ChromaVectorStore
from nest_asyncio import apply as nest_asyncio_apply
from pickle import load as pickle_load ,loads as pickle_loads, dump as pickle_dump, dumps as pickle_dumps
from bs4 import BeautifulSoup
from edge_tts import Communicate
from uuid import uuid4
from os import remove , getenv
from pydub import AudioSegment
from json import loads , dumps
from io import BytesIO
from g4f.client import Client
from re import search , DOTALL
from flask_caching import Cache
from base64 import b64encode , b64decode
from flask import Flask, request, send_file , redirect , jsonify , render_template
from asyncio import run as run_asyncio
from requests import get as reqget
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from g4f.Provider import __providers__
from io import StringIO
import flask_monitoringdashboard as dashboard
import sys

# Apply the nest_asyncio patch once at import time.
# NOTE(review): presumably this lets `run_asyncio(...)` in AIBook.tts_generate work
# even when an event loop is already running in this thread — confirm.
nest_asyncio_apply()

class AIBook:
  """Class-level state and helpers for the book question-answering service.

  Nothing here is instantiated: the Flask app, its cache, the embedding model,
  the vector index built from ``allbook.book``, the retriever, the LLM prompt
  and the text-to-speech settings are all class attributes created while this
  class body executes (i.e. at import time). Every helper is a classmethod.
  """
  # SECURITY: hard-coded default credential checked by the /compile endpoint.
  password = '123456'
  app = Flask(__name__,template_folder="")
  app.config['CACHE_TYPE'] = 'SimpleCache'
  app.config['CACHE_DEFAULT_TIMEOUT'] = 600 # seconds
  cache = Cache(app)
  embed_model:HuggingFaceEmbedding = HuggingFaceEmbedding("model")
  # SECURITY: unpickling runs arbitrary code, so allbook.book must be a trusted file.
  # The file is opened in a `with` block so the handle is closed deterministically.
  with open('allbook.book', 'rb') as _book_file:
    _book_nodes = pickle_load(_book_file)
  index:VectorStoreIndex = VectorStoreIndex(nodes=_book_nodes,embed_model=embed_model,storage_context=StorageContext.from_defaults(vector_store=ChromaVectorStore(chroma_collection= EphemeralClient().get_or_create_collection("bookAiLib"))))
  # Drop the loading temporaries so they do not linger as class attributes.
  del _book_file, _book_nodes
  # Number of nodes returned per query (similarity_top_k of the retriever).
  srcnum:int=2
  retriever:BaseRetriever = index.as_retriever(similarity_top_k=srcnum,vector_store_query_mode="default")
  with open("prompt.txt",encoding="utf-8") as _prompt_file:
    system_prompt:str = _prompt_file.read()
  del _prompt_file
  # edge-tts voice names used for the two podcast speakers.
  teachertts = "hi-IN-MadhurNeural"
  studenttts = "hi-IN-SwaraNeural"
  # Pre-recorded clip used instead of synthesising the fallback dialogue.
  error_audio:AudioSegment = AudioSegment.from_file("default_error_script.mp3", format="mp3")
  LLMENABLE:bool = True
  # NOTE: raises TypeError at import time when SPACE_HOST is not set (str + None).
  URL:str = "https://"+getenv("SPACE_HOST")
  # Lower-cased provider class name -> provider class; "" maps to None.
  llmprovider = {provider.__name__.lower():provider for provider in __providers__}
  llmprovider[""] = None
  # First line of the canned fallback script. generate_script emits it and
  # combine_audio_files recognises it, so both must use this one constant.
  llm_failure_line:str = "शिष्य चलो,आज का समय यही समाप्त होता है में अब आगे कुछ नहीं बोल सकता , मैं थक गया हूँ"

  @classmethod
  def changesrcnum(cls,srcnum:int):
    """Set how many nodes each query returns and rebuild the retriever accordingly."""
    cls.srcnum = srcnum
    cls.retriever = cls.index.as_retriever(similarity_top_k=cls.srcnum,vector_store_query_mode="default")

  @classmethod
  def retrieve(cls,query:str)->list[NodeWithScore]:
    """Return the retriever's scored nodes for ``query``."""
    return cls.retriever.retrieve(query)

  @classmethod
  def nodes_to_guj(cls,nodes:list[NodeWithScore])->list[str]:
    """Return the ``maintext`` metadata of each node's ``NodeRelationship("1")`` relation.

    NOTE(review): ``NodeRelationship("1")`` is presumably the SOURCE relationship and
    ``maintext`` the original-language page text — confirm against the index builder.
    """
    return [node.node.relationships[NodeRelationship("1")].metadata["maintext"] for node in nodes]

  @classmethod
  def nodes_to_eng(cls,nodes:list[NodeWithScore])->list[str]:
    """Return the ``text`` of each retrieved node."""
    return [node.node.text for node in nodes]

  @classmethod
  def nodes_to_page_with_bookname(cls,nodes:list[NodeWithScore])->list[Dict]:
    """Return ``{"page": int, "bookname": ...}`` for each node, read from its related-node metadata."""
    pages = []
    for node in nodes:
      metadata = node.node.relationships[NodeRelationship("1")].metadata
      pages.append({"page":int(metadata["page"]),"bookname":metadata["book"]})
    return pages

  @classmethod
  def translate_text(cls,text: str, src: str = "gu", tgt: str = "en",enabletransiliration=True) -> str:
    """Translate ``text`` from ``src`` to ``tgt`` via the Google translate endpoint.

    When ``enabletransiliration`` is true and ``text`` contains Latin letters, it is
    first passed through the Google input-tools endpoint for ``src`` and the first
    candidate is used as the text to translate.

    The text is sent through ``params=`` so that characters such as ``&``, ``#`` or
    ``+`` are percent-encoded instead of corrupting the query string.
    """
    if bool(search(r"[A-Za-z]", text)) and enabletransiliration:
      text = reqget("https://inputtools.google.com/request",params={"itc":f"{src}-t-i0-und","num":1,"text":text}).json()[1][0][1][0]
    segments = reqget("https://translate.googleapis.com/translate_a/single",params={"client":"gtx","sl":src,"tl":tgt,"dt":"t","q":text}).json()[0]
    return "".join([segment[0] for segment in segments])

  @classmethod
  def tts_generate(cls,text: str, speaker: str) -> str:
    """Synthesise ``text`` to a uniquely named temp file and return its path.

    ``speaker == "teacher"`` selects ``teachertts``; anything else selects
    ``studenttts``. The caller is responsible for deleting the file.
    """
    temp_filename = f"temp_{uuid4()}.wav"
    voice = cls.teachertts if speaker == "teacher" else cls.studenttts
    run_asyncio(Communicate(text, voice).save(temp_filename))
    return temp_filename

  @classmethod
  def combine_audio_files(cls,json_script:str) -> bytes:
    """Render a JSON podcast script to a single MP3 and return its bytes.

    ``json_script`` is a JSON list of ``{"speaker": ..., "line": ...}`` objects.
    If any line contains ``llm_failure_line`` the script is the canned fallback:
    the pre-recorded ``error_audio`` is used, followed by the last line spoken in
    the student voice. Otherwise every line is synthesised in order.

    The fallback is detected on the *parsed* lines, not the raw JSON text, because
    ``json.dumps`` escapes non-ASCII characters by default and a raw substring
    check would never match the script produced by ``generate_script``.
    """
    script_lines = loads(json_script)
    temp_files:list[str] = []
    try:
      if any(cls.llm_failure_line in item['line'] for item in script_lines):
        page_audio = cls.tts_generate(script_lines[-1]['line'], "student")
        temp_files.append(page_audio)
        combined_audio = cls.error_audio+AudioSegment.from_file(page_audio)
      else:
        combined_audio = AudioSegment.empty()
        for item in script_lines:
          clip_path = cls.tts_generate(item['line'], item['speaker'])
          temp_files.append(clip_path)
          combined_audio += AudioSegment.from_file(clip_path)
    finally:
      # Always clean up the temp clips, even when synthesis or decoding failed.
      for clip_path in temp_files:
        try:
          remove(clip_path)
        except OSError:
          # Best-effort cleanup; never mask the original error.
          pass
    return combined_audio.export(format="mp3").read()

  @classmethod
  def generate_script(cls, prompt:str,provider:str="",model:str="gpt-4o-mini",retry=1)->str:
    """Ask the LLM for a podcast script and return it as a JSON string.

    The reply must contain a ```json fenced block; its content is returned with the
    Hindi keys/values for speaker, teacher, student and line mapped to their English
    names. If the reply has no such block the call is retried (``retry`` counts
    attempts; at most three LLM calls are made). If the LLM is disabled, the retries
    are exhausted, or any exception occurs, a canned fallback script is returned
    whose last line is ``prompt`` translated from English to Hindi.
    """
    try:
      if not (cls.LLMENABLE and retry<4):
        raise Exception("LLM faced a problem")
      result = Client().chat.completions.create(max_tokens=2024,provider=cls.llmprovider[provider],model=model,messages=[{"role": "user", "content": "AGENT PROMPT:-\n"+cls.system_prompt+"\nUSER PROMPT:-\n"+prompt}]).choices[0].message.content
      # A non-string reply (e.g. None) is treated like a reply without a JSON block.
      fenced = search(r'```json(.*?)```', result, DOTALL) if isinstance(result, str) else None
      if fenced is None:
        print(result)
        return cls.generate_script(prompt,provider,model,retry+1)
      return fenced.group(1).replace('"वक्ता"','"speaker"').replace('"अध्यापक"','"teacher"').replace('"विद्यार्थी"','"student"').replace('"वाक्य"','"line"')
    except Exception as e:
      print(e)
      return dumps([{"speaker": "teacher","line": cls.llm_failure_line},{"speaker": "student","line": "कुछ नहीं गुरूजी आप मुझे वो शास्त्र के भावार्थ की बात बताने वाला  पेज तो देते जाइए में खुद पढ़ लूंगा"},{"speaker": "teacher","line": "लो , इसमें लिखे हुए अक्षर थोड़े खराब है इसलिए तुम्हें दिक्कत हो सकती है"},{"speaker": "student","line": "ठीक है मैं पढता हूँ"},{"speaker": "student","line": cls.translate_text(prompt,'en','hi',False)}])

  @classmethod
  def get_image_url(cls,bookid:str,page:str|int)->str:
    """Return the ``src`` of the first ``img.img-fluid`` on the jainqq.org page for ``bookid``/``page``.

    Raises AttributeError if the page contains no such image.
    """
    html = reqget(f"https://jainqq.org/explore/{bookid}/{page}").content
    return BeautifulSoup(html, "html.parser").find("img",class_="img-fluid").get("src")

  @classmethod
  def encode_string(cls,s:str)->str:
    """Return ``s`` base64-encoded, as text."""
    return b64encode(s.encode()).decode()

  @classmethod
  def decode_string(cls,s:str)->str:
    """Return the text obtained by base64-decoding ``s``."""
    return b64decode(s.encode()).decode()

  @classmethod
  def cachereqbyurl(cls,func):
    """Decorator: cache a view's return value for 600 seconds, keyed by the full request URL."""
    @wraps(func)
    def wrapper(*args, **kwargs):
      cache_key = request.url
      cached_result = cls.cache.get(cache_key)
      # Compare against None so that falsy results (e.g. "") still count as hits.
      if cached_result is not None:
        return cached_result
      result = func(*args, **kwargs)
      cls.cache.set(cache_key, result, timeout=600)
      return result
    return wrapper

# Attach flask_monitoringdashboard to the Flask app.
# NOTE(review): this executes before any of the routes below are registered —
# check the dashboard documentation on whether bind() must run after the
# endpoints have been added for them to be monitored.
dashboard.bind(app=AIBook.app)
@AIBook.app.route("/")
@AIBook.cachereqbyurl
def function1():
  """Return the HTML help page: example URLs (built from ``AIBook.URL``) for each endpoint plus the two upload forms."""
  return f"""Please provide a question as {AIBook.URL}/get_text?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ<br>for getting image of question {AIBook.URL}/question_to_img?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ <br>for get image by book and page {AIBook.URL}/getbookimage/023657/99<br>for get image redirect url by book and page {AIBook.URL}/getbookimage/023657/99?redirect=true<br>for knowing full data with page etc {AIBook.URL}/fulldetails?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ <br>Change srcnum as {AIBook.URL}/changesrcnum?srcnum=2<br>enable or disable llm through {AIBook.URL}/LLM_ENABLE?enable=true || {AIBook.URL}/LLM_ENABLE?enable=false <br>Get audio from podcast script as {AIBook.URL}/tts_podcast?json_script=json_script for get podcast audio<br>{AIBook.URL}/get_prompt?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ for get prompt<br>{AIBook.URL}/podcast?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ for get details<br>{AIBook.URL}/podcast_audio?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ for get audio<br>{AIBook.URL}/podcast_script?question=પ્રભુ નું અસ્તિત્વ છે કે નહિ for get podcast script<br>you can try replace of "પ્રભુ નું અસ્તિત્વ છે કે નહિ" with "prabhu nu astitva chhe ke nahi"<br>{AIBook.URL}/download_embedding for downloading embedding book file<br>For uploading a 1 book file<br><form action="/insert_nodes" method="post" enctype="multipart/form-data"><input type="file" name="file" accept=".book" /><input type="submit" value="Upload Book File" /></form><br>for uploading allbook file<br><form action="/upload_embedding" method="post" enctype="multipart/form-data"><input type="file" name="file" accept=".book" /><input type="submit" value="Upload Book File" /></form><br>Get python interactive shell as {AIBook.URL}/python <br>change python authentication password {AIBook.URL}/change_password?password=123456<br>to monitor api usage {AIBook.URL}/dashboard with  <b><i>admin</b></i> username and <b><i>admin</b></i> password"""

@AIBook.app.route("/tts_podcast")
@AIBook.cachereqbyurl
def function2():
  """Synthesise the ``json_script`` query parameter and return it as ``podcast.mp3``."""
  script_json = request.args.get('json_script')
  mp3_bytes = AIBook.combine_audio_files(script_json)
  audio_stream = BytesIO(mp3_bytes)
  return send_file(audio_stream, download_name='podcast.mp3', mimetype="audio/mp3")

@AIBook.app.route("/changesrcnum")
def function3():
  """Change how many nodes each query retrieves.

  Reads the ``srcnum`` query parameter. Responds 400 when it is missing, not an
  integer, or smaller than 1 (previously such requests raised and produced a 500).
  """
  # type=int makes get() return None instead of raising when conversion fails.
  srcnum = request.args.get("srcnum", type=int)
  if srcnum is None or srcnum < 1:
    return "srcnum must be a positive integer", 400
  AIBook.changesrcnum(srcnum)
  return "srcnum changed"

@AIBook.app.route("/insert_nodes",methods=["POST"])
def function4():
  """Add the nodes from an uploaded pickle file to the live index and to ``allbook.book``.

  The multipart field ``file`` must hold a pickled list of nodes.
  """
  # SECURITY: unpickling an uploaded file executes arbitrary code chosen by the
  # uploader; this endpoint performs no authentication.
  objs = pickle_loads(request.files['file'].read())
  AIBook.index.insert_nodes(objs)
  # Read the existing nodes fully (and close the file) before reopening for writing.
  with open("allbook.book",'rb') as book_file:
    existing_nodes = pickle_load(book_file)
  with open("allbook.book",'wb') as book_file:
    pickle_dump(existing_nodes+objs,book_file)
  return "inserted"

@AIBook.app.route("/fulldetails")
@AIBook.cachereqbyurl
def function5():
  """Return JSON with the question, its translation and every retrieved node's text, score and related-node metadata."""
  question = request.args.get("question")
  translated = AIBook.translate_text(question)
  contents = []
  for hit in AIBook.retrieve(translated):
    related_metadata = hit.node.relationships[NodeRelationship("1")].metadata
    contents.append({"text":hit.get_text(),"score":hit.get_score(),"metadata":related_metadata})
  return jsonify({"question":question,"translatedquestion":translated,"contents":contents})

@AIBook.app.route("/getbookimage/<bookid>/<page>")
@AIBook.cachereqbyurl
def function6(bookid,page):
  """Serve the page image for ``bookid``/``page``.

  Redirects to the image URL unless the ``redirect`` query parameter is exactly
  ``"false"``, in which case the image is downloaded and returned directly.
  """
  image_url = AIBook.get_image_url(bookid,page)
  if request.args.get("redirect") == "false":
    upstream = reqget(image_url)
    return send_file(BytesIO(upstream.content), mimetype=upstream.headers['Content-Type'])
  return redirect(image_url)

@AIBook.app.route("/question_to_img")
@AIBook.cachereqbyurl
def function7():
  """Return the page image belonging to the best-matching node for the ``question`` parameter."""
  translated = AIBook.translate_text(request.args.get("question"))
  top_hit = AIBook.retrieve(translated)[0]
  meta = top_hit.node.relationships[NodeRelationship("1")].metadata
  image_url = AIBook.get_image_url(meta['bookid'],meta['page'])
  upstream = reqget(image_url)
  return send_file(BytesIO(upstream.content), mimetype=upstream.headers['Content-Type'])

@AIBook.app.route("/get_prompt")
@AIBook.cachereqbyurl
def function8():
  """Return JSON holding the page-image URL and the LLM prompt text for the ``question`` parameter."""
  translatedquestion = AIBook.translate_text(request.args.get("question"))
  node = AIBook.retrieve(translatedquestion)[0].node
  metadata = node.relationships[NodeRelationship("1")].metadata
  image_url = AIBook.get_image_url(metadata['bookid'],metadata['page'])
  prompt_text = f"Question:\"{translatedquestion}\"\nReferance book page content:\n\"\"\"{node.text}\"\"\""
  return jsonify({"image":image_url,"text":prompt_text})

@AIBook.app.route("/podcast")
@AIBook.cachereqbyurl
def function9():
  """Return JSON with the page image URL, the readable script, the node metadata and base64 MP3 audio for the ``question`` parameter."""
  translatedquestion = AIBook.translate_text(request.args.get("question"))
  node = AIBook.retrieve(translatedquestion)[0].node
  metadata = node.relationships[NodeRelationship("1")].metadata
  script = AIBook.generate_script(f"Question:\"{translatedquestion}\"\nReferance book page content:\n\"\"\"{node.text}\"\"\"")
  image_url = AIBook.get_image_url(metadata['bookid'],metadata['page'])
  # Prefix each line with the speaker label shown to the reader.
  labelled_lines = []
  for entry in loads(script):
    if entry['speaker'] == 'teacher':
      labelled_lines.append("गुरुदेव :- "+entry['line'])
    else:
      labelled_lines.append("शिष्य :- "+entry['line'])
  readable_script = "\n".join(labelled_lines)
  audio_b64 = b64encode(AIBook.combine_audio_files(script)).decode('utf-8')
  return jsonify({"image":image_url,"script":readable_script,"metadata":metadata,"audio":audio_b64})

@AIBook.app.route("/podcast_audio")
@AIBook.cachereqbyurl
def function10():
  """Generate a podcast script for the ``question`` parameter and return it as MP3 audio."""
  translatedquestion = AIBook.translate_text(request.args.get("question"))
  page_text = AIBook.retrieve(translatedquestion)[0].node.text
  script = AIBook.generate_script(f"Question:\"{translatedquestion}\"\nReferance book page content:\n\"\"\"{page_text}\"\"\"")
  mp3_bytes = AIBook.combine_audio_files(script)
  return send_file(BytesIO(mp3_bytes), mimetype="audio/mp3")

@AIBook.app.route("/podcast_script")
@AIBook.cachereqbyurl
def function11():
  """Return the readable podcast script for the ``question`` parameter, followed by book name, page and page-image link."""
  translatedquestion = AIBook.translate_text(request.args.get("question"))
  node = AIBook.retrieve(translatedquestion)[0].node
  metadata = node.relationships[NodeRelationship("1")].metadata
  script = AIBook.generate_script(f"Question:\"{translatedquestion}\"\nReferance book page content:\n\"\"\"{node.text}\"\"\"")
  # Prefix each line with the speaker label shown to the reader.
  labelled_lines = []
  for entry in loads(script):
    if entry['speaker'] == 'teacher':
      labelled_lines.append("गुरुदेव :- "+entry['line'])
    else:
      labelled_lines.append("शिष्य :- "+entry['line'])
  readable_script = "\n".join(labelled_lines)
  book_info = "\nBookname : "+ metadata["book"] + " page : "+ str(metadata["page"])
  image_info = "\nReferance page image link : " + AIBook.get_image_url(metadata['bookid'],metadata['page'])
  return readable_script + book_info + image_info

@AIBook.app.route("/download_embedding")
def function12():
  """Send ``allbook.book`` to the client as a file download."""
  book_path = "allbook.book"
  return send_file(book_path, as_attachment=True)

@AIBook.app.route("/upload_embedding", methods=['POST'])
def function13():
  """Replace ``allbook.book`` and the live index contents with an uploaded pickle file.

  The multipart field ``file`` must hold a pickled list of nodes. The upload is
  unpickled *before* anything is modified, so a corrupt file no longer wipes the
  vector store or overwrites the stored book.
  """
  file = request.files['file']
  payload = file.read()
  # SECURITY: unpickling an uploaded file executes arbitrary code chosen by the
  # uploader; this endpoint performs no authentication.
  nodes = pickle_loads(payload)
  with open("allbook.book",'wb') as book_file:
    book_file.write(payload)
  AIBook.index.vector_store.clear()
  AIBook.index.insert_nodes(nodes)
  return "Done"
@AIBook.app.route("/LLM_ENABLE")
def function14():
  """Enable or disable LLM script generation.

  ``enable=true`` (case-insensitive, surrounding whitespace ignored) enables it;
  any other value disables it. Responds 400 when ``enable`` is missing
  (previously this raised AttributeError and produced a 500).
  """
  flag = request.args.get("enable")
  if flag is None:
    return "missing 'enable' query parameter", 400
  AIBook.LLMENABLE = flag.strip().lower() == "true"
  return "Done"

@AIBook.app.route("/get_text")
@AIBook.cachereqbyurl
def function15()->str:
  """Return the ``maintext`` of every node retrieved for the ``question`` parameter, separated by blank lines."""
  translated = AIBook.translate_text(request.args.get("question"))
  hits = AIBook.retrieve(translated)
  passages = AIBook.nodes_to_guj(hits)
  return "\n\n".join(passages)

@AIBook.app.route('/python')
def function16():
    """Render the ``index.html`` template."""
    page = render_template('index.html')
    return page

@AIBook.app.route('/compile', methods=['POST'])
def function17():
    """Execute the Python source in the JSON body's ``code`` field and return what it printed.

    The ``Authorization`` header must be the base64 encoding of ``AIBook.password``;
    a missing, undecodable or wrong header yields 401. If the executed code raises,
    the exception text is returned instead of the captured output.

    SECURITY: this runs arbitrary code inside the server process by design.
    """
    token = request.headers.get('Authorization')
    if token is None:
        return 'Unauthorized', 401
    try:
        supplied = AIBook.decode_string(token)
    except ValueError:
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
        return 'Unauthorized', 401
    if supplied != AIBook.password:
        return 'Unauthorized', 401
    original_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        exec(compile(request.json['code'], '<string>', 'exec'))
        return captured.getvalue()
    except Exception as e:
        return str(e)
    finally:
        # Restore stdout even when the executed code raised; otherwise every
        # later print in the process would go to a discarded buffer.
        sys.stdout = original_stdout

@AIBook.app.route('/change_password')
def function18():
    """Set the /compile password from the ``password`` query parameter.

    Responds 400 when the parameter is missing or empty; previously a missing
    parameter stored ``None`` as the password and then crashed with a 500.
    Returns an HTML message containing the base64 form of the new password.

    SECURITY: this endpoint is unauthenticated — anyone who can reach it can
    replace the password.
    """
    new_password = request.args.get('password')
    if not new_password:
        return "missing 'password' query parameter", 400
    AIBook.password = new_password
    return 'Password changed successfully. now password of python is <b>'+AIBook.encode_string(AIBook.password)+'</b>'

# Script entry point: serve on every interface, port 7860, with debug mode off.
if __name__ == '__main__':
    AIBook.app.run(host="0.0.0.0", port=7860, debug=False)