File size: 6,712 Bytes
e492076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a35c444
e492076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35c8bdd
36cea4d
6d2a856
e492076
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# -*- coding: utf-8 -*-
"""
Created on Fri May 19 10:37:00 2023

@author: Goutam
"""

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.chat_models import ChatOpenAI
from googletrans import Translator
import re
import os
import gradio as gr

#from langchain.document_loaders import TextLoader
#Other loaders PyPDFLoader,PyPDFDirectoryLoader
#from langchain.document_loaders import UnstructuredFileLoader

# SECURITY: the OpenAI API key must never be committed to source control.
# Any key that has ever appeared in this file's history must be treated as
# compromised and revoked. Supply the key through the OPENAI_API_KEY
# environment variable instead (e.g. as a deployment secret).
if not os.environ.get('OPENAI_API_KEY'):
    # Fail at start-up with an actionable message rather than on the first
    # user request, deep inside the OpenAI client.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or configure it as a "
        "deployment secret before starting the app."
    )
def translate(in_str, dest="zh-CN"):
    """Translate *in_str* with Google Translate and return the translated text.

    Args:
        in_str: The text to translate.
        dest: Target language code passed to googletrans, e.g. "hi" for
            Hindi, "en" for English, "zh-CN" for Simplified Chinese or
            "zh-TW" for Traditional Chinese. Defaults to "zh-CN".

    Returns:
        The translated text. An empty input yields an empty string without
        contacting the translation service. If the translator's result has
        no usable ``text`` attribute, the text is recovered from the
        result's string form, and as a last resort that string form itself
        is returned.
    """
    if not in_str:
        # Nothing to translate; avoid a pointless network round trip.
        return ""

    translator = Translator(service_urls=['translate.google.com'])
    translation = translator.translate(in_str, dest=dest)

    # Read the translated text straight from the result object. Parsing the
    # repr is fragile: it keeps the ", " field separator and breaks on
    # multi-line text.
    translated_text = getattr(translation, "text", None)
    if isinstance(translated_text, str):
        return translated_text

    # Fallback for result objects without a ``text`` attribute: pull the
    # field out of the repr. Non-greedy + DOTALL so multi-line text matches
    # and the trailing separator is excluded.
    translation_str = str(translation)
    match = re.search(r'text=(.*?),\s*pronunciation=', translation_str, re.DOTALL)
    if match is not None:
        return match.group(1)
    return translation_str
def _write_reference_file(source_docs, path):
    """Write the page content and metadata of each source document to *path*."""
    with open(path, 'w', encoding='utf-8') as f:
        for doc in source_docs:
            # Use the document's own fields instead of regex-parsing its
            # repr, which crashes when the pattern does not match and
            # leaves repr quoting/escapes in the output.
            content = doc.page_content.replace('\uf07d', ' ').replace('\xa0', ' ')
            meta_data = ", ".join(
                f"{key!r}: {value!r}" for key, value in doc.metadata.items()
            )
            print("Page Content", '\n')
            print(content)
            print("Meta Data-", '\n')
            print(meta_data)
            f.write("Page Content" + '\n')
            f.write(content)
            f.write('\n')
            f.write("Meta Data-")
            f.write(meta_data)
            f.write('\n\n')


def cccs_demo(question):
    """Answer *question* from the persisted "ChinaDB/" vector store.

    Runs a retrieval QA chain for the question, runs the same chain again to
    summarise the stored documents, translates both results with
    ``translate`` and writes the retrieved source passages to a text file.

    Args:
        question: The user's question, used as the retrieval query.

    Returns:
        A 3-tuple ``(final_result, final_summary, ref_str)``:
        the answer followed by its translation, the document summary
        followed by its translation, and the name of the reference file
        ("Referenced.txt" when sources were retrieved, otherwise
        "None.txt").
    """
    from langchain.prompts import PromptTemplate

    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(persist_directory="ChinaDB/", embedding_function=embeddings)

    # Custom prompt. The "don't know" sentence is completed so the model is
    # actually told what to do when the context does not hold the answer.
    prompt_template = """Use the documents uploaded on China, to answer the question at the end. If you don't know the answer, just say that you don't know; do not try to make up an answer.

{context}
Question: {question}

Answer:"""
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # One chain serves both the user question and the summary query; it
    # holds no per-query state, so building it twice is unnecessary.
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(temperature=0),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    # Answer the user's question.
    answer = qa({"query": question})
    full_result = answer['result']
    source_docs = answer['source_documents']
    print("Answer ", full_result)
    final_result = full_result + '\n' + translate(full_result)
    print("Final result-", final_result)
    print("Number of sources ", len(source_docs))

    # Summarise the repository contents with the same chain.
    summary_answer = qa({"query": "Please give summary of contents all the documents."})
    doc_summary = summary_answer['result']
    print("Document Summary-", doc_summary)
    final_summary = doc_summary + '\n' + translate(doc_summary)
    print("Final result-", final_summary)

    # Persist the passages the answer was based on (sources of the user's
    # question, not of the summary query).
    if source_docs:
        ref_str = "Referenced.txt"
        _write_reference_file(source_docs, ref_str)
    else:
        ref_str = "None.txt"
        with open(ref_str, 'w', encoding='utf-8') as f:
            f.write("No direct sources found")
    return final_result, final_summary, ref_str
"""
URL_COM = 'translate.google.com'
URL_HI = 'translate.google.hi'
LANG = "hi" #hi is for Hindi, en for English, zh or zh-CN for chinese simplified,zh-TW for traditional chinese
translator = Translator(service_urls=[URL_COM])
translation = translator.translate(answer['result'], dest=LANG)
#print(translation)
translation_str = str(translation)
answer_group = re.search('text=(.*)pronunciation=', translation_str)
answer_hindi = answer_group.group(1)
print("Answer in Hindi-", answer_hindi)
"""
# Gradio front end: a single question box in; the answer, the repository
# summary and a downloadable reference file out (the three values that
# cccs_demo returns, in that order).
title = "Zero2AI CCCS Demo"
description = "Demonstration of multi-document and multi-lingual Q&A on China."
demo = gr.Interface(
    fn=cccs_demo,
    inputs=[gr.Textbox(label="Question")],
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Repository Summary"),
        gr.File(label="Reference Details"),
    ],
    title=title,
    description=description,
    theme=gr.themes.Glass(primary_hue="indigo", secondary_hue="purple"),
)
# Turn on request queuing, then start the web server.
demo.queue()
demo.launch()

#formatted_source = source_docs.replace('\\n', '\n').replace('\\t', '\t')
#print("Source Documents ",formatted_source)
#To check source in RetrievalQA
#qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs,return_source_documents=True)
#result
#answer['result']
#answer['source_documents']