# -*- coding: utf-8 -*-
"""
Created on Fri May 19 10:37:00 2023
@author: Goutam
"""
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.chat_models import ChatOpenAI
from googletrans import Translator
import re
import os
import gradio as gr
#from langchain.document_loaders import TextLoader
#Other loaders PyPDFLoader,PyPDFDirectoryLoader
#from langchain.document_loaders import UnstructuredFileLoader
# SECURITY: an OpenAI API key used to be hard-coded on this line. Secrets must
# never be committed to source control; the key that was published here should
# be treated as compromised and revoked. The key is now expected to be supplied
# through the environment (for example as a Hugging Face Space secret).
if not os.environ.get('OPENAI_API_KEY'):
    print("WARNING: OPENAI_API_KEY is not set; OpenAI requests will fail until it is provided via the environment.")
def translate(in_str, dest="zh-CN"):
    """Translate text with Google Translate and return the translated string.

    Args:
        in_str: Text to translate.
        dest: Target language code passed to googletrans. Defaults to
            "zh-CN" (Simplified Chinese); other examples are "zh-TW"
            (Traditional Chinese), "hi" (Hindi) and "en" (English).

    Returns:
        The translated text, or an empty string when ``in_str`` is empty or
        contains only whitespace (no request is sent in that case).
    """
    # Nothing to translate: avoid a pointless network round trip.
    if not in_str or not in_str.strip():
        return ""

    translator = Translator(service_urls=['translate.google.com'])
    translation = translator.translate(in_str, dest=dest)
    # Use the result's ``text`` attribute directly. Parsing ``str(translation)``
    # with a regex kept the trailing ", " separator of the repr and failed
    # entirely for multi-line text, returning the whole repr instead.
    return translation.text
def cccs_demo(question):
    """Answer a question about the China document set, in English and Chinese.

    Runs the question through a RetrievalQA chain backed by the persisted
    "ChinaDB/" Chroma store, asks the same chain for a summary of the
    documents, translates both results with ``translate`` and writes the
    retrieved source passages to a text file.

    Args:
        question: The user's question as plain text.

    Returns:
        A 3-tuple ``(answer, summary, reference_path)``:
        - answer: the English answer, a newline, then its translation;
        - summary: the English document summary, a newline, then its
          translation;
        - reference_path: "Referenced.txt" when source passages were
          retrieved, otherwise "None.txt". For an empty question the tuple
          is ``("Please enter a question.", "", None)`` and no model call
          is made.
    """
    # Guard against blank input so we do not spend two LLM calls on nothing.
    if not question or not question.strip():
        return "Please enter a question.", "", None

    # One chain serves both the user question and the summary request.
    qa = _build_qa_chain()

    answer = qa({"query": question})
    full_result = answer['result']
    print("Answer ", full_result)
    final_result = full_result + '\n' + translate(full_result)
    print("Final result-", final_result)

    source_docs = answer['source_documents']
    print("Number of sources ", len(source_docs))

    summary_answer = qa({"query": "Please give summary of contents all the documents."})
    doc_summary = summary_answer['result']
    print("Document Summary-", doc_summary)
    final_summary = doc_summary + '\n' + translate(doc_summary)
    print("Final result-", final_summary)

    ref_str = _write_references(source_docs)
    return final_result, final_summary, ref_str


def _build_qa_chain():
    """Create the RetrievalQA chain over the persisted "ChinaDB/" Chroma store."""
    from langchain.prompts import PromptTemplate

    # The original instruction sentence was cut off after "If you don't know
    # the answer" and ended in a stray quote; it is completed here.
    prompt_template = """Use the documents uploaded on China, to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer:"""
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(persist_directory="ChinaDB/", embedding_function=embeddings)
    return RetrievalQA.from_chain_type(
        llm=OpenAI(temperature=0),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )


def _write_references(source_docs):
    """Write the retrieved passages to a text file and return that file's path."""
    if not source_docs:
        with open("None.txt", 'w', encoding='utf-8') as f:
            f.write("No directy sources found")
        return "None.txt"

    with open("Referenced.txt", 'w', encoding='utf-8') as f:
        for doc in source_docs:
            # Read the document attributes directly instead of regex-parsing
            # str(doc): the regex could miss (AttributeError on None) and
            # leaked repr quoting/escape sequences into the output file.
            content = doc.page_content.replace('\uf07d', ' ').replace('\xa0', ' ')
            meta_data = ", ".join(
                f"{key!r}: {value!r}" for key, value in doc.metadata.items()
            )
            f.write("Page Content" + '\n')
            f.write(content)
            f.write('\n')
            f.write("Meta Data-")
            f.write(meta_data)
            f.write('\n\n')
    return 'Referenced.txt'
"""
URL_COM = 'translate.google.com'
URL_HI = 'translate.google.hi'
LANG = "hi" #hi is for Hindi, en for English, zh or zh-CN for chinese simplified,zh-TW for traditional chinese
translator = Translator(service_urls=[URL_COM])
translation = translator.translate(answer['result'], dest=LANG)
#print(translation)
translation_str = str(translation)
answer_group = re.search('text=(.*)pronunciation=', translation_str)
answer_hindi = answer_group.group(1)
print("Answer in Hindi-", answer_hindi)
"""
# Gradio front end: one question box in; the answer, the repository summary
# and a reference file out.
title = "Zero2AI CCCS Demo"
description = "Demonstration of multi-document and multi-lingual Q&A on China."
demo = gr.Interface(
    fn=cccs_demo,
    inputs=[gr.Textbox(label="Question")],
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Repository Summary"),
        gr.File(label="Reference Details"),
    ],
    title=title,
    description=description,
    theme=gr.themes.Glass(primary_hue="indigo", secondary_hue="purple"),
)
# Turn on the request queue, then start the app.
demo.queue()
demo.launch()
#formatted_source = source_docs.replace('\\n', '\n').replace('\\t', '\t')
#print("Source Documents ",formatted_source)
#To check source in RetrievalQA
#qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs,return_source_documents=True)
#result
#answer['result']
#answer['source_documents']