# -*- coding: utf-8 -*-
"""
Bilingual question answering over a persisted Chroma document index.

Created on Fri May 19 10:37:00 2023

@author: Goutam
"""

import os
import re

import gradio as gr
from googletrans import Translator
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Other loaders that can be used to build the index: TextLoader,
# PyPDFDirectoryLoader, UnstructuredFileLoader (langchain.document_loaders).

# SECURITY: an OpenAI secret key used to be hard-coded at this point. Secrets
# must never live in source control, so the key is now required from the
# environment. The previously committed key should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before "
        "starting the demo."
    )

TRANSLATE_SERVICE_URL = 'translate.google.com'
DEFAULT_TARGET_LANG = "zh-CN"

PERSIST_DIRECTORY = "ChinaDB/"
SUMMARY_QUERY = "Please give summary of contents all the documents."
REFERENCE_FILE = "Referenced.txt"
NO_REFERENCE_FILE = "None.txt"

# Custom prompt handed to the "stuff" chain. The second sentence was
# previously truncated ("If you don't know the answer'"), leaving the model
# with a dangling instruction.
PROMPT_TEMPLATE = """Use the documents uploaded on China, to answer the question at the end.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

{context}

Question: {question}
Answer:"""


def translate(in_str, dest=DEFAULT_TARGET_LANG):
    """Translate *in_str* with Google Translate and return the translated text.

    Args:
        in_str: Text to translate.
        dest: Target language code, e.g. "hi" for Hindi, "en" for English,
            "zh" or "zh-CN" for simplified Chinese, "zh-TW" for traditional
            Chinese. Defaults to simplified Chinese.

    Returns:
        The translated text. Empty or whitespace-only input yields an empty
        string without contacting the translation service.
    """
    if not in_str or not in_str.strip():
        return ""
    translator = Translator(service_urls=[TRANSLATE_SERVICE_URL])
    translation = translator.translate(in_str, dest=dest)
    # Read the attribute instead of regex-parsing str(translation): the regex
    # kept the trailing ", " separator and failed on multi-line text.
    return translation.text


def _build_qa_chain():
    """Create the RetrievalQA chain over the persisted Chroma store."""
    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(
        persist_directory=PERSIST_DIRECTORY,
        embedding_function=embeddings,
    )
    prompt = PromptTemplate(
        template=PROMPT_TEMPLATE,
        input_variables=["context", "question"],
    )
    return RetrievalQA.from_chain_type(
        llm=OpenAI(temperature=0),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )


def _with_translation(text):
    """Return *text* followed by a newline and its translation."""
    return text + '\n' + translate(text)


def _write_references(source_docs):
    """Write the retrieved source documents to a text file and return its name.

    Each document contributes its page content and its metadata. When
    *source_docs* is empty a placeholder file is written instead.
    """
    if not source_docs:
        with open(NO_REFERENCE_FILE, 'w', encoding='utf-8') as f:
            f.write("No directy sources found")
        return NO_REFERENCE_FILE

    with open(REFERENCE_FILE, 'w', encoding='utf-8') as f:
        for doc in source_docs:
            # Use the document attributes directly rather than parsing
            # str(doc); the old regex approach raised AttributeError whenever
            # the repr did not match and leaked repr quoting into the file.
            page_text = doc.page_content.replace('\uf07d', ' ').replace('\xa0', ' ')
            meta_text = ", ".join(
                f"{key!r}: {value!r}" for key, value in doc.metadata.items()
            )
            print("Page Content", '\n')
            print(page_text)
            print("Meta Data-", '\n')
            print(meta_text)
            f.write("Page Content" + '\n')
            f.write(page_text)
            f.write('\n')
            f.write("Meta Data-")
            f.write(meta_text)
            f.write('\n\n')
    return REFERENCE_FILE


def cccs_demo(question):
    """Answer *question* from the indexed documents in English and Chinese.

    Args:
        question: The user's question as plain text.

    Returns:
        A 3-tuple ``(final_result, final_summary, ref_str)``:
        the answer followed by its translation, a document summary followed
        by its translation, and the name of the text file listing the source
        passages used for the answer ("None.txt" when there were none).
    """
    # One chain serves both queries; it was previously rebuilt with identical
    # arguments for the summary.
    qa = _build_qa_chain()

    answer = qa({"query": question})
    full_result = answer['result']
    print("Answer ", full_result)
    final_result = _with_translation(full_result)
    print("Final result-", final_result)

    # Capture the sources of the user's question before running the summary
    # query, so the reference file describes the answer and not the summary.
    source_docs = answer['source_documents']
    print("Number of sources ", len(source_docs))

    # NOTE(review): the summary goes through the same retriever, so it
    # presumably reflects only the chunks retrieved for this query rather
    # than every document in the store - confirm this is intended.
    doc_summary = qa({"query": SUMMARY_QUERY})['result']
    print("Document Summary-", doc_summary)
    final_summary = _with_translation(doc_summary)
    print("Final result-", final_summary)

    ref_str = _write_references(source_docs)
    return final_result, final_summary, ref_str


title = "Zero2AI CCCS Demo"
description = "Demonstration of multi-document and multi-lingual Q&A on China."
# Gradio front end: a single question box in; the answer, the repository
# summary and a downloadable reference file out.
question_box = gr.Textbox(label="Question")
result_widgets = [
    gr.Textbox(label="Answer"),
    gr.Textbox(label="Repository Summary"),
    gr.File(label="Reference Details"),
]
glass_theme = gr.themes.Glass(primary_hue="indigo", secondary_hue="purple")

demo = gr.Interface(
    cccs_demo,
    [question_box],
    result_widgets,
    title=title,
    description=description,
    theme=glass_theme,
)
demo.queue()
demo.launch()

# Debugging tip: because the chain is built with return_source_documents=True,
# its response can be inspected via answer['result'] and
# answer['source_documents'].