Goutam2023 committed on
Commit
e492076
·
1 Parent(s): 9ad40ac

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +157 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Fri May 19 10:37:00 2023
4
+
5
+ @author: Goutam
6
+ """
7
+
8
+ from langchain.embeddings.openai import OpenAIEmbeddings
9
+ from langchain.vectorstores import Chroma
10
+ from langchain.text_splitter import CharacterTextSplitter
11
+ from langchain.llms import OpenAI
12
+ from langchain.chains import RetrievalQA
13
+ from langchain.document_loaders import PyPDFLoader
14
+ from langchain.chat_models import ChatOpenAI
15
+ from googletrans import Translator
16
+ import re
17
+ import os
18
+ import gradio as gr
19
+
20
+ #from langchain.document_loaders import TextLoader
21
+ #Other loaders PyPDFLoader,PyPDFDirectoryLoader
22
+ #from langchain.document_loaders import UnstructuredFileLoader
23
+
24
# SECURITY: an API key must never be hard-coded or committed to the repository.
# The key is expected to be supplied through the OPENAI_API_KEY environment
# variable (for example as a secret of the hosting platform). Any key that was
# previously committed in this file must be treated as leaked and revoked.
if not os.environ.get('OPENAI_API_KEY'):
    # Warn instead of raising so that importing this module stays side-effect
    # free; the OpenAI client calls made later will fail without a key.
    print("WARNING: OPENAI_API_KEY is not set; requests to OpenAI will fail.")
25
def translate(in_str, dest="zh-CN"):
    """Translate *in_str* with Google Translate and return the translated text.

    Args:
        in_str: Text to translate. ``None``, an empty string or a
            whitespace-only string yields ``""`` without contacting the
            translation service.
        dest: Target language code passed to googletrans, e.g. ``"hi"``
            (Hindi), ``"en"`` (English), ``"zh-CN"`` (Simplified Chinese) or
            ``"zh-TW"`` (Traditional Chinese). Defaults to ``"zh-CN"``.

    Returns:
        The translated text. If the result object unexpectedly carries no
        ``text`` attribute, its string form is returned instead.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not in_str or not in_str.strip():
        return ""

    translator = Translator(service_urls=['translate.google.com'])
    translation = translator.translate(in_str, dest=dest)

    # Read the translated text from the result object itself. Parsing
    # str(translation) with a regex left a trailing ", " on the result and
    # failed on multi-line text because "." does not match newlines.
    translated_text = getattr(translation, "text", None)
    if translated_text is None:
        return str(translation)
    return translated_text
42
def _build_china_qa_chain():
    """Build the RetrievalQA chain over the persisted ``ChinaDB/`` Chroma store."""
    # Imported locally, as the original code did, so this block does not
    # depend on a change to the file-level imports.
    from langchain.prompts import PromptTemplate

    # The instruction sentence used to end in a dangling "If you don't know
    # the answer'"; it is completed so the model is told what to do when the
    # retrieved context does not contain the answer.
    prompt_template = (
        "Use the documents uploaded on China, to answer the question at the end. "
        "If you don't know the answer, just say that you don't know, "
        "don't try to make up an answer.\n"
        "\n"
        "{context}\n"
        "Question: {question}\n"
        "\n"
        "Answer:"
    )
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    docsearch = Chroma(
        persist_directory="ChinaDB/", embedding_function=OpenAIEmbeddings()
    )
    return RetrievalQA.from_chain_type(
        llm=OpenAI(temperature=0),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )


def _write_reference_file(source_docs):
    """Write the retrieved source passages to a text file and return its name.

    Writes ``Referenced.txt`` when *source_docs* is non-empty, otherwise a
    placeholder ``None.txt``. Both files are created in the current working
    directory and overwritten on every call.
    """
    if not source_docs:
        with open("None.txt", 'w', encoding='utf-8') as f:
            f.write("No direct sources found")
        return "None.txt"

    with open("Referenced.txt", 'w', encoding='utf-8') as f:
        for doc in source_docs:
            # Use the document attributes directly instead of regex-parsing
            # repr(doc): that approach raised AttributeError whenever the
            # pattern did not match and leaked repr quoting into the file.
            content = doc.page_content.replace('\uf07d', ' ').replace('\xa0', ' ')
            meta_data = ", ".join(
                f"{key!r}: {value!r}" for key, value in doc.metadata.items()
            )
            f.write("Page Content\n")
            f.write(content)
            f.write('\n')
            f.write("Meta Data-")
            f.write(meta_data)
            f.write('\n\n')
    return "Referenced.txt"


def cccs_demo(question):
    """Answer *question* from the China document store, in English and Chinese.

    Runs the question through the retrieval QA chain, then runs a fixed
    "summary" query through the same chain, translates both results with
    ``translate`` and writes the passages retrieved for *question* to a
    reference file.

    Note: the summary is produced by the same retrieval chain, so it is based
    on the chunks the retriever returns for the summary query.

    Args:
        question: The user's question as plain text.

    Returns:
        A tuple ``(final_result, final_summary, ref_str)``: the answer followed
        by its translation, the summary followed by its translation, and the
        name of the reference file (``"Referenced.txt"`` or ``"None.txt"``).
    """
    # One chain serves both queries; the original built an identical second one.
    qa = _build_china_qa_chain()

    answer = qa({"query": question})
    full_result = answer['result']
    source_docs = answer['source_documents']
    print("Answer ", full_result)
    final_result = full_result + '\n' + translate(full_result)
    print("Final result-", final_result)
    print("Number of sources ", len(source_docs))

    summary_answer = qa({"query": "Please give summary of contents all the documents."})
    doc_summary = summary_answer['result']
    print("Document Summary-", doc_summary)
    final_summary = doc_summary + '\n' + translate(doc_summary)
    print("Final result-", final_summary)

    ref_str = _write_reference_file(source_docs)
    return final_result, final_summary, ref_str
134
+ """
135
+ URL_COM = 'translate.google.com'
136
+ URL_HI = 'translate.google.hi'
137
+ LANG = "hi" #hi is for Hindi, en for English, zh or zh-CN for chinese simplified,zh-TW for traditional chinese
138
+ translator = Translator(service_urls=[URL_COM])
139
+ translation = translator.translate(answer['result'], dest=LANG)
140
+ #print(translation)
141
+ translation_str = str(translation)
142
+ answer_group = re.search('text=(.*)pronunciation=', translation_str)
143
+ answer_hindi = answer_group.group(1)
144
+ print("Answer in Hindi-", answer_hindi)
145
+ """
146
title = "Zero2AI CCCS Demo"
description = "Demonstration of multi-document and multi-lingual Q&A on China."

# One text input (the question) and three outputs that line up with the tuple
# returned by cccs_demo: answer text, summary text and the reference file.
demo = gr.Interface(
    cccs_demo,
    [gr.Textbox(label="Question")],
    [
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Repository Summary"),
        gr.File(label="Reference Details"),
    ],
    title=title,
    description=description,
    theme=gr.themes.Glass(primary_hue="indigo", secondary_hue="purple"),
    allow_flagging='never',
)

# Start the web server only when the file is executed as a script, so that
# importing the module (e.g. for testing) does not launch the app. `demo`
# stays at module level so tooling that looks it up by name can still find it.
if __name__ == "__main__":
    demo.launch()
150
+
151
+ #formatted_source = source_docs.replace('\\n', '\n').replace('\\t', '\t')
152
+ #print("Source Documents ",formatted_source)
153
+ #To check source in RetrievalQA
154
+ #qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs,return_source_documents=True)
155
+ #result
156
+ #answer['result']
157
+ #answer['source_documents']
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openai == 0.27.8
2
+ langchain == 0.0.205
3
+ chromadb == 0.3.23
4
+ pypdf == 3.9.1
5
+ PyPDF2 == 3.0.1
6
+ googletrans==4.0.0rc1