danjel145 commited on
Commit
77db1fb
·
verified ·
1 Parent(s): 36d4c68

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_community.document_loaders import DirectoryLoader
5
+ from langchain_openai import OpenAIEmbeddings
6
+ from langchain.vectorstores.chroma import Chroma
7
+ from langchain_openai import ChatOpenAI
8
+ from langchain.prompts import ChatPromptTemplate
9
+ import gradio as gr
10
+
11
# Resolve paths relative to this file so the app works from any CWD.
script_directory = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(script_directory, "docs")   # source PDFs live here
CHROMA_PATH = "chroma"                                # persisted vector index

# Propagate the API key only when it is actually set. The original
# `os.environ[...] = os.getenv(...)` raised TypeError (environ values must
# be str) whenever OPENAI_API_KEY was missing from the environment.
_api_key = os.getenv("OPENAI_API_KEY")
if _api_key is not None:
    os.environ["OPENAI_API_KEY"] = _api_key

# RAG prompt: retrieved chunks are injected as {context}, the user query
# as {question}. (Fixed the leading typo "nswer" -> "Answer".)
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""
23
+
24
def load_documents():
    """Load every PDF found directly under DATA_PATH as LangChain documents."""
    pdf_loader = DirectoryLoader(DATA_PATH, glob="*.pdf")
    return pdf_loader.load()
29
+
30
def split_text(documents):
    """Split *documents* into ~300-char chunks with 100-char overlap.

    `add_start_index=True` records each chunk's character offset in its
    metadata. Returns the list of chunk documents.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")
    return chunks
40
+
41
def save_to_chroma(chunks):
    """Rebuild the on-disk Chroma index at CHROMA_PATH from *chunks*.

    Any existing index is deleted first so stale chunks never leak into
    later similarity searches.
    """
    # Wipe the previous database before re-embedding.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Embed every chunk with OpenAI and persist the fresh store to disk.
    db = Chroma.from_documents(
        chunks, OpenAIEmbeddings(), persist_directory=CHROMA_PATH
    )
    db.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")
53
+
54
+
55
def get_response(query_text):
    """Answer *query_text* with RAG over the persisted Chroma index.

    Retrieves the 4 most relevant chunks, rejects low-relevance matches,
    builds the prompt from PROMPT_TEMPLATE, and queries ChatOpenAI.

    Returns the formatted response string (it is also printed). The
    original returned None, so callers — and the Gradio UI — never saw
    the answer. Fixes: removed a duplicated `context_text` line and a
    pointless f-string with no placeholders.
    """
    # Open the vector store previously persisted by save_to_chroma().
    embedding_function = OpenAIEmbeddings()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Top-4 chunks with relevance scores; bail out below a 0.7 threshold.
    results = db.similarity_search_with_relevance_scores(query_text, k=4)
    if len(results) == 0 or results[0][1] < 0.7:
        message = "Unable to find matching results."
        print(message)
        return message

    # Join the retrieved chunks into one context section for the prompt.
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)

    model = ChatOpenAI()
    response_text = model.predict(prompt)

    # Surface which source files the answer was grounded in.
    sources = [doc.metadata.get("source", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print(formatted_response)
    return formatted_response
79
+
80
def chatbot(query_text):
    """Gradio handler: (re)build the index from ./docs, then answer *query_text*.

    NOTE(review): this re-loads, re-splits and re-embeds every document on
    every single query, which is slow and costs an embedding call per chunk
    each time; the index only needs rebuilding when the documents change.
    Kept as-is here to preserve the app's existing behavior.
    """
    documents = load_documents()
    chunks = split_text(documents)
    save_to_chroma(chunks)
    # Return the answer so Gradio can display it — the original discarded
    # it, leaving the output textbox permanently empty.
    return get_response(query_text)
85
+
86
+
87
+
88
+
89
+
90
# Single-textbox UI wired to the full load -> index -> answer pipeline.
query_box = gr.components.Textbox(lines=7, label="Enter your text")
iface = gr.Interface(
    fn=chatbot,
    inputs=query_box,
    outputs="text",
    title="UK Insurance Law AI Tool",
)

iface.launch()