ram36 committed on
Commit
2762a28
·
verified ·
1 Parent(s): e1d4e57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -62
app.py CHANGED
@@ -1,63 +1,235 @@
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
- )
60
-
61
-
62
- if __name__ == "__main__":
63
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !pip install gradio
2
+
3
+
4
+ !pip install datasets tqdm pandas matplotlib langchain sentence_transformers faiss-gpu langchain-community torch accelerate
5
+
6
  import gradio as gr
7
+ import pandas as pd
8
+ from tqdm.notebook import tqdm
9
+ from datasets import Dataset
10
+ import matplotlib.pyplot as plt
11
+ from langchain.docstore.document import Document as LangchainDocument
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from sentence_transformers import SentenceTransformer
14
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
15
+ from langchain.vectorstores import FAISS
16
+ from langchain_community.embeddings import HuggingFaceEmbeddings
17
+ from langchain_community.vectorstores.utils import DistanceStrategy
18
+ import torch
19
+
20
+
21
# Show full (untruncated) column contents when printing pandas objects.
pd.set_option("display.max_colwidth", None)


def _read_text(path):
    """Return the full contents of a text file at *path*."""
    with open(path, "r") as fp:
        return fp.read()


# Read the three knowledge-source files.
# (The original copy-pasted three with-blocks and mislabelled the third
# file as the "second" file in its comment.)
content1 = _read_text("/content/iplteams_info.txt")
content2 = _read_text("/content/match_summaries_sentences.txt")
content3 = _read_text("/content/formatted_playersinfo.txt")

# Combine contents of all files, separated by three newlines — the same
# delimiter used to split into sections below.
combined_content = content1 + "\n\n\n" + content2 + "\n\n\n" + content3

# Split the combined content into sections (one section per document).
s = combined_content.split("\n\n\n")

# Sanity check: first section and total section count.
print(s[0])
print(len(s))

# Wrap each section in a LangchainDocument to form the raw knowledge base.
RAW_KNOWLEDGE_BASE = [
    LangchainDocument(page_content=doc)
    for doc in tqdm(s)
]
52
+
53
+
54
# Separators tried in order by the recursive splitter: markdown headings,
# code fences, horizontal rules, then progressively smaller text units.
# (RecursiveCharacterTextSplitter, SentenceTransformer and AutoTokenizer are
# already imported at the top of the file; the redundant re-imports that
# were here have been removed.)
MARKDOWN_SEPARATORS = [
    "\n#{1,6}",
    "```\n",
    "\n\\*\\*\\*+\n",
    "\n---+\n",
    "\n__+\n",
    "\n\n",
    "\n",
    " ",
    "",
]

# Character-based splitter: ~1000-char chunks with 100-char overlap so
# context is not lost at chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    add_start_index=True,      # record each chunk's offset in its parent doc
    strip_whitespace=True,
    separators=MARKDOWN_SEPARATORS,
)

docs_processed = []
for doc in RAW_KNOWLEDGE_BASE:
    docs_processed += text_splitter.split_documents([doc])

# Measure chunk lengths in tokens of the embedding model's tokenizer, to
# check that chunks fit the embedding model's context window.
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
lengths = [len(tokenizer.encode(doc.page_content)) for doc in tqdm(docs_processed)]

# Plot the distribution of chunk lengths. pd.Series.hist returns an Axes;
# the original set the title twice (ax.set_title then plt.title), with the
# second call overwriting the first — set it once here.
ax = pd.Series(lengths).hist()
ax.set_title("Histogram of Document Lengths")
plt.show()
89
+
90
+
91
from typing import List, Optional

# Embedding model used both for tokenization-aware chunking and for the
# FAISS index built later.
EMBEDDING_MODEL_NAME = "thenlper/gte-small"


def split_documents(
    chunk_size: int,
    knowledge_base: List[LangchainDocument],
    tokenizer_name: Optional[str] = EMBEDDING_MODEL_NAME,
) -> List[LangchainDocument]:
    """Split documents into token-sized chunks and drop exact duplicates.

    Args:
        chunk_size: Maximum chunk size, measured in tokens of the given
            tokenizer (not characters).
        knowledge_base: Documents to split.
        tokenizer_name: Hugging Face tokenizer used to count tokens.

    Returns:
        Chunked documents with exact-duplicate texts removed; the first
        occurrence is kept and the original order is preserved.
    """
    splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
        AutoTokenizer.from_pretrained(tokenizer_name),
        chunk_size=chunk_size,
        chunk_overlap=chunk_size // 10,  # 10% overlap between chunks
        add_start_index=True,
        strip_whitespace=True,
        separators=MARKDOWN_SEPARATORS,
    )

    chunks: List[LangchainDocument] = []
    for doc in knowledge_base:
        chunks += splitter.split_documents([doc])

    # De-duplicate by chunk text using a set (the original used a dict with
    # dummy True values as a makeshift set).
    seen = set()
    unique_chunks: List[LangchainDocument] = []
    for chunk in chunks:
        if chunk.page_content not in seen:
            seen.add(chunk.page_content)
            unique_chunks.append(chunk)
    return unique_chunks


docs_processed = split_documents(512, RAW_KNOWLEDGE_BASE, tokenizer_name=EMBEDDING_MODEL_NAME)
print(len(docs_processed))
print(docs_processed[0:3])
125
+
126
+
127
# FAISS, HuggingFaceEmbeddings, DistanceStrategy and torch are already
# imported at the top of the file; the redundant re-imports removed here.

# Confirm a GPU is available before building embeddings on CUDA.
print(torch.cuda.is_available())

# Embedding wrapper; normalize_embeddings=True so cosine similarity reduces
# to a dot product over unit vectors.
embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    multi_process=True,
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
)

# Build the FAISS index over all processed chunks using cosine distance.
KNOWLEDGE_VECTOR_DATABASE = FAISS.from_documents(
    docs_processed,
    embedding_model,
    distance_strategy=DistanceStrategy.COSINE,
)
146
+
147
+
148
# torch / AutoModelForCausalLM / AutoTokenizer / pipeline are already
# imported at the top of the file.

# Seed for reproducibility of any stochastic ops.
torch.random.manual_seed(0)

# Load the Phi-3 generator on GPU; trust_remote_code is required because the
# model repo ships custom modeling code.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Greedy decoding. The original also passed temperature=0.0 alongside
# do_sample=False; temperature is ignored under greedy decoding and only
# triggers a transformers warning, so it is omitted.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,  # return only newly generated tokens
    "do_sample": False,
}
173
+
174
+
175
# Chat-format RAG prompt. The system message tells the model to answer only
# from the context and to cite source-document numbers — so the context must
# actually contain numbered documents (fixed below).
prompt_chat = [
    {
        "role": "system",
        "content": """Using the information contained in the context,
Give a comprehensive answer to the question.
Respond only to the question asked , response should be concise and relevant to the question.
provide the number of the source document when relevant.
If the answer cannot be deduced from the context, do not give an answer""",
    },
    {
        "role": "user",
        "content": """Context:
{context}
---
Now here is the Question you need to answer.
Question:{question}
""",
    },
]

# Render the chat messages into the model's prompt format, leaving
# {context}/{question} placeholders to be filled per query.
RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
    prompt_chat, tokenize=False, add_generation_prompt=True,
)
print(RAG_PROMPT_TEMPLATE)

u_query = "give the match summary of royal challengers bengaluru and mumbai indians in 2024"
retrieved_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=u_query, k=3)

# Use ALL retrieved documents, numbered so the model can cite them. The
# original retrieved k=3 chunks but built the context from retrieved_docs[0]
# only, silently discarding two thirds of the retrieved evidence.
context = "\n".join(
    f"Document {i}:\n{doc.page_content}"
    for i, doc in enumerate(retrieved_docs)
)
final_prompt = RAG_PROMPT_TEMPLATE.format(question=u_query, context=context)

output = pipe(final_prompt, **generation_args)
print("YOUR QUESTION:\n", u_query, "\n")
print("MICROSOFT 128K ANSWER: \n", output[0]["generated_text"])
213
+
214
# Define the function to handle queries from the Gradio UI.
def handle_query(question):
    """Answer *question* with RAG over the IPL knowledge base.

    Retrieves the top-3 most similar chunks, numbers them in the context
    (the system prompt asks the model to cite source-document numbers),
    fills the chat template, and returns the generated answer. The original
    implementation used only the first retrieved chunk, discarding the
    other two.
    """
    retrieved_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=question, k=3)
    context = "\n".join(
        f"Document {i}:\n{doc.page_content}"
        for i, doc in enumerate(retrieved_docs)
    )
    final_prompt = RAG_PROMPT_TEMPLATE.format(question=question, context=context)
    output = pipe(final_prompt, **generation_args)
    return output[0]["generated_text"]


# Simple text-in / text-out Gradio UI around the RAG pipeline.
interface = gr.Interface(
    fn=handle_query,
    inputs="text",
    outputs="text",
    title="IPL Match Summary Generator",
    description="Get the match summary of IPL teams based on your query.",
)

# share=True exposes a public tunnel URL (required in Colab).
interface.launch(share=True)
235
+