naveenvenkatesh committed on
Commit
9a453dd
·
1 Parent(s): 1cd4218

Create app.py

Files changed (1)
  1. app.py +677 -0
app.py ADDED
@@ -0,0 +1,677 @@
+ import openai
+ import os
+ import pdfplumber
+ from langchain.chains.mapreduce import MapReduceChain
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.chains.summarize import load_summarize_chain
+ from langchain.chat_models import ChatOpenAI
+ from langchain.document_loaders import UnstructuredFileLoader
+ from langchain.prompts import PromptTemplate
+ import logging
+ import json
+ from typing import List
+ import mimetypes
+ import validators
+ import requests
+ import tempfile
+ from bs4 import BeautifulSoup
+ from langchain.chains import create_extraction_chain
+ from GoogleNews import GoogleNews
+ import pandas as pd
+ import gradio as gr
+ import re
+ from langchain.document_loaders import WebBaseLoader
+ from langchain.chains.llm import LLMChain
+ from langchain.chains.combine_documents.stuff import StuffDocumentsChain
+ from transformers import pipeline
+ import plotly.express as px
+ import yfinance as yf
+ import nltk
+ from nltk.tokenize import sent_tokenize
+
+ class KeyValueExtractor:
+
+     def __init__(self):
+
+         """
+         Initialize the KeyValueExtractor object.
+
+         Sets the zero-shot classification model used for sentiment analysis
+         and reads the OpenAI API key from the environment.
+         """
+         self.model = "facebook/bart-large-mnli"
+         openai.api_key = os.getenv('OPENAI_API_KEY')
+
+     def get_news(self, keyword):
+
+         # Search Google News for the keyword and collect the result links
+         googlenews = GoogleNews(lang='en', region='US', period='1d', encode='utf-8')
+         googlenews.clear()
+         googlenews.search(keyword)
+         googlenews.get_page(2)
+         news_result = googlenews.result(sort=True)
+         news_data_df = pd.DataFrame.from_dict(news_result)
+
+         news_data_df.info()
+
+         # Display the header of the dataframe
+         news_data_df.head()
+
+         tot_news_link = []
+         for index, headers in news_data_df.iterrows():
+             news_link = str(headers['link'])
+             tot_news_link.append(news_link)
+
+         return tot_news_link
+
+     def url_format(self, urls):
+
+         tot_url_links = []
+         for url_text in urls:
+             # Define a regex pattern to match URLs starting with 'http' or 'https'
+             pattern = r'(https?://[^\s]+)'
+
+             # Search for the URL in the text using the regex pattern
+             match = re.search(pattern, url_text)
+
+             if match:
+                 extracted_url = match.group(1)
+                 tot_url_links.append(extracted_url)
+
+             else:
+                 print("No URL found in the given text.")
+
+         return tot_url_links
+
+     def clear_error_url(self, urls):
+
+         error_url = []
+         for url in urls:
+             if validators.url(url):
+                 headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',}
+                 r = requests.get(url, headers=headers)
+                 if r.status_code != 200:
+                     # raise ValueError("Check the url of your file; returned status code %s" % r.status_code)
+                     print(f"Error fetching {url}: returned status code {r.status_code}")
+                     error_url.append(url)
+                     continue
+         cleaned_list_url = [item for item in urls if item not in error_url]
+         return cleaned_list_url
+
+     def get_each_link_summary(self, urls):
+
+         each_link_summary = ""
+
+         for url in urls:
+             loader = WebBaseLoader(url)
+             docs = loader.load()
+             text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
+                 chunk_size=3000, chunk_overlap=200
+             )
+
+             # Split the documents into chunks
+             split_docs = text_splitter.split_documents(docs)
+
+             # Prepare the prompt template for summarization
+             prompt_template = """Write a concise summary of the following:
+             {text}
+             CONCISE SUMMARY:"""
+             prompt = PromptTemplate.from_template(prompt_template)
+
+             # Prepare the template for refining the summary with additional context
+             refine_template = (
+                 "Your job is to produce a final summary.\n"
+                 "We have provided an existing summary up to a certain point: {existing_answer}\n"
+                 "We have the opportunity to refine the existing summary "
+                 "(only if needed) with some more context below.\n"
+                 "------------\n"
+                 "{text}\n"
+                 "------------\n"
+                 "Given the new context, refine the original summary. "
+                 "If the context isn't useful, return the original summary."
+             )
+             refine_prompt = PromptTemplate.from_template(refine_template)
+
+             # Load the summarization chain using the ChatOpenAI language model
+             chain = load_summarize_chain(
+                 llm=ChatOpenAI(temperature=0),
+                 chain_type="refine",
+                 question_prompt=prompt,
+                 refine_prompt=refine_prompt,
+                 return_intermediate_steps=True,
+                 input_key="input_documents",
+                 output_key="output_text",
+             )
+
+             # Generate the refined summary using the loaded summarization chain
+             result = chain({"input_documents": split_docs}, return_only_outputs=True)
+             print(result["output_text"])
+
+             # Append the refined summary for this link
+             each_link_summary = each_link_summary + result["output_text"]
+
+         return each_link_summary
+
+     def save_text_to_file(self, each_link_summary) -> str:
+
+         """
+         Save the combined link summaries to a text file.
+         Returns:
+             str: Path of the saved text file.
+         """
+
+         # Get the path to the text file where the extracted text will be saved
+         file_path = "extracted_text.txt"
+         try:
+             with open(file_path, 'w') as file:
+                 # Write the extracted text into the text file
+                 file.write(each_link_summary)
+             # Return the file path of the saved text file
+             return file_path
+         except IOError as e:
+             # If an IOError occurs during the file saving process, log the error
+             logging.error(f"Error while saving text to file: {e}")
+
+     def document_loader(self, file_path) -> List[str]:
+
+         """
+         Load the text from the saved file and split it into documents.
+         Returns:
+             List[str]: List of document texts.
+         """
+
+         # Initialize the UnstructuredFileLoader
+         loader = UnstructuredFileLoader(file_path, strategy="fast")
+         # Load the documents from the file
+         docs = loader.load()
+
+         # Return the list of loaded document texts
+         return docs
+
+     def document_text_splitter(self, docs) -> List[str]:
+
+         """
+         Split documents into chunks for efficient processing.
+         Returns:
+             List[str]: List of split document chunks.
+         """
+
+         # Initialize the text splitter with specified chunk size and overlap
+         text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
+             chunk_size=3000, chunk_overlap=200
+         )
+
+         # Split the documents into chunks
+         split_docs = text_splitter.split_documents(docs)
+
+         # Return the list of split document chunks
+         return split_docs
+
+     def extract_key_value_pair_for_news(self, content) -> str:
+
+         """
+         Extract key-value pairs from the refined summary.
+         Returns the extracted key-value pairs as text.
+         """
+
+         try:
+
+             openai.api_key = os.getenv('OPENAI_API_KEY')
+
+             # Use OpenAI's Completion API to analyze the text and extract key-value pairs
+             response = openai.Completion.create(
+                 engine="text-davinci-003",  # You can choose a different engine as well
+                 temperature=0,
+                 prompt=f"Get the maximum count of meaningful key-value pairs. The content is in backticks. ```{content}```.",
+                 max_tokens=1000  # You can adjust the length of the response
+             )
+
+             # Extract and return the chatbot's reply
+             result = response['choices'][0]['text'].strip()
+             return result
+         except Exception as e:
+             # If an error occurs during the key-value extraction process, log the error
+             logging.error(f"Error while extracting key-value pairs: {e}")
+             print("Error:", e)
+
+     def refine_summary(self, split_docs) -> str:
+
+         """
+         Refine the summary using the provided context.
+         Returns:
+             str: Refined summary, plus the key-value pairs extracted from it.
+         """
+
+         # Prepare the prompt template for summarization
+         prompt_template = """Write a detailed, broad, abstractive summary of the following:
+         {text}
+         CONCISE SUMMARY:"""
+         prompt = PromptTemplate.from_template(prompt_template)
+
+         # Prepare the template for refining the summary with additional context
+         refine_template = (
+             "Your job is to produce a final summary.\n"
+             "We have provided an existing summary up to a certain point: {existing_answer}\n"
+             "We have the opportunity to refine the existing summary "
+             "(only if needed) with some more context below.\n"
+             "------------\n"
+             "{text}\n"
+             "------------\n"
+             "Given the new context, refine the original summary. "
+             "If the context isn't useful, return the original summary."
+         )
+         refine_prompt = PromptTemplate.from_template(refine_template)
+
+         # Load the summarization chain using the ChatOpenAI language model
+         chain = load_summarize_chain(
+             llm=ChatOpenAI(temperature=0),
+             chain_type="refine",
+             question_prompt=prompt,
+             refine_prompt=refine_prompt,
+             return_intermediate_steps=True,
+             input_key="input_documents",
+             output_key="output_text",
+         )
+
+         # Generate the refined summary using the loaded summarization chain
+         result = chain({"input_documents": split_docs}, return_only_outputs=True)
+
+         key_value_pair = self.extract_key_value_pair_for_news(result["output_text"])
+
+         # Return the refined summary and its key-value pairs
+         return result["output_text"], key_value_pair
+
+     def analyze_sentiment_for_graph(self, text):
+
+         pipe = pipeline("zero-shot-classification", model=self.model)
+         label = ["Positive", "Negative", "Neutral"]
+         result = pipe(text, label)
+         sentiment_scores = {
+             result['labels'][0]: result['scores'][0],
+             result['labels'][1]: result['scores'][1],
+             result['labels'][2]: result['scores'][2]
+         }
+         return sentiment_scores
+
+     def display_graph_for_news(self, text):
+
+         sentiment_scores = self.analyze_sentiment_for_graph(text)
+         labels = sentiment_scores.keys()
+         scores = sentiment_scores.values()
+         fig = px.bar(x=scores, y=labels, orientation='h', color=labels, color_discrete_map={"Negative": "red", "Positive": "green", "Neutral": "gray"})
+         fig.update_traces(texttemplate='%{x:.1%}', textposition='outside', textfont=dict(size=6))
+         fig.update_layout(title="Sentiment Analysis", width=600)
+
+         formatted_pairs = []
+         for key, value in sentiment_scores.items():
+             formatted_value = round(value, 2)  # Round the value to two decimal places
+             formatted_pairs.append(f"{key} : {formatted_value}")
+
+         result_string = '\t'.join(formatted_pairs)
+
+         return fig
+
+     def main_for_news(self, keyword):
+
+         # News pipeline: fetch links, drop broken ones, summarise each page,
+         # refine an overall summary and plot its sentiment
+         urls = self.get_news(keyword)
+         tot_urls = self.url_format(urls)
+         clean_url = self.clear_error_url(tot_urls)
+         each_link_summary = self.get_each_link_summary(clean_url)
+         file_path = self.save_text_to_file(each_link_summary)
+         docs = self.document_loader(file_path)
+         split_docs = self.document_text_splitter(docs)
+         result_summary_for_news, key_value_pair_for_news = self.refine_summary(split_docs)
+         fig = self.display_graph_for_news(result_summary_for_news)
+
+         return result_summary_for_news, key_value_pair_for_news, fig
+
+     def get_url(self, keyword):
+
+         return f"https://finance.yahoo.com/quote/{keyword}?p={keyword}"
+
+     def get_link_summary_for_finance(self, url):
+
+         loader = WebBaseLoader(url)
+         docs = loader.load()
+         text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
+             chunk_size=3000, chunk_overlap=200
+         )
+
+         # Split the documents into chunks
+         split_docs = text_splitter.split_documents(docs)
+
+         # Prepare the prompt template for summarization
+         prompt_template = """The given text is the finance stock details for one company. I want to get values for
+         Previous Close : [value]
+         Open : [value]
+         Bid : [value]
+         Ask : [value]
+         Day's Range : [value]
+         52 Week Range : [value]
+         Volume : [value]
+         Avg. Volume : [value]
+         Market Cap : [value]
+         Beta (5Y Monthly) : [value]
+         PE Ratio (TTM) : [value]
+         EPS (TTM) : [value]
+         Earnings Date : [value]
+         Forward Dividend & Yield : [value]
+         Ex-Dividend Date : [value]
+         1y Target Est : [value]
+         from those details and write an abstractive summary about them:
+         Given Text: {text}
+         CONCISE SUMMARY:"""
+         prompt = PromptTemplate.from_template(prompt_template)
+
+         # Prepare the template for refining the summary with additional context
+         refine_template = (
+             "Your job is to produce a final summary.\n"
+             "We have provided an existing summary up to a certain point: {existing_answer}\n"
+             "We have the opportunity to refine the existing summary "
+             "(only if needed) with some more context below.\n"
+             "------------\n"
+             "{text}\n"
+             "------------\n"
+             "Given the new context, refine the original summary. "
+             "If the context isn't useful, return the original summary."
+         )
+         refine_prompt = PromptTemplate.from_template(refine_template)
+
+         # Load the summarization chain using the ChatOpenAI language model
+         chain = load_summarize_chain(
+             llm=ChatOpenAI(temperature=0),
+             chain_type="refine",
+             question_prompt=prompt,
+             refine_prompt=refine_prompt,
+             return_intermediate_steps=True,
+             input_key="input_documents",
+             output_key="output_text",
+         )
+
+         # Generate the refined summary using the loaded summarization chain
+         result = chain({"input_documents": split_docs}, return_only_outputs=True)
+         print(result["output_text"])
+
+         return result["output_text"]
+
+     def one_day_summary_finance(self, content) -> str:
+
+         # Use OpenAI's Completion API to summarise the day's finance details
+         response = openai.Completion.create(
+             engine="text-davinci-003",  # You can choose a different engine as well
+             temperature=0,
+             prompt=f"I want a detailed summary of the given finance details: what happened today compared with the last day, whether it is good or bad, Bullish or Bearish. The content is in backticks. ```{content}```.",
+             max_tokens=1000  # You can adjust the length of the response
+         )
+
+         # Extract and return the chatbot's reply
+         result = response['choices'][0]['text'].strip()
+         print(result)
+         return result
+
+     def extract_key_value_pair_for_finance(self, content) -> str:
+
+         """
+         Extract key-value pairs from the refined summary.
+         Returns the extracted key-value pairs as text.
+         """
+
+         try:
+
+             # Use OpenAI's Completion API to analyze the text and extract key-value pairs
+             response = openai.Completion.create(
+                 engine="text-davinci-003",  # You can choose a different engine as well
+                 temperature=0,
+                 prompt=f"Get the maximum count of meaningful key-value pairs. The content is in backticks. ```{content}```.",
+                 max_tokens=1000  # You can adjust the length of the response
+             )
+
+             # Extract and return the chatbot's reply
+             result = response['choices'][0]['text'].strip()
+             return result
+         except Exception as e:
+             # If an error occurs during the key-value extraction process, log the error
+             logging.error(f"Error while extracting key-value pairs: {e}")
+             print("Error:", e)
+
+     def analyze_sentiment_for_graph_finance(self, text):
+
+         pipe = pipeline("zero-shot-classification", model=self.model)
+         label = ["Positive", "Negative", "Neutral"]
+         result = pipe(text, label)
+         sentiment_scores = {
+             result['labels'][0]: result['scores'][0],
+             result['labels'][1]: result['scores'][1],
+             result['labels'][2]: result['scores'][2]
+         }
+         return sentiment_scores
+
+     def display_graph_for_finance(self, text):
+
+         sentiment_scores = self.analyze_sentiment_for_graph_finance(text)
+         labels = sentiment_scores.keys()
+         scores = sentiment_scores.values()
+         fig = px.bar(x=scores, y=labels, orientation='h', color=labels, color_discrete_map={"Negative": "red", "Positive": "green", "Neutral": "gray"})
+         fig.update_traces(texttemplate='%{x:.1%}', textposition='outside', textfont=dict(size=6))
+         fig.update_layout(title="Sentiment Analysis", width=600)
+
+         formatted_pairs = []
+         for key, value in sentiment_scores.items():
+             formatted_value = round(value, 2)  # Round the value to two decimal places
+             formatted_pairs.append(f"{key} : {formatted_value}")
+
+         result_string = '\t'.join(formatted_pairs)
+
+         return fig
+
+     def get_finance_data(self, symbol):
+
+         # Define the date range for one year of historical data
+         start_date = '2022-08-19'
+         end_date = '2023-08-19'
+
+         # Fetch historical OHLC data using yfinance
+         data = yf.download(symbol, start=start_date, end=end_date)
+
+         # Select only the OHLC columns
+         ohlc_data = data[['Open', 'High', 'Low', 'Close']]
+
+         csv_path = "ohlc_data.csv"
+         # Save the OHLC data to a CSV file
+         ohlc_data.to_csv(csv_path)
+         return csv_path
+
+     def csv_to_dataframe(self, csv_path):
+
+         # Read the CSV file into a DataFrame
+         df = pd.read_csv(csv_path)
+         # Return the DataFrame for further processing
+         return df
+
+     def save_dataframe_in_text_file(self, df):
+
+         output_file_path = 'output.txt'
+
+         # Convert the DataFrame to a tab-separated text file
+         df.to_csv(output_file_path, sep='\t', index=False)
+
+         return output_file_path
+
+     def csv_loader(self, output_file_path):
+
+         # Load the exported text file as documents
+         loader = UnstructuredFileLoader(output_file_path, strategy="fast")
+         docs = loader.load()
+
+         return docs
+
+     def document_text_splitter_finance(self, docs):
+
+         """
+         Split documents into chunks for efficient processing.
+         Returns:
+             List[str]: List of split document chunks.
+         """
+
+         # Initialize the text splitter with specified chunk size and overlap
+         text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
+             chunk_size=1000, chunk_overlap=200
+         )
+
+         # Split the documents into chunks
+         split_docs = text_splitter.split_documents(docs)
+
+         # Return the list of split document chunks
+         return split_docs
+
+     def change_bullet_points(self, text):
+
+         nltk.download('punkt')  # Download the sentence tokenizer data (only needs to run once)
+
+         # Tokenize the passage into sentences
+         sentences = sent_tokenize(text)
+         bullet_string = ""
+         # Turn each sentence into a bullet point
+         for sentence in sentences:
+             bullet_string += "* " + sentence + "\n"
+
+         return bullet_string
+
+     def one_year_summary_for_finance(self, keyword):
+
+         # Download a year of OHLC data and turn it into split documents
+         csv_path = self.get_finance_data(keyword)
+         df = self.csv_to_dataframe(csv_path)
+         output_file_path = self.save_dataframe_in_text_file(df)
+         docs = self.csv_loader(output_file_path)
+         split_docs = self.document_text_splitter(docs)
+
+         prompt_template = """Analyze the financial details and write an abstractive, quick, short summary of how the company performed, up or down, Bullish or Bearish, for the following:
+         {text}
+         CONCISE SUMMARY:"""
+         prompt = PromptTemplate.from_template(prompt_template)
+
+         # Prepare the template for refining the summary with additional context
+         refine_template = (
+             "Your job is to produce a final summary.\n"
+             "We have provided an existing summary up to a certain point: {existing_answer}\n"
+             "We have the opportunity to refine the existing summary "
+             "(only if needed) with some more context below.\n"
+             "------------\n"
+             "{text}\n"
+             "------------\n"
+             "Given the new context, refine the original summary. "
+             "If the context isn't useful, return the original summary. "
+             "A 10 line summary is enough."
+         )
+         refine_prompt = PromptTemplate.from_template(refine_template)
+
+         # Load the summarization chain using the ChatOpenAI language model
+         chain = load_summarize_chain(
+             llm=ChatOpenAI(temperature=0),
+             chain_type="refine",
+             question_prompt=prompt,
+             refine_prompt=refine_prompt,
+             return_intermediate_steps=True,
+             input_key="input_documents",
+             output_key="output_text",
+         )
+
+         # Generate the refined summary using the loaded summarization chain
+         result = chain({"input_documents": split_docs}, return_only_outputs=True)
+         one_year_performance_summary = self.change_bullet_points(result["output_text"])
+         plot_for_year = self.display_graph_for_finance(one_year_performance_summary)
+         # Return the refined summary and the sentiment plot
+         return one_year_performance_summary, plot_for_year
+
+     def main_for_finance_tool(self, keyword):
+
+         # Finance pipeline: build the Yahoo Finance URL, summarise the page,
+         # extract key-value pairs and plot the sentiment
+         clean_url = self.get_url(keyword)
+         link_summary = self.get_link_summary_for_finance(clean_url)
+         clean_summary = self.one_day_summary_finance(link_summary)
+         key_value = self.extract_key_value_pair_for_finance(clean_summary)
+         sentiment_plot_for_one_day = self.display_graph_for_finance(clean_summary)
+
+         return clean_summary, key_value, sentiment_plot_for_one_day
+
+     def company_names(self, input_text):
+         # Extract the ticker symbol from inputs such as "Apple Inc. - AAPL"
+         words = input_text.split("-")
+         return words[1].strip()
+
+     def gradio_interface(self):
+
+         with gr.Blocks(css="style.css", theme='karthikeyan-adople/hudsonhayes-gray') as app:
+             with gr.Tabs():
+                 with gr.TabItem("Google News"):
+                     gr.HTML("""<center class="darkblue" style='background-color:rgb(0,1,36); text-align:center;padding:25px;'><center><h1 class="center">
+                     <img src="file=logo.png" height="110px" width="280px"></h1></center>
+                     <br><h1 style="color:#fff">Company performance summarisation and sentiment analysis</h1></center>""")
+                     with gr.Row(elem_id="col-container"):
+                         with gr.Column(scale=1.0, min_width=150):
+                             input_news = gr.Textbox(label="NEWS")
+                     with gr.Row(elem_id="col-container"):
+                         with gr.Column(scale=1, min_width=150):
+                             result_summary_for_news = gr.Textbox(label="Summary", lines=8)
+                     with gr.Row(elem_id="col-container"):
+                         with gr.Column(scale=0.50, min_width=150):
+                             key_value_pair_result_for_news = gr.Textbox(label="Key Value Pair", lines=15)
+                         with gr.Column(scale=0.50, min_width=50):
+                             sentiment_plot = gr.Plot(label="Sentiment", size=(300, 300))
+                     with gr.Row(elem_id="col-container"):
+                         with gr.Column(scale=1.0, min_width=150):
+                             get_summary_for_news = gr.Button("Analyse")
+
+                     get_summary_for_news.click(self.main_for_news, input_news, [result_summary_for_news, key_value_pair_result_for_news, sentiment_plot])
+
+                 with gr.TabItem("Finance Tool"):
+                     gr.HTML("""<center class="darkblue" style='background-color:rgb(0,1,36); text-align:center;padding:25px;'><center><h1 class="center">
+                     <img src="file=logo.png" height="110px" width="280px"></h1></center>
+                     <br><h1 style="color:#fff">Company performance summarisation and sentiment analysis</h1></center>""")
+                     with gr.Row(elem_id="col-container"):
+                         with gr.Column(scale=1.0, min_width=150):
+                             input_news = gr.Textbox(label="Company Name")
+                     with gr.Row(elem_id="col-container"):
+                         with gr.Column(scale=1.0, min_width=150):
+                             gr.Examples(
+                                 [["Apple Inc. - AAPL"], ["Microsoft Corporation - MSFT"], ["Amazon.com Inc. - AMZN"], ["Facebook Inc. - FB"], ["Tesla Inc. - TSLA"]],
+                                 [input_news],
+                                 input_news,
+                                 fn=self.company_names,
+                                 cache_examples=True,
+                             )
+                     with gr.Accordion("Get Summary for Last Day", open=False):
+                         with gr.Row(elem_id="col-container"):
+                             with gr.Column(scale=1.0, min_width=150):
+                                 analyse_summary_for_finance = gr.Button("Analyse")
+                         with gr.Row(elem_id="col-container"):
+                             with gr.Column(scale=1, min_width=150):
+                                 result_summary = gr.Textbox(label="Summary", lines=10)
+                         with gr.Row(elem_id="col-container"):
+                             with gr.Column(scale=0.50, min_width=0):
+                                 key_value_pair_result = gr.Textbox(label="Key Value Pair", lines=10)
+                             with gr.Column(scale=0.50, min_width=0):
+                                 plot_for_one_day = gr.Plot(label="Sentiment", size=(500, 500))
+
+                     with gr.Accordion("Get Summary for One Year", open=False):
+                         with gr.Row(elem_id="col-container"):
+                             with gr.Column(scale=1.0, min_width=150):
+                                 one_year = gr.Button("Analyse One Year Summary and Analyse Sentiment")
+                         with gr.Row(elem_id="col-container"):
+                             with gr.Column(scale=1.0, min_width=150):
+                                 one_year_summary = gr.Textbox(label="Summary Of One Year Performance", lines=20)
+                             with gr.Column(scale=1.0, min_width=0):
+                                 plot_for_year = gr.Plot(label="Sentiment", size=(500, 500))
+
+                     analyse_summary_for_finance.click(self.main_for_finance_tool, input_news, [result_summary, key_value_pair_result, plot_for_one_day])
+                     one_year.click(self.one_year_summary_for_finance, input_news, [one_year_summary, plot_for_year])
+
+         app.launch(debug=True)
+
+ text_process = KeyValueExtractor()
+ text_process.gradio_interface()