gptaibox committed on
Commit
e1ee643
·
1 Parent(s): 2d01fac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -63
app.py CHANGED
@@ -1,103 +1,67 @@
1
  import gradio as gr
2
  import os
3
  import openai
4
- from newspaper import Article
5
- from newspaper import Config
6
-
7
  import json
8
  import re
9
  from transformers import GPT2Tokenizer
10
- import requests
11
 
12
  # define the text summarizer function
13
- def text_prompt(request, system_role, page_url, contraseña, temp):
14
  try:
15
- USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
16
-
17
- config = Config()
18
- config.browser_user_agent = USER_AGENT
19
- config.request_timeout = 10
20
-
21
- article = Article(page_url, config=config)
22
- article.download()
23
- article.parse()
24
-
25
  except Exception as e:
26
  return "", f"--- An error occurred while processing the URL: {e} ---", ""
27
-
28
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
29
- # TODO: for chinese, separator is '。'
30
- sentences = article.text.split('.')
31
-
32
- tokens = []
33
- page_text = ""
34
-
35
- for sentence in sentences:
36
- tokens.extend(tokenizer.tokenize(sentence))
37
-
38
- # Trim text to a maximum of 3100 tokens
39
- if len(tokens) > 3100:
40
- break
41
- page_text += sentence + ". "
42
-
43
- # Delete the last space
44
- page_text = page_text.strip()
45
 
 
46
  num_tokens = len(tokens)
47
 
48
- if num_tokens > 10 and contraseña.startswith("sk-"):
49
  openai.api_key = contraseña
50
  # get the response from openai API
51
  try:
52
- response = openai.ChatCompletion.create(
53
- model="text-davinci-003",
54
- messages=[
55
- {"role": "system", "content": system_role},
56
- {"role": "user", "content": request + "\n\n" + 'Text:\n\n"' + page_text + '\n"'}
57
- ],
58
- max_tokens=512,
59
  temperature=temp,
60
- top_p=1.0,
61
  )
62
  # get the response text
63
- response_text = response['choices'][0]['message']['content']
64
- total_tokens = response["usage"]["total_tokens"]
65
-
66
  # clean the response text
67
  response_text = re.sub(r'\s+', ' ', response_text)
68
- response_text = f"#### [{page.title}]({article_url})\n\n{response_text.strip()}"
69
- total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(total_tokens/1000*0.002)
70
-
71
-
72
- return article.text, response_text, total_tokens_str
73
  except Exception as e:
74
- return article.text, f"--- An error occurred while processing the request: {e} ---", num_tokens
75
- return article.text, "--- Check API-Key or Min number of tokens:", str(num_tokens)
76
 
77
  # define the gradio interface
78
  iface = gr.Interface(
79
  fn=text_prompt,
80
  inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
81
- gr.Textbox(lines=1, placeholder="Enter your gpt-role description here...", label="GPT Role:", type="text"),
82
- gr.Textbox(lines=1, placeholder="Enter the Article's URL here...", label="Article's URL to parse:", type="text"),
83
  gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
84
- gr.Slider(0.0,1.0, value=0.7, label="Temperature:")
85
  ],
86
- outputs=[gr.Textbox(label="Input:"), gr.Markdown(label="Output:"), gr.Markdown(label="Total Tokens:")],
87
- examples=[["请用简体中文生成一段200字的摘要, 并提取5个关键词.", "Act as a News Editor", "https://openai.com/blog/planning-for-agi-and-beyond","",0.7],
88
- ["Generate a summary of the following text. Give me an overview of the main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "Act as a Business Consultant", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
89
- ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","Act as a Business Consultant", "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
90
- ],
91
- title="ChatGPT info extraction from URL",
92
- description="This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [gpt-3.5-turbo] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 1.800 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the description for the system role, the url for text retrieval, your api-key and temperature to process the text."
93
  )
94
 
95
- # error capturing in integration as a component
96
 
97
  error_message = ""
98
 
99
  try:
100
- iface.queue(concurrency_count=20)
101
  iface.launch()
102
  except Exception as e:
103
  error_message = "An error occurred: " + str(e)
 
1
  import gradio as gr
2
  import os
3
  import openai
4
+ import newspaper
 
 
5
  import json
6
  import re
7
  from transformers import GPT2Tokenizer
8
+
9
 
10
  # define the text summarizer function
11
+ def text_prompt(request, page_url, contraseña, temp):
12
  try:
13
+ page = newspaper.Article(url=page_url)
14
+ page.download()
15
+ page.parse()
 
 
 
 
 
 
 
16
  except Exception as e:
17
  return "", f"--- An error occurred while processing the URL: {e} ---", ""
18
+
19
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ tokens = tokenizer.tokenize(page.text)
22
  num_tokens = len(tokens)
23
 
24
+ if num_tokens > 10 and num_tokens < 2000:
25
  openai.api_key = contraseña
26
  # get the response from openai API
27
  try:
28
+ response = openai.Completion.create(
29
+ engine="text-davinci-003",
30
+ prompt=request + "\n\n" + page.text,
31
+ max_tokens=2048,
 
 
 
32
  temperature=temp,
33
+ top_p=0.9,
34
  )
35
  # get the response text
36
+ response_text = response.choices[0].text
 
 
37
  # clean the response text
38
  response_text = re.sub(r'\s+', ' ', response_text)
39
+ return page.text, response_text, num_tokens
 
 
 
 
40
  except Exception as e:
41
+ return page.text, f"--- An error occurred while processing the request: {e} ---", num_tokens
42
+ return page.text, "--- Max number of tokens ---", num_tokens
43
 
44
# define the gradio interface
# The four inputs map positionally onto text_prompt(request, page_url, contraseña, temp).
iface = gr.Interface(
    fn=text_prompt,
    inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
            gr.Textbox(lines=1, placeholder="Enter the URL here...", label="URL to parse:", type="text"),
            # API key is masked in the UI (type="password")
            gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
            gr.Slider(0.0,1.0, value=0.3, label="Temperature:")
            ],
    # The three outputs mirror the 3-tuple returned by text_prompt.
    outputs=[gr.Textbox(label="Input:"), gr.Textbox(label="Output:"), gr.Textbox(label="Tokens:")],
    # Example rows: [prompt, url, api-key (left blank), temperature]
    examples=[["Summarize the following text in Chinese, and give 5 keywords:","https://openai.com/blog/planning-for-agi-and-beyond","",0.5],
              ["Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7]
              ],
    title="Information Extraction Interface:",
    # description="This tool allows querying the text retrieved from the URL using OpenAI's [text-davinci-003] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 2000 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the url for text retrieval, your api-key and temperature to process the text."
)
59
 
60
+
61
 
62
# Launch the UI, capturing any startup failure instead of crashing the module.
error_message = ""
try:
    iface.launch()
except Exception as e:
    error_message = f"An error occurred: {e}"