gptaibox committed on
Commit
e1ee643
·
1 Parent(s): 2d01fac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -63
app.py CHANGED
@@ -1,103 +1,67 @@
1
  import gradio as gr
2
  import os
3
  import openai
4
- from newspaper import Article
5
- from newspaper import Config
6
-
7
  import json
8
  import re
9
  from transformers import GPT2Tokenizer
10
- import requests
11
 
12
  # define the text summarizer function
13
- def text_prompt(request, system_role, page_url, contraseña, temp):
14
  try:
15
- USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
16
-
17
- config = Config()
18
- config.browser_user_agent = USER_AGENT
19
- config.request_timeout = 10
20
-
21
- article = Article(page_url, config=config)
22
- article.download()
23
- article.parse()
24
-
25
  except Exception as e:
26
  return "", f"--- An error occurred while processing the URL: {e} ---", ""
27
-
28
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
29
- # TODO: for chinese, separator is '。'
30
- sentences = article.text.split('.')
31
-
32
- tokens = []
33
- page_text = ""
34
-
35
- for sentence in sentences:
36
- tokens.extend(tokenizer.tokenize(sentence))
37
-
38
- # Trim text to a maximum of 3100 tokens
39
- if len(tokens) > 3100:
40
- break
41
- page_text += sentence + ". "
42
-
43
- # Delete the last space
44
- page_text = page_text.strip()
45
 
 
46
  num_tokens = len(tokens)
47
 
48
- if num_tokens > 10 and contraseña.startswith("sk-"):
49
  openai.api_key = contraseña
50
  # get the response from openai API
51
  try:
52
- response = openai.ChatCompletion.create(
53
- model="text-davinci-003",
54
- messages=[
55
- {"role": "system", "content": system_role},
56
- {"role": "user", "content": request + "\n\n" + 'Text:\n\n"' + page_text + '\n"'}
57
- ],
58
- max_tokens=512,
59
  temperature=temp,
60
- top_p=1.0,
61
  )
62
  # get the response text
63
- response_text = response['choices'][0]['message']['content']
64
- total_tokens = response["usage"]["total_tokens"]
65
-
66
  # clean the response text
67
  response_text = re.sub(r'\s+', ' ', response_text)
68
- response_text = f"#### [{page.title}]({article_url})\n\n{response_text.strip()}"
69
- total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(total_tokens/1000*0.002)
70
-
71
-
72
- return article.text, response_text, total_tokens_str
73
  except Exception as e:
74
- return article.text, f"--- An error occurred while processing the request: {e} ---", num_tokens
75
- return article.text, "--- Check API-Key or Min number of tokens:", str(num_tokens)
76
 
77
  # define the gradio interface
78
  iface = gr.Interface(
79
  fn=text_prompt,
80
  inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
81
- gr.Textbox(lines=1, placeholder="Enter your gpt-role description here...", label="GPT Role:", type="text"),
82
- gr.Textbox(lines=1, placeholder="Enter the Article's URL here...", label="Article's URL to parse:", type="text"),
83
  gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
84
- gr.Slider(0.0,1.0, value=0.7, label="Temperature:")
85
  ],
86
- outputs=[gr.Textbox(label="Input:"), gr.Markdown(label="Output:"), gr.Markdown(label="Total Tokens:")],
87
- examples=[["请用简体中文生成一段200字的摘要, 并提取5个关键词.", "Act as a News Editor", "https://openai.com/blog/planning-for-agi-and-beyond","",0.7],
88
- ["Generate a summary of the following text. Give me an overview of the main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "Act as a Business Consultant", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
89
- ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","Act as a Business Consultant", "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
90
- ],
91
- title="ChatGPT info extraction from URL",
92
- description="This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [gpt-3.5-turbo] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 1.800 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the description for the system role, the url for text retrieval, your api-key and temperature to process the text."
93
  )
94
 
95
- # error capturing in integration as a component
96
 
97
  error_message = ""
98
 
99
  try:
100
- iface.queue(concurrency_count=20)
101
  iface.launch()
102
  except Exception as e:
103
  error_message = "An error occurred: " + str(e)
 
1
  import gradio as gr
2
  import os
3
  import openai
4
+ import newspaper
 
 
5
  import json
6
  import re
7
  from transformers import GPT2Tokenizer
8
+
9
 
10
  # define the text summarizer function
11
+ def text_prompt(request, page_url, contraseña, temp):
12
  try:
13
+ page = newspaper.Article(url=page_url)
14
+ page.download()
15
+ page.parse()
 
 
 
 
 
 
 
16
  except Exception as e:
17
  return "", f"--- An error occurred while processing the URL: {e} ---", ""
18
+
19
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ tokens = tokenizer.tokenize(page.text)
22
  num_tokens = len(tokens)
23
 
24
+ if num_tokens > 10 and num_tokens < 2000:
25
  openai.api_key = contraseña
26
  # get the response from openai API
27
  try:
28
+ response = openai.Completion.create(
29
+ engine="text-davinci-003",
30
+ prompt=request + "\n\n" + page.text,
31
+ max_tokens=2048,
 
 
 
32
  temperature=temp,
33
+ top_p=0.9,
34
  )
35
  # get the response text
36
+ response_text = response.choices[0].text
 
 
37
  # clean the response text
38
  response_text = re.sub(r'\s+', ' ', response_text)
39
+ return page.text, response_text, num_tokens
 
 
 
 
40
  except Exception as e:
41
+ return page.text, f"--- An error occurred while processing the request: {e} ---", num_tokens
42
+ return page.text, "--- Max number of tokens ---", num_tokens
43
 
44
# define the gradio interface
# The four inputs map positionally onto text_prompt(request, page_url, contraseña, temp).
iface = gr.Interface(
    fn=text_prompt,
    inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
            gr.Textbox(lines=1, placeholder="Enter the URL here...", label="URL to parse:", type="text"),
            # API key is masked in the UI (type="password")
            gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
            gr.Slider(0.0,1.0, value=0.3, label="Temperature:")
            ],
    # The three outputs mirror the 3-tuple returned by text_prompt.
    outputs=[gr.Textbox(label="Input:"), gr.Textbox(label="Output:"), gr.Textbox(label="Tokens:")],
    # Example rows: [prompt, url, api-key (left blank), temperature]
    examples=[["Summarize the following text in Chinese, and give 5 keywords:","https://openai.com/blog/planning-for-agi-and-beyond","",0.5],
              ["Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7]
              ],
    title="Information Extraction Interface:",
    # description="This tool allows querying the text retrieved from the URL using OpenAI's [text-davinci-003] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 2000 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the url for text retrieval, your api-key and temperature to process the text."
)
59
 
60
+
61
 
62
# Launch the UI, capturing any startup failure instead of crashing the module.
error_message = ""
try:
    iface.launch()
except Exception as e:
    error_message = f"An error occurred: {e}"