Spaces:
Sleeping
Sleeping
app
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ import docx
|
|
| 7 |
import time
|
| 8 |
from langchain_community.llms import OpenAI
|
| 9 |
from langchain.chains import ConversationChain
|
|
|
|
| 10 |
from langchain_core.prompts import PromptTemplate
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
import os
|
|
@@ -20,34 +21,56 @@ sentiment_analyzer = pipeline("sentiment-analysis")
|
|
| 20 |
topic_classifier = pipeline("zero-shot-classification")
|
| 21 |
|
| 22 |
def fetch_text_from_url(url):
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
def extract_text_from_pdf(file):
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def extract_text_from_docx(file):
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
|
| 42 |
if input_type == "URL":
|
| 43 |
progress(0, desc="Fetching text from URL")
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
elif input_type == "File":
|
| 46 |
progress(0, desc="Extracting text from file")
|
| 47 |
-
if input_text
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
else:
|
| 52 |
input_text = input_text.read().decode("utf-8")
|
| 53 |
|
|
@@ -75,20 +98,22 @@ def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
|
|
| 75 |
|
| 76 |
return original_text, summary, sentiment, ", ".join(topics)
|
| 77 |
|
| 78 |
-
def chat(input_text,
|
| 79 |
prompt_template = """
|
| 80 |
Assistant is an AI language model that helps with text analysis tasks.
|
| 81 |
|
| 82 |
-
|
|
|
|
|
|
|
| 83 |
Human: {input_text}
|
| 84 |
Assistant:"""
|
| 85 |
|
| 86 |
prompt = PromptTemplate(
|
| 87 |
-
input_variables=["
|
| 88 |
template=prompt_template
|
| 89 |
)
|
| 90 |
|
| 91 |
-
chain = ConversationChain(llm=llm, prompt=prompt)
|
| 92 |
response = chain.predict(input_text=input_text)
|
| 93 |
|
| 94 |
return response
|
|
@@ -133,13 +158,8 @@ def create_interface():
|
|
| 133 |
input_value = url
|
| 134 |
else:
|
| 135 |
input_value = file
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
|
| 139 |
-
except Exception as e:
|
| 140 |
-
original_text = f"Error: {str(e)}"
|
| 141 |
-
summary, sentiment, topics = "", "", ""
|
| 142 |
-
|
| 143 |
return original_text, summary, sentiment, topics
|
| 144 |
|
| 145 |
submit_button.click(
|
|
@@ -152,7 +172,7 @@ def create_interface():
|
|
| 152 |
conversation_history.append(f"Human: {conversation_input}")
|
| 153 |
response = chat(conversation_input, "\n".join(conversation_history))
|
| 154 |
conversation_history.append(f"Assistant: {response}")
|
| 155 |
-
return conversation_history, "", response
|
| 156 |
|
| 157 |
conversation_button.click(
|
| 158 |
fn=process_conversation,
|
|
|
|
| 7 |
import time
|
| 8 |
from langchain_community.llms import OpenAI
|
| 9 |
from langchain.chains import ConversationChain
|
| 10 |
+
from langchain.memory import ConversationBufferMemory
|
| 11 |
from langchain_core.prompts import PromptTemplate
|
| 12 |
from dotenv import load_dotenv
|
| 13 |
import os
|
|
|
|
| 21 |
topic_classifier = pipeline("zero-shot-classification")
|
| 22 |
|
| 23 |
def fetch_text_from_url(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request can block indefinitely on a stalled host.

    Returns:
        The text of every ``<p>`` element, joined with single spaces.

    Raises:
        ValueError: If the request fails, times out, or the server answers
            with a 4xx/5xx status. The underlying ``requests`` exception is
            chained as the cause.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes
    except requests.exceptions.RequestException as e:
        raise ValueError(f"Error fetching text from URL: {str(e)}") from e

    # Parsing happens outside the try block: only network errors are
    # translated into ValueError, exactly as before.
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))
|
| 31 |
|
| 32 |
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: The PDF source; it is handed to ``PyPDF2.PdfReader`` unchanged.

    Returns:
        The extracted text of all pages, concatenated with no separator.

    Raises:
        ValueError: If PyPDF2 reports a ``PdfReadError``. The original
            exception is chained as the cause.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        # join() consumes the pages inside the try block, so a read error
        # raised while a page is being extracted is still translated below.
        return "".join(page.extract_text() for page in pdf_reader.pages)
    except PyPDF2.errors.PdfReadError as e:
        raise ValueError(f"Error extracting text from PDF: {str(e)}") from e
|
| 41 |
|
| 42 |
def extract_text_from_docx(file):
    """Return the text of every paragraph in a DOCX document.

    Args:
        file: The document source; it is handed to ``docx.Document`` unchanged.

    Returns:
        Each paragraph's text followed by a newline, concatenated in
        document order.

    Raises:
        ValueError: If python-docx raises ``PackageNotFoundError``. The
            original exception is chained as the cause.
    """
    try:
        doc = docx.Document(file)
        return "".join(f"{para.text}\n" for para in doc.paragraphs)
    except docx.opc.exceptions.PackageNotFoundError as e:
        raise ValueError(f"Error extracting text from DOCX: {str(e)}") from e
|
| 51 |
|
| 52 |
def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
|
| 53 |
if input_type == "URL":
|
| 54 |
progress(0, desc="Fetching text from URL")
|
| 55 |
+
try:
|
| 56 |
+
input_text = fetch_text_from_url(input_text)
|
| 57 |
+
except ValueError as e:
|
| 58 |
+
return str(e), "", "", ""
|
| 59 |
elif input_type == "File":
|
| 60 |
progress(0, desc="Extracting text from file")
|
| 61 |
+
if input_text is None:
|
| 62 |
+
return "No file uploaded", "", "", ""
|
| 63 |
+
file_name = input_text.name.lower()
|
| 64 |
+
if file_name.endswith(".pdf"):
|
| 65 |
+
try:
|
| 66 |
+
input_text = extract_text_from_pdf(input_text)
|
| 67 |
+
except ValueError as e:
|
| 68 |
+
return str(e), "", "", ""
|
| 69 |
+
elif file_name.endswith(".docx"):
|
| 70 |
+
try:
|
| 71 |
+
input_text = extract_text_from_docx(input_text)
|
| 72 |
+
except ValueError as e:
|
| 73 |
+
return str(e), "", "", ""
|
| 74 |
else:
|
| 75 |
input_text = input_text.read().decode("utf-8")
|
| 76 |
|
|
|
|
| 98 |
|
| 99 |
return original_text, summary, sentiment, ", ".join(topics)
|
| 100 |
|
| 101 |
+
def chat(input_text, conversation_history):
    """Ask the module-level ``llm`` to reply to ``input_text`` given prior turns.

    Args:
        input_text: The latest human message.
        conversation_history: Earlier turns of the conversation, already
            rendered as a single string.

    Returns:
        The model's reply.
    """
    prompt_template = (
        "\n"
        "Assistant is an AI language model that helps with text analysis tasks.\n"
        "\n"
        "Conversation history:\n"
        "{conversation_history}\n"
        "\n"
        "Human: {input_text}\n"
        "Assistant:"
    )

    prompt = PromptTemplate(
        input_variables=["conversation_history", "input_text"],
        template=prompt_template,
    )

    # Render the prompt ourselves instead of going through ConversationChain.
    # A freshly created ConversationBufferMemory is always empty, so the
    # history passed to this function never reached the prompt; and
    # ConversationChain rejects a prompt whose variables are not exactly the
    # memory key plus its own input key (which defaults to "input").
    rendered_prompt = prompt.format(
        conversation_history=conversation_history,
        input_text=input_text,
    )

    # NOTE(review): assumes `llm` is the LangChain LLM created elsewhere in
    # this file, for which invoke() returns the completion text. Confirm if
    # it is ever replaced by a chat model, which returns a message object.
    response = llm.invoke(rendered_prompt)

    return response
|
|
|
|
| 158 |
input_value = url
|
| 159 |
else:
|
| 160 |
input_value = file
|
| 161 |
+
|
| 162 |
+
original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
return original_text, summary, sentiment, topics
|
| 164 |
|
| 165 |
submit_button.click(
|
|
|
|
| 172 |
conversation_history.append(f"Human: {conversation_input}")
|
| 173 |
response = chat(conversation_input, "\n".join(conversation_history))
|
| 174 |
conversation_history.append(f"Assistant: {response}")
|
| 175 |
+
return "\n".join(conversation_history), "", response
|
| 176 |
|
| 177 |
conversation_button.click(
|
| 178 |
fn=process_conversation,
|