Spaces: Sleeping
import os
import time

import requests
from bs4 import BeautifulSoup
import PyPDF2
import docx
import gradio as gr
from transformers import pipeline
from langchain_community.llms import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv

# Pull OPENAI_API_KEY (and any other settings) from a local .env file.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Shared model handles, constructed once at import time and reused by
# every request handler below.
llm = OpenAI(openai_api_key=openai_api_key)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")
def fetch_text_from_url(url):
    """Download *url* and return the concatenated text of its <p> tags.

    Args:
        url: The page to fetch.

    Returns:
        All paragraph text joined with single spaces.

    Raises:
        ValueError: if the request fails, times out, or returns a
            4xx/5xx status.
    """
    try:
        # A timeout keeps the UI from hanging forever on a dead host;
        # the original call had none.
        response = requests.get(url, timeout=15)
        response.raise_for_status()  # surface 4xx/5xx as exceptions
    except requests.exceptions.RequestException as e:
        raise ValueError(f"Error fetching text from URL: {str(e)}")
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF file object.

    Args:
        file: A file path or binary file object readable by PyPDF2.

    Returns:
        The extracted text of all pages, concatenated.

    Raises:
        ValueError: if the file is not a readable PDF.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        # extract_text() may return None for pages with no extractable
        # text; ``or ""`` keeps the concatenation from raising TypeError.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except PyPDF2.errors.PdfReadError as e:
        raise ValueError(f"Error extracting text from PDF: {str(e)}")
def extract_text_from_docx(file):
    """Return the text of a .docx file, one paragraph per line.

    Args:
        file: A file path or binary file object readable by python-docx.

    Raises:
        ValueError: if the file is not a valid .docx package.
    """
    try:
        document = docx.Document(file)
    except docx.opc.exceptions.PackageNotFoundError as e:
        raise ValueError(f"Error extracting text from DOCX: {str(e)}")
    # Each paragraph contributes its text plus a trailing newline.
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Run the selected analysis tasks over text, a URL, or an uploaded file.

    Args:
        input_text: Raw text, a URL string, or a gradio file object,
            depending on ``input_type``.
        input_type: One of "Text", "URL", or "File".
        tasks: Iterable of task names; any of "Summarization",
            "Sentiment Analysis", "Topic Detection".
        progress: Gradio progress tracker (the mutable default mirrors
            the original interface; gr.Progress holds no per-call state).

    Returns:
        Tuple of (original_text_preview, summary, sentiment, topics_csv).
        On input errors the first element carries the error message and
        the remaining elements are empty strings.
    """
    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        try:
            input_text = fetch_text_from_url(input_text)
        except ValueError as e:
            return str(e), "", "", ""
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        if input_text is None:
            return "No file uploaded", "", "", ""
        file_name = input_text.name.lower()
        try:
            if file_name.endswith(".pdf"):
                input_text = extract_text_from_pdf(input_text)
            elif file_name.endswith(".docx"):
                input_text = extract_text_from_docx(input_text)
            else:
                # Treat anything else as plain UTF-8 text.
                input_text = input_text.read().decode("utf-8")
        except ValueError as e:
            return str(e), "", "", ""

    # Guard against empty/None input so the models below don't crash.
    if not input_text:
        return "No text to analyze", "", "", ""

    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
    summary, sentiment, topics = "", "", ""

    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # truncation=True keeps the input within BART's token limit;
        # without it long documents crash inside the model (the other
        # tasks already truncate their input).
        summary = summarizer(
            input_text, max_length=100, min_length=30, do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        time.sleep(1)  # minimal delay for demonstration purposes

    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        # Truncate: the default sentiment model accepts at most 512 tokens.
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
        time.sleep(1)

    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
        time.sleep(1)

    progress(1, desc="Analysis completed")
    # ``topics`` is a list when topic detection ran, "" otherwise;
    # ", ".join handles both.
    return original_text, summary, sentiment, ", ".join(topics)
def chat(input_text, conversation_history):
    """Answer one chat turn, conditioning the LLM on the prior transcript.

    Args:
        input_text: The user's latest message.
        conversation_history: The prior transcript as a single string
            ("Human: ..." / "Assistant: ..." lines).

    Returns:
        The assistant's reply as a string.
    """
    prompt_template = """
Assistant is an AI language model that helps with text analysis tasks.
Conversation history:
{conversation_history}
Human: {input_text}
Assistant:"""
    prompt = PromptTemplate(
        input_variables=["conversation_history", "input_text"],
        template=prompt_template,
    )
    # Bug fix: the original built a ConversationChain with a *fresh*
    # ConversationBufferMemory on every call, so {conversation_history}
    # was always filled from the empty memory and the caller-supplied
    # history was silently ignored. Format the prompt with the provided
    # history and query the LLM directly instead.
    return llm.invoke(
        prompt.format(
            conversation_history=conversation_history,
            input_text=input_text,
        )
    )
def create_interface():
    """Build and return the Gradio Blocks UI for the text-analysis app."""
    with gr.Blocks(title="Text Analysis App") as interface:
        gr.Markdown("## Choose data format to analyze")
        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
        text_input = gr.Textbox(label="Text Input", visible=False)
        url_input = gr.Textbox(label="URL Input", visible=False)
        file_input = gr.File(label="File Upload", visible=False)
        tasks_checkboxes = gr.CheckboxGroup(
            ["Summarization", "Sentiment Analysis", "Topic Detection"],
            label="Analysis Tasks",
        )
        submit_button = gr.Button("Analyze")
        progress_bar = gr.Progress()
        with gr.Tab("Original Text"):
            original_text_output = gr.Textbox(label="Original Text")
        with gr.Tab("Summary"):
            summary_output = gr.Textbox(label="Summary")
        with gr.Tab("Sentiment"):
            sentiment_output = gr.Textbox(label="Sentiment")
        with gr.Tab("Topics"):
            topics_output = gr.Textbox(label="Topics")
        with gr.Tab("Conversation"):
            conversation_history = gr.State([])
            conversation_input = gr.Textbox(label="Human")
            conversation_output = gr.Textbox(label="Assistant")
            conversation_button = gr.Button("Send")

        def update_input_visibility(input_type):
            # Show only the widget matching the selected input type.
            return {
                text_input: gr.update(visible=input_type == "Text"),
                url_input: gr.update(visible=input_type == "URL"),
                file_input: gr.update(visible=input_type == "File"),
            }

        input_type.change(
            update_input_visibility,
            inputs=[input_type],
            outputs=[text_input, url_input, file_input],
        )

        def process_input(input_type, text, url, file, tasks):
            # Route the active widget's value into analyze_text.
            if input_type == "Text":
                input_value = text
            elif input_type == "URL":
                input_value = url
            else:
                input_value = file
            return analyze_text(input_value, input_type, tasks, progress_bar)

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[original_text_output, summary_output, sentiment_output, topics_output],
        )

        def process_conversation(conversation_history, conversation_input):
            conversation_history.append(f"Human: {conversation_input}")
            response = chat(conversation_input, "\n".join(conversation_history))
            conversation_history.append(f"Assistant: {response}")
            # Bug fix: the original returned the *joined transcript string*
            # into the gr.State slot, so the second click called .append()
            # on a str and crashed. Return the list so the State stays a list.
            return conversation_history, "", response

        conversation_button.click(
            fn=process_conversation,
            inputs=[conversation_history, conversation_input],
            outputs=[conversation_history, conversation_input, conversation_output],
        )
    return interface
| if __name__ == "__main__": | |
| create_interface().launch() |