Spaces:
Sleeping
Sleeping
File size: 1,624 Bytes
b1d8431 c2cfe9e b1d8431 c2cfe9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from typing import Any, Optional
from smolagents.tools import Tool
import transformers
import PyPDF2
import io
import requests
class DocumentAnalyzer(Tool):
    """Tool that downloads a PDF from a URL and reports a summary and sentiment.

    The text of every page is extracted with PyPDF2, then the leading part of
    that text is passed to a ``transformers`` summarization pipeline and a
    sentiment-analysis pipeline. The result is returned as one formatted string.
    """

    name = "analyze_document"
    description = "Analyzes a PDF document and extracts key information like summary and sentiment"
    inputs = {'document_url': {'type': 'string', 'description': 'URL to a PDF document'}}
    output_type = "string"

    def __init__(self):
        super().__init__()

    def forward(self, document_url: str) -> str:
        """Download the PDF at *document_url* and describe its content.

        Args:
            document_url: URL to a PDF document.

        Returns:
            A string with three lines -- ``Summary``, ``Sentiment`` and
            ``Confidence`` -- or a short notice when the PDF contains no
            extractable text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.RequestException: On connection failures or timeouts.
        """
        # NOTE(review): imports are kept local to the method as in the
        # original; presumably so the tool stays self-contained when it is
        # exported -- confirm before hoisting them to module level.
        import PyPDF2
        import io
        import requests
        from transformers import pipeline

        # Download the document. A timeout keeps an unresponsive host from
        # blocking the caller forever, and raise_for_status() stops an HTML
        # error page from being handed to the PDF parser.
        response = requests.get(document_url, timeout=30)
        response.raise_for_status()
        pdf_file = io.BytesIO(response.content)

        # Extract text. Pages without a text layer may yield nothing, so
        # fall back to "" instead of concatenating None.
        reader = PyPDF2.PdfReader(pdf_file)
        text = "".join(page.extract_text() or "" for page in reader.pages)

        # Nothing to analyze (e.g. a scanned, image-only PDF): say so rather
        # than feeding an empty string to the models.
        if not text.strip():
            return "No extractable text found in the document."

        # Summarize only the first 1024 characters of the text.
        summarizer = pipeline("summarization", max_length=100)
        summary = summarizer(text[:1024])[0]['summary_text']

        # Sentiment analysis on the first 512 characters of the text.
        sentiment_analyzer = pipeline("sentiment-analysis")
        sentiment = sentiment_analyzer(text[:512])[0]

        return f"Summary: {summary}\nSentiment: {sentiment['label']}\nConfidence: {sentiment['score']:.2f}"
|