File size: 1,624 Bytes
b1d8431
 
 
 
 
 
 
 
 
 
 
 
 
 
c2cfe9e
b1d8431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2cfe9e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from typing import Any, Optional
from smolagents.tools import Tool
import transformers
import PyPDF2
import io
import requests

class DocumentAnalyzer(Tool):
    """
    A tool that downloads a PDF document and reports a summary and sentiment.

    The text of every page is extracted with PyPDF2, then the leading part of
    that text is passed to a transformers summarization pipeline and a
    sentiment-analysis pipeline. The result is returned as a single
    human-readable string, matching ``output_type = "string"``.
    """
    name = "analyze_document"
    description = "Analyzes a PDF document and extracts key information like summary and sentiment"
    inputs = {'document_url': {'type': 'string', 'description': 'URL to a PDF document'}}
    output_type = "string"

    def __init__(self):
        super().__init__()
        # The pipelines are built lazily on the first forward() call and then
        # reused, so the models are not reloaded for every document.
        self._summarizer = None
        self._sentiment_analyzer = None

    def forward(self, document_url: str) -> str:
        """
        Analyze a PDF document and describe its summary and sentiment.

        Args:
            document_url (str): URL to a PDF document

        Returns:
            str: Three lines of the form ``Summary: ...``, ``Sentiment: ...``
            and ``Confidence: ...``; or a short notice when the document
            contains no extractable text.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        # Imports are kept local to the method, as in the original
        # implementation, so that forward() is self-contained.
        import PyPDF2
        import io
        import requests
        from transformers import pipeline

        # Download the document. A timeout prevents hanging forever on an
        # unresponsive host, and raise_for_status() stops an HTTP error page
        # from being handed to the PDF parser.
        response = requests.get(document_url, timeout=30)
        response.raise_for_status()
        pdf_file = io.BytesIO(response.content)

        # Extract text. `or ""` guards against pages for which the extractor
        # yields nothing (e.g. image-only pages).
        reader = PyPDF2.PdfReader(pdf_file)
        text = "".join(page.extract_text() or "" for page in reader.pages)

        # Nothing to analyze (e.g. a scanned PDF without a text layer): say so
        # explicitly instead of running the models on an empty string.
        if not text.strip():
            return "No extractable text was found in the document."

        # Summarize text (only the first 1024 characters are used)
        if self._summarizer is None:
            self._summarizer = pipeline("summarization", max_length=100)
        summary = self._summarizer(text[:1024])[0]['summary_text']

        # Sentiment analysis (only the first 512 characters are used)
        if self._sentiment_analyzer is None:
            self._sentiment_analyzer = pipeline("sentiment-analysis")
        sentiment = self._sentiment_analyzer(text[:512])[0]

        return f"Summary: {summary}\nSentiment: {sentiment['label']}\nConfidence: {sentiment['score']:.2f}"