File size: 2,739 Bytes
8dc12f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import logging
import re

import torch
from transformers import pipeline
#2
class DescriptionExtractor:
    """Summarise a free-text description and tag the parts/symptoms it mentions.

    Keyword detection is plain, case-insensitive substring containment
    against ``part_keywords`` and ``symptom_keywords``; a keyword therefore
    also matches inside longer words (e.g. "port" inside "support").
    """

    _logger = logging.getLogger(__name__)

    def __init__(self):
        """Load the summarisation pipeline and define the keyword lists."""
        self.summarizer = pipeline(
            "summarization",
            model="facebook/bart-large-cnn"
        )
        self.part_keywords = [
            "screen", "display", "glass", "battery", "power",
            "charging port", "port", "hinge", "keyboard", "keys",
            "speaker", "audio", "microphone", "body", "frame",
            "casing", "lid", "touchpad", "camera"
        ]

        self.symptom_keywords = [
            "crack", "broken", "damage", "not working", "loose",
            "drain", "hot", "overheat", "scratch", "dent",
            "bent", "water", "liquid", "sound", "audio"
        ]

    def extract(self, description):
        """Return a dict describing ``description``.

        Args:
            description: Free-text description; may be ``None`` or empty.

        Returns:
            A dict with the keys ``original``, ``summary``,
            ``affected_parts``, ``symptoms``, ``keywords``,
            ``length_category`` and ``word_count``.

            * Missing input, or input shorter than 5 characters once
              stripped, yields ``length_category == 'none'``, empty keyword
              lists and the input itself as the summary.
            * Fewer than 10 words is ``'short'``, fewer than 50 is
              ``'medium'``; in both cases the summary is the input itself.
            * Anything longer is ``'long'`` and is passed to the summariser.
        """
        if not description or len(description.strip()) < 5:
            return {
                'original': description,
                'summary': description,
                'affected_parts': [],
                'symptoms': [],
                'keywords': [],
                'length_category': 'none',
                # Same key set as the normal return so callers can rely on it.
                'word_count': len(description.split()) if description else 0,
            }

        desc_lower = description.lower()

        word_count = len(description.split())
        if word_count < 10:
            length_category = 'short'
            summary = description
        elif word_count < 50:
            length_category = 'medium'
            summary = description
        else:
            length_category = 'long'
            summary = self._summarize(description)

        affected_parts = [
            part for part in self.part_keywords
            if part in desc_lower
        ]
        symptoms = [
            symptom for symptom in self.symptom_keywords
            if symptom in desc_lower
        ]
        # De-duplicate while keeping first-seen order ("audio" is in both
        # lists). A plain set would make the order vary between runs.
        keywords = list(dict.fromkeys(affected_parts + symptoms))

        return {
            'original': description,
            'summary': summary,
            'affected_parts': affected_parts,
            'symptoms': symptoms,
            'keywords': keywords,
            'length_category': length_category,
            'word_count': word_count
        }

    def _summarize(self, description):
        """Summarise ``description``; fall back to its first 40 words on failure."""
        try:
            summary_result = self.summarizer(
                description,
                max_length=50,
                min_length=10,
                do_sample=False
            )
            return summary_result[0]['summary_text']
        except Exception:
            # Best-effort: any summariser failure degrades to truncation, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            self._logger.warning(
                "Summarization failed; falling back to truncated description",
                exc_info=True,
            )
            return ' '.join(description.split()[:40]) + "..."

    def create_search_text(self, description_info):
        """Build a search string from the dict returned by ``extract``.

        Returns the summary followed by the space-joined keywords, or just
        the summary when there are no keywords. A ``None`` summary is treated
        as an empty string so the result is always a ``str``.
        """
        summary = description_info['summary'] or ''
        if not description_info['keywords']:
            return summary
        return f"{summary} {' '.join(description_info['keywords'])}"