File size: 5,245 Bytes
13e4835
 
 
 
 
 
 
 
 
986cd95
13e4835
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c650b65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13e4835
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import logging
from io import StringIO

import pandas as pd
import pymupdf
import streamlit as st

from helper.button_behaviour import hide_button, unhide_button

class uploadFile:
    """Parse uploaded PDF/CSV files and publish them through ``st.session_state``.

    Each public method empties its target dictionary, parses every upload whose
    ``type`` is ``"application/pdf"`` or ``"text/csv"`` and stores the result as
    ``{'type': 'pdf' | 'csv', 'content': ...}`` keyed by the upload's ``name``.
    Uploads with any other ``type`` are ignored. An upload that cannot be parsed
    is logged and skipped, so one bad file never aborts the batch.

    NOTE: ``multiple_upload_file``, ``upload_website_audience`` and
    ``upload_file_seo`` all reuse the same ``file_dict`` object, so calling any
    of them discards what the previous call stored there (and the session-state
    keys they set refer to that one dictionary). Only ``upload_gt`` works on the
    separate ``file_gt``.

    Uploads are presumably the objects returned by ``st.file_uploader``; the
    code only relies on ``.name``, ``.type``, ``.read()`` and, for the CSV
    fallback, ``.seek()`` -- TODO confirm against the callers.
    """

    _PDF_MIME = "application/pdf"
    _CSV_MIME = "text/csv"
    _PAGE_BREAK = chr(12)  # form feed, placed between the text of PDF pages
    _log = logging.getLogger(__name__)

    def __init__(self):
        # Shared by multiple_upload_file / upload_website_audience / upload_file_seo.
        self.file_dict = {}
        # Used by upload_gt only.
        self.file_gt = {}

    # ------------------------------------------------------------------ #
    # Content readers (one upload in, parsed content out)
    # ------------------------------------------------------------------ #
    @classmethod
    def _read_pdf_text(cls, upload):
        """Return the text of every PDF page, joined with form-feed characters."""
        with pymupdf.open(stream=upload.read(), filetype="pdf") as doc:
            return cls._PAGE_BREAK.join(page.get_text() for page in doc)

    @staticmethod
    def _read_csv_frame(upload):
        """Return the CSV parsed into a ``pandas.DataFrame``."""
        return pd.read_csv(upload)

    @staticmethod
    def _read_csv_text(upload):
        """Return the raw CSV payload decoded as UTF-8 text."""
        return upload.read().decode("utf-8")

    @classmethod
    def _read_commented_csv(cls, upload):
        """Parse a CSV that may contain ``#`` comment lines.

        First lets pandas drop the comments (``comment='#'`` with the python
        engine). If that raises, the upload is rewound, lines whose stripped
        form starts with ``#`` are removed by hand, and the remainder is parsed.

        Returns:
            The parsed ``DataFrame``, or ``None`` when both attempts fail.
        """
        try:
            return pd.read_csv(upload, comment="#", engine="python")
        except Exception as exc:
            cls._log.warning("Error processing CSV: %s", exc)

        try:
            # The failed attempt may have consumed the stream; start over.
            upload.seek(0)
            raw_text = upload.read().decode("utf-8")
            data_lines = [
                line
                for line in raw_text.split("\n")
                if not line.strip().startswith("#")
            ]
            return pd.read_csv(StringIO("\n".join(data_lines)))
        except Exception as exc:
            cls._log.warning("Second attempt failed: %s", exc)
            return None

    # ------------------------------------------------------------------ #
    # Shared machinery
    # ------------------------------------------------------------------ #
    def _parse(self, upload, read_csv):
        """Return the ``{'type', 'content'}`` entry for one upload, or ``None``.

        ``None`` means "nothing to store": unsupported MIME type, a reader that
        gave up (returned ``None``), or a reader that raised.
        """
        if upload.type == self._PDF_MIME:
            kind, reader = "pdf", self._read_pdf_text
        elif upload.type == self._CSV_MIME:
            kind, reader = "csv", read_csv
        else:
            return None

        try:
            content = reader(upload)
        except Exception:
            # Best effort: record why the file was dropped instead of hiding it.
            self._log.exception(
                "Skipping %s upload %r: could not be parsed", kind, upload.name
            )
            return None

        if content is None:
            return None
        return {'type': kind, 'content': content}

    def _ingest(self, uploads, store, session_key, read_csv):
        """Refill ``store`` from ``uploads`` and expose it as ``session_key``.

        ``store`` is cleared in place so existing references to it stay valid.
        The session-state assignment happens once per upload (not after the
        loop) so that an empty ``uploads`` leaves ``session_key`` untouched.
        """
        store.clear()
        for upload in uploads:
            entry = self._parse(upload, read_csv)
            if entry is not None:
                store[upload.name] = entry
            st.session_state[session_key] = store

    # ------------------------------------------------------------------ #
    # Public API
    # ------------------------------------------------------------------ #
    def multiple_upload_file(self, uploaded_files):
        """Load uploads into ``file_dict``; CSVs become ``DataFrame`` objects.

        The result is published as ``st.session_state['uploaded_files']``.

        Args:
            uploaded_files: Iterable of uploads to parse.
        """
        self._ingest(
            uploaded_files, self.file_dict, 'uploaded_files', self._read_csv_frame
        )

    def upload_website_audience(self, uploaded_files):
        """Load uploads into ``file_dict``; CSVs are parsed with ``#`` lines skipped.

        The result is published as
        ``st.session_state['upload_website_audience']``.

        Args:
            uploaded_files: Iterable of uploads to parse.
        """
        self._ingest(
            uploaded_files,
            self.file_dict,
            'upload_website_audience',
            self._read_commented_csv,
        )

    def upload_file_seo(self, uploaded_files):
        """Load uploads into ``file_dict``; CSVs are kept as UTF-8 text.

        The result is published as ``st.session_state['uploaded_files']``.

        Args:
            uploaded_files: Iterable of uploads to parse.
        """
        self._ingest(
            uploaded_files, self.file_dict, 'uploaded_files', self._read_csv_text
        )

    def upload_gt(self, gtmetrix):
        """Load uploads into ``file_gt``; CSVs are kept as UTF-8 text.

        Both PDF and CSV entries are stored in ``file_gt`` (CSV entries used to
        be written to ``file_dict`` by mistake and were therefore never
        published). The result is published as
        ``st.session_state['uploaded_gt']``.

        Args:
            gtmetrix: Iterable of uploads to parse.
        """
        self._ingest(gtmetrix, self.file_gt, 'uploaded_gt', self._read_csv_text)

if __name__ == "__main__":
    # Runs only when this module is the entry script, not when it is imported:
    # switch the Streamlit page to the wide layout and create the upload helper.
    st.set_page_config(layout="wide")
    app = uploadFile()