elia-waefler committed on
Commit
f0134bc
·
1 Parent(s): d6d7f26

downloadable classification, multiple files

Browse files
Files changed (1) hide show
  1. app.py +63 -31
app.py CHANGED
@@ -5,6 +5,8 @@ from PyPDF2 import PdfReader
5
  from openai import OpenAI
6
  from langchain.chat_models import ChatOpenAI
7
 
 
 
8
 
9
  def gpt4_new(prompt_text):
10
  client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
@@ -80,47 +82,77 @@ def json_open(filename):
80
 
81
 
82
  def main():
83
- ASK_ASH_PASSWORD = os.environ["ASK_ASH_PASSWORD"]
84
  st.title("Doc Classifier")
85
  st.subheader("Limitationen: ")
86
  st.write("bisher nur PDFs")
87
- st.write("nur Disziplin und Doc typ.")
88
- st.write("Dokumenten Grösse ist stark limitiert")
89
  st.write("macht noch viele Fehler, vor allem bei Koordination, Datennetz usw, (unklare Disziplinen)")
90
  st.write("")
91
 
92
  if st.text_input("ASK_ASH_PASSWORD: ", type="password") == ASK_ASH_PASSWORD:
93
- uploaded_file = st.file_uploader("PDF Dokument", accept_multiple_files=False)
94
  #print(uploaded_file)
95
  #print(uploaded_file.name)
 
96
  if st.button("classify KBOB!"):
97
- if uploaded_file is not None:
98
- with st.spinner("GPT4 at work"):
99
- pdf_text = str(get_pdf_text(uploaded_file))
100
- prompt_1 = auftrag_0 + auftrag_1_disz + str(Baubranchen_Disziplinen) + pdf_text
101
- answer_1 = gpt4_new(prompt_1)
102
- print(prompt_1)
103
-
104
- st.write(answer_1)
105
- with st.spinner("GPT4 at work"):
106
- prompt_2 = auftrag_0 + auftrag_1_dokt + str(Dokumententypen) + pdf_text
107
- answer_2 = gpt4_new(prompt_2)
108
- print(prompt_2)
109
-
110
- st.write(answer_2)
111
- with st.spinner("GPT4 at work"):
112
- prompt_3 = auftrag_0 + auftrag_1_gesch + str(ASH_Geschosse) + pdf_text
113
- answer_3 = gpt4_new(prompt_3)
114
- print(prompt_3)
115
- with open('gpt4_responses.txt', 'w', encoding='utf-8') as file:
116
- file.writelines(answer_1)
117
- file.writelines(answer_2)
118
- file.writelines(answer_3)
119
- st.write(answer_3)
120
- st.success("work done, saved")
121
-
122
- st.download_button("download txt", json_open("gpt4_responses.txt"), file_name="classification.txt")
123
- #st.download_button("download txt", json_open("gpt4_responses.txt"), file_name="gpt4_responses.txt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  else:
125
  st.warning("no file")
126
 
 
5
  from openai import OpenAI
6
  from langchain.chat_models import ChatOpenAI
7
 
8
+ ASK_ASH_PASSWORD = os.environ["ASK_ASH_PASSWORD"]
9
+
10
 
11
  def gpt4_new(prompt_text):
12
  client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
 
82
 
83
 
84
  def main():
 
85
  st.title("Doc Classifier")
86
  st.subheader("Limitationen: ")
87
  st.write("bisher nur PDFs")
88
+ st.write("nur Disziplin, Doc typ. und Geschoss")
 
89
  st.write("macht noch viele Fehler, vor allem bei Koordination, Datennetz usw, (unklare Disziplinen)")
90
  st.write("")
91
 
92
  if st.text_input("ASK_ASH_PASSWORD: ", type="password") == ASK_ASH_PASSWORD:
93
+ uploaded_files = st.file_uploader("PDF Dokument", accept_multiple_files=True)
94
  #print(uploaded_file)
95
  #print(uploaded_file.name)
96
+
97
  if st.button("classify KBOB!"):
98
+ if uploaded_files is not None:
99
+ with st.container():
100
+ # col1, col2, col3, col4, col5 = st.columns(5)
101
+ col1, col2, col3 = st.columns(3)
102
+ all_metadata = []
103
+ with col1:
104
+ st.write("Disziplin")
105
+ st.write(f"")
106
+ with col2:
107
+ st.write("Disziplin")
108
+ st.write(f"")
109
+ with col3:
110
+ st.write("Disziplin")
111
+ st.write(f"")
112
+
113
+ for file in uploaded_files:
114
+ metadata = [file.name]
115
+ with col1:
116
+ with st.spinner("GPT4 at work"):
117
+ pdf_text = str(get_pdf_text(file))
118
+ prompt_1 = auftrag_0 + auftrag_1_disz + str(Baubranchen_Disziplinen) + pdf_text
119
+ answer_1 = gpt4_new(prompt_1)
120
+ print(prompt_1)
121
+ metadata.append(answer_1)
122
+ st.write(answer_1)
123
+
124
+ with col2:
125
+ with st.spinner("GPT4 at work"):
126
+ prompt_2 = auftrag_0 + auftrag_1_dokt + str(Dokumententypen) + pdf_text
127
+ answer_2 = gpt4_new(prompt_2)
128
+ print(prompt_2)
129
+ metadata.append(answer_2)
130
+
131
+ st.write(answer_2)
132
+
133
+ with col3:
134
+ with st.spinner("GPT4 at work"):
135
+ prompt_3 = auftrag_0 + auftrag_1_gesch + str(ASH_Geschosse) + pdf_text
136
+ answer_3 = gpt4_new(prompt_3)
137
+ print(prompt_3)
138
+ metadata.append(answer_2)
139
+
140
+ st.write(answer_3)
141
+
142
+ all_metadata.append(metadata)
143
+
144
+ metadata_filename = "ai_generated_metadata.txt"
145
+ with open(metadata_filename, 'w', encoding='utf-8') as f:
146
+ for line in all_metadata:
147
+ f.writelines("\n")
148
+ for item in line:
149
+ f.writelines(item)
150
+ f.writelines(";")
151
+
152
+ f.writelines("\n")
153
+
154
+ st.success("classified, saved")
155
+ st.download_button(f"Download Metadata", json_open(metadata_filename), file_name=metadata_filename)
156
  else:
157
  st.warning("no file")
158