MrAlvaroA committed on
Commit
0da6223
·
verified ·
1 Parent(s): 60b3f41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -115
app.py CHANGED
@@ -2,13 +2,17 @@ import os
2
  import openai
3
  import pandas as pd
4
  import gradio as gr
 
 
 
 
5
  from openai import OpenAI
6
  from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
7
  from langchain_community.vectorstores import Chroma
8
 
9
  #-------------------------------------------------------------------------------------
10
- def all_functions (question, quotes, temperature, document):
11
- yield "Analyzing Question", "", ""
12
  with open('./templates/question_analysis.txt', 'r') as file:
13
  question_analysis = file.read()
14
 
@@ -31,128 +35,114 @@ def all_functions (question, quotes, temperature, document):
31
  temperature=0.0
32
  )
33
 
34
- if response.choices[0].message.content == "Not a question":
35
- yield "Question Analysis Done", "The question is not a question, can not continue", ""
36
- return
37
-
38
- elif response.choices[0].message.content == "Too many questions, maximum is 5.":
39
- yield "Question Analysis Done", "Too many questions at once, can not continue", ""
40
- return
41
-
42
  else:
43
- lines = response.choices[0].message.content.splitlines()
44
- question_analysis_string = ""
45
-
46
- for line in lines:
47
- cleaned_line = line.split("☻:")[-1]
48
- question_analysis_string += cleaned_line + "\n"
49
 
50
- yield "Question Analysis Done", question_analysis_string, ""
51
-
52
  except openai.OpenAIError as e:
53
  print(f"An error occurred: {str(e)}")
54
- return
55
-
56
- del lines[0]
57
- #automatic_textboxes = create_answers_textboxes(lines)
58
-
59
  with open('./templates/qna.txt', 'r') as file:
60
  qna = file.read()
61
 
62
  with open('./templates/qna_template.txt', 'r') as file:
63
  qna_template = file.read()
64
 
65
- source = get_full_path(document, source)
66
-
67
- analyzed_answers =[]
68
- for i, line in enumerate(lines):
69
- yield "Analyzing Quote #" + i, question_analysis_string, ""
70
- returned_quotes = vectorstored_persisted.similarity_search(line, k=quotes, filter = {"source":source})
71
 
72
- context_for_query = ""
73
-
74
- for i, d in enumerate(returned_quotes, start=1):
75
- context_for_query += f"Quote {i}:\n"
76
- context_for_query += d.page_content + "\n"
77
- context_for_query += f"(Page = {d.metadata.get('page', 'Unknown')})\n\n"
78
-
79
- answer_to_analyze = [
80
- {"role": "system", "content": qna},
81
- {"role": "user", "content": qna_template.format(
82
- context=context_for_query,
83
- question=user_input
84
- )
85
- }
86
- ]
87
-
88
- try:
89
- answer_analyzed = client.chat.completions.create(
90
- model=model_name,
91
- messages=answer_to_analyze,
92
- max_tokens=2000,
93
- temperature=0.4
94
  )
95
-
96
- analyzed_answers.append(answer_analyzed.choices[0].message.content)
97
-
98
- except openai.OpenAIError as e:
99
- print(f"An error occurred: {str(e)}")
100
- return
101
- #-------------------------------------------------------------------------------------
102
 
 
103
 
104
- #-------------------------------------------------------------------------------------
105
- def create_answers_textboxes(questions):
106
-
107
- for i, line in enumerate(lines):
108
- textboxes.append(gr.Textbox(label=f"Question {i+1}: {question}", lines=10))
109
-
110
- return textboxes
111
- #-------------------------------------------------------------------------------------
 
112
 
113
- #-------------------------------------------------------------------------------------
114
- def get_full_path(selected_filename, file_list):
115
- for full_path in file_list:
116
- # Extract the filename from the full path
117
- if os.path.basename(full_path) == selected_filename:
118
- return full_path
119
- return None # Return None if no match is found
120
- #-------------------------------------------------------------------------------------
121
 
 
 
122
 
 
 
 
 
 
 
 
 
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  client=OpenAI(
126
- api_key=os.getenv("OPENAI_API_KEY")
 
127
  )
128
 
129
  model_name = 'gpt-3.5-turbo'
130
- rater_model = 'gpt-4o-mini'
131
  embedding_model = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
132
- persisted_vectordb_location = './vector_db/'
133
  collection_name = 'companies-10K-2023'
134
 
135
- vectorstored_persisted = Chroma(
136
  collection_name=collection_name,
137
  embedding_function=embedding_model,
138
- persist_directory=persisted_vectordb_location
139
  )
140
 
141
- stored_documents = vectorstored_persisted.get(include=["metadatas"])
142
- source = set()
143
  document_names = set()
144
 
145
  for metadata in stored_documents['metadatas']:
146
- # Extract the source and use os.path.basename to get only the file name
147
  source = metadata.get('source', 'No source found')
148
  document_names.add(os.path.basename(source))
149
 
150
  document_list = list(document_names)
151
 
152
- with gr.Blocks(css="""
153
- #question_input_box {height: 140px;}
154
- #question_analysis_box {height: 320px;}
155
- """) as demo:
156
  with gr.Row():
157
  with gr.Column(scale=1):
158
 
@@ -161,9 +151,16 @@ with gr.Blocks(css="""
161
  label="Document",
162
  )
163
 
 
 
 
 
 
 
 
164
  quotes_to_fetch = gr.Slider(
165
  minimum=1,
166
- maximum=5,
167
  step=1,
168
  label="How many quotes you want from the source",
169
  )
@@ -173,38 +170,24 @@ with gr.Blocks(css="""
173
  maximum=1,
174
  step=0.1,
175
  label="Temperature",
176
- info="Controls randomness: 0 = deterministic, 1 = creative/unexpected answers. If you can't get an answer try increasing the temperature but keep in mind that the accuracy can lower by doing this."
177
- )
178
 
179
-
180
- question_input = gr.Textbox(
181
- label="Enter your question",
182
- placeholder="Type your question here...",
183
- elem_id="question_input_box",
184
- lines=3
185
- )
186
-
187
- with gr.Column(scale=1):
188
- status_button = gr.Button(value="Ready", interactive=False, elem_id="status_button")
189
-
190
- question_analysis_output = gr.Textbox(
191
- label="Question Analysis",
192
- placeholder="The analysis will be shown here...",
193
- interactive=False,
194
- elem_id="question_analysis_box",
195
- lines=10
196
- )
197
 
198
  with gr.Row():
199
- analyze_button = gr.Button("Analyze and Answer")
 
 
 
 
200
 
201
- with gr.Row():
202
- dynamic_textbox_output = gr.Column()
203
-
204
- analyze_button.click(
205
- all_functions,
206
  inputs=[question_input, quotes_to_fetch, temperature_slider, document_dropdown],
207
- outputs=[status_button, question_analysis_output, dynamic_textbox_output]
208
  )
209
 
210
- demo.launch(share=True, show_error=True)
 
2
  import openai
3
  import pandas as pd
4
  import gradio as gr
5
+ import uuid
6
+ import json
7
+
8
+ from huggingface_hub import CommitScheduler, HfApi
9
  from openai import OpenAI
10
  from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
11
  from langchain_community.vectorstores import Chroma
12
 
13
  #-------------------------------------------------------------------------------------
14
+ def get_answer (question, quotes, temperature, document):
15
+ yield "Running... Analyzing Question", "", question
16
  with open('./templates/question_analysis.txt', 'r') as file:
17
  question_analysis = file.read()
18
 
 
35
  temperature=0.0
36
  )
37
 
38
+ if response.choices[0].message.content == "Valid Question.":
39
+ yield "Running... Question Analysis Done", "", question
40
+
 
 
 
 
 
41
  else:
42
+ yield "Stopped: Question Analysis Done", "The question is not valid, stopping the process", ""
43
+ return
 
 
 
 
44
 
 
 
45
  except openai.OpenAIError as e:
46
  print(f"An error occurred: {str(e)}")
47
+ return
48
+
 
 
 
49
  with open('./templates/qna.txt', 'r') as file:
50
  qna = file.read()
51
 
52
  with open('./templates/qna_template.txt', 'r') as file:
53
  qna_template = file.read()
54
 
55
+ filename = "/content/dataset/" + document
56
+
 
 
 
 
57
 
58
+ quotes = vector_db.similarity_search(question, k=quotes, filter = {"source":filename})
59
+
60
+ context_for_query = ""
61
+
62
+ for i, d in enumerate(quotes, start=1):
63
+ context_for_query += f"Quote {i}:\n"
64
+ context_for_query += d.page_content + "\n"
65
+ context_for_query += f"(Page = {d.metadata.get('page', 'Unknown')})\n\n"
66
+
67
+ answer_to_analyze = [
68
+ {"role": "system", "content": qna},
69
+ {"role": "user", "content": qna_template.format(
70
+ context=context_for_query,
71
+ question=question
 
 
 
 
 
 
 
 
72
  )
73
+ }
74
+ ]
 
 
 
 
 
75
 
76
+ yield "Running... Getting best answer from AI", "", question
77
 
78
+ try:
79
+ answer_analyzed = client.chat.completions.create(
80
+ model=model_name,
81
+ messages=answer_to_analyze,
82
+ max_tokens=2000,
83
+ temperature=temperature
84
+ )
85
+
86
+ yield "Stopped... Process Finished", answer_analyzed.choices[0].message.content, ""
87
 
88
+ except openai.OpenAIError as e:
89
+ print(f"An error occurred: {str(e)}")
90
+ return
 
 
 
 
 
91
 
92
+ log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
93
+ log_folder = log_file.parent
94
 
95
+ scheduler = CommitScheduler(
96
+ repo_id="GL-Project3_Logs",
97
+ repo_type="dataset",
98
+ folder_path=log_folder,
99
+ path_in_repo="data",
100
+ every=2
101
+ token=hf_token
102
+ )
103
 
104
+ with scheduler.lock:
105
+ with log_file.open("a") as f:
106
+ f.write(json.dumps(
107
+ {
108
+ 'user_input': question,
109
+ 'retrieved_context': context_for_query,
110
+ 'model_response': answer_analyzed.choices[0].message.content
111
+ }
112
+ ))
113
+ f.write("\n")
114
+ #-------------------------------------------------------------------------------------
115
+
116
+ hf_token = os.getenv("HF_TOKEN")
117
+ openai_api = os.getenv("OPENAI_API_KEY")
118
 
119
  client=OpenAI(
120
+ #api_key=openai_api
121
+ api_key=userdata.get('OpenAI-GL')
122
  )
123
 
124
  model_name = 'gpt-3.5-turbo'
 
125
  embedding_model = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
126
+ vectordb_location = './companies-10K-2023_db1'
127
  collection_name = 'companies-10K-2023'
128
 
129
+ vector_db = Chroma(
130
  collection_name=collection_name,
131
  embedding_function=embedding_model,
132
+ persist_directory=vectordb_location
133
  )
134
 
135
+ stored_documents = vector_db.get(include=["metadatas"])
136
+ sources = set()
137
  document_names = set()
138
 
139
  for metadata in stored_documents['metadatas']:
 
140
  source = metadata.get('source', 'No source found')
141
  document_names.add(os.path.basename(source))
142
 
143
  document_list = list(document_names)
144
 
145
+ with gr.Blocks() as demo:
 
 
 
146
  with gr.Row():
147
  with gr.Column(scale=1):
148
 
 
151
  label="Document",
152
  )
153
 
154
+ question_input = gr.Textbox(
155
+ label="Enter your question",
156
+ placeholder="Type your question here...",
157
+ )
158
+
159
+ with gr.Column(scale=1):
160
+
161
  quotes_to_fetch = gr.Slider(
162
  minimum=1,
163
+ maximum=10,
164
  step=1,
165
  label="How many quotes you want from the source",
166
  )
 
170
  maximum=1,
171
  step=0.1,
172
  label="Temperature",
173
+ info="Controls randomness: 0 = deterministic, 1 = creative/unexpected answers. If you can't get an answer try increasing the temperature."
174
+ )
175
 
176
+ with gr.Row():
177
+
178
+ fetch_answer = gr.Button("Analyze and Answer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  with gr.Row():
181
+
182
+ answer_output = gr.Textbox(
183
+ label="Answer",
184
+ placeholder="Your answer will be displayed here..."
185
+ )
186
 
187
+ fetch_answer.click(
188
+ get_answer,
 
 
 
189
  inputs=[question_input, quotes_to_fetch, temperature_slider, document_dropdown],
190
+ outputs=[fetch_answer, answer_output, question_input]
191
  )
192
 
193
+ demo.launch(share=True, show_error=True, debug=True)