Spaces:

MrAlvaroA
/

GL-Project3

Runtime error

App Files Files Community

MrAlvaroA committed on Aug 18, 2024

Commit

0da6223

verified ·

1 Parent(s): 60b3f41

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -115

app.py CHANGED Viewed

@@ -2,13 +2,17 @@ import os
 import openai
 import pandas as pd
 import gradio as gr
 from openai import OpenAI
 from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import Chroma
 #-------------------------------------------------------------------------------------
-def all_functions (question, quotes, temperature, document):
-    yield "Analyzing Question", "", ""
     with open('./templates/question_analysis.txt', 'r') as file:
         question_analysis = file.read()
@@ -31,128 +35,114 @@ def all_functions (question, quotes, temperature, document):
             temperature=0.0
         )
-        if response.choices[0].message.content == "Not a question":
-            yield "Question Analysis Done", "The question is not a question, can not continue", ""
-            return
-        elif response.choices[0].message.content == "Too many questions, maximum is 5.":
-            yield "Question Analysis Done", "Too many questions at once, can not continue", ""
-            return
         else:
-            lines = response.choices[0].message.content.splitlines()
-            question_analysis_string = ""
-            for line in lines:
-              cleaned_line = line.split("☻:")[-1]
-              question_analysis_string += cleaned_line + "\n"
-            yield "Question Analysis Done", question_analysis_string, ""
     except openai.OpenAIError as e:
         print(f"An error occurred: {str(e)}")
-        return
-    del lines[0]
-    #automatic_textboxes = create_answers_textboxes(lines)
     with open('./templates/qna.txt', 'r') as file:
         qna = file.read()
     with open('./templates/qna_template.txt', 'r') as file:
         qna_template = file.read()
-    source = get_full_path(document, source)
-    analyzed_answers =[]
-    for i, line in enumerate(lines):
-        yield "Analyzing Quote #" + i, question_analysis_string, ""
-        returned_quotes = vectorstored_persisted.similarity_search(line, k=quotes, filter = {"source":source})
-        context_for_query = ""
-        for i, d in enumerate(returned_quotes, start=1):
-            context_for_query += f"Quote {i}:\n"
-            context_for_query += d.page_content + "\n"
-            context_for_query += f"(Page = {d.metadata.get('page', 'Unknown')})\n\n"
-        answer_to_analyze = [
-            {"role": "system", "content": qna},
-            {"role": "user", "content": qna_template.format(
-                context=context_for_query,
-                question=user_input
-                )
-            }
-        ]
-        try:
-            answer_analyzed = client.chat.completions.create(
-                model=model_name,
-                messages=answer_to_analyze,
-                max_tokens=2000,
-                temperature=0.4
             )
-            analyzed_answers.append(answer_analyzed.choices[0].message.content)
-        except openai.OpenAIError as e:
-            print(f"An error occurred: {str(e)}")
-            return
-#-------------------------------------------------------------------------------------
-#-------------------------------------------------------------------------------------
-def create_answers_textboxes(questions):
-    for i, line in enumerate(lines):
-        textboxes.append(gr.Textbox(label=f"Question {i+1}: {question}", lines=10))
-    return textboxes
-#-------------------------------------------------------------------------------------
-#-------------------------------------------------------------------------------------
-def get_full_path(selected_filename, file_list):
-    for full_path in file_list:
-        # Extract the filename from the full path
-        if os.path.basename(full_path) == selected_filename:
-            return full_path
-    return None  # Return None if no match is found
-#-------------------------------------------------------------------------------------
 client=OpenAI(
-    api_key=os.getenv("OPENAI_API_KEY")
 )
 model_name = 'gpt-3.5-turbo'
-rater_model = 'gpt-4o-mini'
 embedding_model = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
-persisted_vectordb_location = './vector_db/'
 collection_name = 'companies-10K-2023'
-vectorstored_persisted = Chroma(
     collection_name=collection_name,
     embedding_function=embedding_model,
-    persist_directory=persisted_vectordb_location
 )
-stored_documents = vectorstored_persisted.get(include=["metadatas"])
-source = set()
 document_names = set()
 for metadata in stored_documents['metadatas']:
-    # Extract the source and use os.path.basename to get only the file name
     source = metadata.get('source', 'No source found')
     document_names.add(os.path.basename(source))
 document_list = list(document_names)
-with gr.Blocks(css="""
-    #question_input_box {height: 140px;}
-    #question_analysis_box {height: 320px;}
-""") as demo:
     with gr.Row():
         with gr.Column(scale=1):
@@ -161,9 +151,16 @@ with gr.Blocks(css="""
                 label="Document",
             )
             quotes_to_fetch = gr.Slider(
                 minimum=1,
-                maximum=5,
                 step=1,
                 label="How many quotes you want from the source",
             )
@@ -173,38 +170,24 @@ with gr.Blocks(css="""
                 maximum=1,
                 step=0.1,
                 label="Temperature",
-                info="Controls randomness: 0 = deterministic, 1 = creative/unexpected answers. If you can't get an answer try increasing the temperature but keep in mind that the accuracy can lower by doing this."
-            )
-            question_input = gr.Textbox(
-                label="Enter your question",
-                placeholder="Type your question here...",
-                elem_id="question_input_box",
-                lines=3
-            )
-        with gr.Column(scale=1):
-            status_button = gr.Button(value="Ready", interactive=False, elem_id="status_button")
-            question_analysis_output = gr.Textbox(
-                label="Question Analysis",
-                placeholder="The analysis will be shown here...",
-                interactive=False,
-                elem_id="question_analysis_box",
-                lines=10
-            )
     with gr.Row():
-        analyze_button = gr.Button("Analyze and Answer")
-    with gr.Row():
-        dynamic_textbox_output = gr.Column()
-    analyze_button.click(
-        all_functions,
         inputs=[question_input, quotes_to_fetch, temperature_slider, document_dropdown],
-        outputs=[status_button, question_analysis_output, dynamic_textbox_output]
     )
-demo.launch(share=True, show_error=True)

 import openai
 import pandas as pd
 import gradio as gr
+import uuid
+import json
+from huggingface_hub import CommitScheduler, HfApi
 from openai import OpenAI
 from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import Chroma
 #-------------------------------------------------------------------------------------
+def get_answer (question, quotes, temperature, document):
+    yield "Running... Analyzing Question", "", question
     with open('./templates/question_analysis.txt', 'r') as file:
         question_analysis = file.read()
             temperature=0.0
         )
+        if response.choices[0].message.content == "Valid Question.":
+            yield "Running... Question Analysis Done", "", question
         else:
+            yield "Stopped: Question Analysis Done", "The question is not valid, stopping the process", ""
+            return
     except openai.OpenAIError as e:
         print(f"An error occurred: {str(e)}")
+        return
     with open('./templates/qna.txt', 'r') as file:
         qna = file.read()
     with open('./templates/qna_template.txt', 'r') as file:
         qna_template = file.read()
+    filename = "/content/dataset/" + document
+    quotes = vector_db.similarity_search(question, k=quotes, filter = {"source":filename})
+    context_for_query = ""
+    for i, d in enumerate(quotes, start=1):
+        context_for_query += f"Quote {i}:\n"
+        context_for_query += d.page_content + "\n"
+        context_for_query += f"(Page = {d.metadata.get('page', 'Unknown')})\n\n"
+    answer_to_analyze = [
+        {"role": "system", "content": qna},
+        {"role": "user", "content": qna_template.format(
+            context=context_for_query,
+            question=question
             )
+        }
+    ]
+    yield "Running... Getting best answer from AI", "", question
+    try:
+        answer_analyzed = client.chat.completions.create(
+            model=model_name,
+            messages=answer_to_analyze,
+            max_tokens=2000,
+            temperature=temperature
+        )
+        yield "Stopped... Process Finished", answer_analyzed.choices[0].message.content, ""
+    except openai.OpenAIError as e:
+        print(f"An error occurred: {str(e)}")
+        return
+    log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
+    log_folder = log_file.parent
+    scheduler = CommitScheduler(
+        repo_id="GL-Project3_Logs",
+        repo_type="dataset",
+        folder_path=log_folder,
+        path_in_repo="data",
+        every=2
+        token=hf_token
+    )
+    with scheduler.lock:
+        with log_file.open("a") as f:
+            f.write(json.dumps(
+                {
+                    'user_input': question,
+                    'retrieved_context': context_for_query,
+                    'model_response': answer_analyzed.choices[0].message.content
+                }
+            ))
+            f.write("\n")
+#-------------------------------------------------------------------------------------
+hf_token = os.getenv("HF_TOKEN")
+openai_api = os.getenv("OPENAI_API_KEY")
 client=OpenAI(
+    #api_key=openai_api
+    api_key=userdata.get('OpenAI-GL')
 )
 model_name = 'gpt-3.5-turbo'
 embedding_model = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
+vectordb_location = './companies-10K-2023_db1'
 collection_name = 'companies-10K-2023'
+vector_db = Chroma(
     collection_name=collection_name,
     embedding_function=embedding_model,
+    persist_directory=vectordb_location
 )
+stored_documents = vector_db.get(include=["metadatas"])
+sources = set()
 document_names = set()
 for metadata in stored_documents['metadatas']:
     source = metadata.get('source', 'No source found')
     document_names.add(os.path.basename(source))
 document_list = list(document_names)
+with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=1):
                 label="Document",
             )
+            question_input = gr.Textbox(
+                label="Enter your question",
+                placeholder="Type your question here...",
+            )
+        with gr.Column(scale=1):
             quotes_to_fetch = gr.Slider(
                 minimum=1,
+                maximum=10,
                 step=1,
                 label="How many quotes you want from the source",
             )
                 maximum=1,
                 step=0.1,
                 label="Temperature",
+                info="Controls randomness: 0 = deterministic, 1 = creative/unexpected answers. If you can't get an answer try increasing the temperature."
+            )
+    with gr.Row():
+        fetch_answer = gr.Button("Analyze and Answer")
     with gr.Row():
+        answer_output = gr.Textbox(
+                label="Answer",
+                placeholder="Your answer will be displayed here..."
+            )
+    fetch_answer.click(
+        get_answer,
         inputs=[question_input, quotes_to_fetch, temperature_slider, document_dropdown],
+        outputs=[fetch_answer, answer_output, question_input]
     )
+demo.launch(share=True, show_error=True, debug=True)