Update app.py
Browse files
app.py
CHANGED
|
@@ -310,7 +310,10 @@ client = InferenceClient(api_key=hf_api_key)
|
|
| 310 |
# ---------------------------------------------------------------------------------------
|
| 311 |
# Survey Analysis Class
|
| 312 |
# ---------------------------------------------------------------------------------------
|
| 313 |
-
class
|
|
|
|
|
|
|
|
|
|
| 314 |
def prepare_llm_input(self, survey_response, topics):
|
| 315 |
topic_descriptions = "\n".join([f"- **{t}**: {d}" for t, d in topics.items()])
|
| 316 |
return f"""Extract and summarize PDF notes based on topics:
|
|
@@ -330,7 +333,7 @@ Meeting Notes:
|
|
| 330 |
|
| 331 |
def prompt_response_from_hf_llm(self, llm_input):
|
| 332 |
system_prompt = """
|
| 333 |
-
You are an expert assistant tasked with extracting exact quotes from provided meeting notes based on given topics.
|
| 334 |
|
| 335 |
Instructions:
|
| 336 |
- Only extract exact quotes relevant to provided topics.
|
|
@@ -341,18 +344,23 @@ Meeting Notes:
|
|
| 341 |
- "Exact quote"
|
| 342 |
"""
|
| 343 |
|
| 344 |
-
response = client.chat.completions.create(
|
| 345 |
model="meta-llama/Llama-3.1-70B-Instruct",
|
| 346 |
messages=[
|
| 347 |
{"role": "system", "content": system_prompt},
|
| 348 |
{"role": "user", "content": llm_input}
|
| 349 |
],
|
|
|
|
| 350 |
temperature=0.5,
|
| 351 |
max_tokens=1024,
|
| 352 |
top_p=0.7
|
| 353 |
)
|
| 354 |
|
| 355 |
-
response_content =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
print("Full AI Response:", response_content) # Debugging
|
| 357 |
return response_content.strip()
|
| 358 |
|
|
@@ -368,6 +376,15 @@ Meeting Notes:
|
|
| 368 |
results.append({'Document_Text': row['Document_Text'], 'Topic_Summary': notes})
|
| 369 |
return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)['Topic_Summary']], axis=1)
|
| 370 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
# ---------------------------------------------------------------------------------------
|
| 372 |
# Helper Functions
|
| 373 |
# ---------------------------------------------------------------------------------------
|
|
@@ -457,7 +474,7 @@ if st.session_state['pdf_processed']:
|
|
| 457 |
st.warning("Please enter at least one topic and description.")
|
| 458 |
st.stop()
|
| 459 |
|
| 460 |
-
analyzer =
|
| 461 |
processed_df = analyzer.process_dataframe(st.session_state['df'], topics)
|
| 462 |
extracted_df = extract_excerpts(processed_df)
|
| 463 |
|
|
|
|
| 310 |
# ---------------------------------------------------------------------------------------
|
| 311 |
# Survey Analysis Class
|
| 312 |
# ---------------------------------------------------------------------------------------
|
| 313 |
+
class AIAnalysis:
    """Run LLM-based topic extraction over survey / PDF note text.

    Wraps a Hugging Face ``InferenceClient`` so that every request made by
    the analysis methods goes through one shared, authenticated client.
    """

    def __init__(self, client):
        # client: the shared InferenceClient used for all chat-completion
        # calls (see prompt_response_from_hf_llm, which reads self.client).
        self.client = client
|
| 317 |
def prepare_llm_input(self, survey_response, topics):
|
| 318 |
topic_descriptions = "\n".join([f"- **{t}**: {d}" for t, d in topics.items()])
|
| 319 |
return f"""Extract and summarize PDF notes based on topics:
|
|
|
|
| 333 |
|
| 334 |
def prompt_response_from_hf_llm(self, llm_input):
|
| 335 |
system_prompt = """
|
| 336 |
+
You are an expert assistant tasked with extracting exact quotes from provided meeting notes based on given topics.
|
| 337 |
|
| 338 |
Instructions:
|
| 339 |
- Only extract exact quotes relevant to provided topics.
|
|
|
|
| 344 |
- "Exact quote"
|
| 345 |
"""
|
| 346 |
|
| 347 |
+
response = self.client.chat.completions.create(
|
| 348 |
model="meta-llama/Llama-3.1-70B-Instruct",
|
| 349 |
messages=[
|
| 350 |
{"role": "system", "content": system_prompt},
|
| 351 |
{"role": "user", "content": llm_input}
|
| 352 |
],
|
| 353 |
+
stream=True,
|
| 354 |
temperature=0.5,
|
| 355 |
max_tokens=1024,
|
| 356 |
top_p=0.7
|
| 357 |
)
|
| 358 |
|
| 359 |
+
response_content = ""
|
| 360 |
+
for message in response:
|
| 361 |
+
# Correctly handle streaming response
|
| 362 |
+
response_content += message.choices[0].delta.content
|
| 363 |
+
|
| 364 |
print("Full AI Response:", response_content) # Debugging
|
| 365 |
return response_content.strip()
|
| 366 |
|
|
|
|
| 376 |
results.append({'Document_Text': row['Document_Text'], 'Topic_Summary': notes})
|
| 377 |
return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)['Topic_Summary']], axis=1)
|
| 378 |
|
| 379 |
+
def process_dataframe(self, df, topics):
    """Annotate each row of ``df`` with an LLM-generated topic summary.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'Document_Text'`` column; one LLM round-trip is
        made per row.
    topics : dict
        Mapping of topic name -> description, forwarded to the prompt
        builder.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (index reset) with an added ``'Topic_Summary'``
        column.
    """
    summaries = []
    for _, row in df.iterrows():
        llm_input = self.prepare_llm_input(row['Document_Text'], topics)
        response = self.prompt_response_from_hf_llm(llm_input)
        summaries.append(self.extract_text(response))
    out = df.reset_index(drop=True)
    # Assign the column directly. The previous
    # pd.concat([..., pd.DataFrame(results)['Topic_Summary']], axis=1)
    # raised KeyError when `df` was empty, because an empty DataFrame
    # built from [] has no columns to index.
    out['Topic_Summary'] = summaries
    return out
|
| 387 |
+
|
| 388 |
# ---------------------------------------------------------------------------------------
|
| 389 |
# Helper Functions
|
| 390 |
# ---------------------------------------------------------------------------------------
|
|
|
|
| 474 |
st.warning("Please enter at least one topic and description.")
|
| 475 |
st.stop()
|
| 476 |
|
| 477 |
+
# Bug fix: AIAnalysis.__init__(self, client) has no default for `client`,
# so the previous `AIAnalysis()` raised TypeError at runtime. Pass the
# module-level InferenceClient created at startup.
analyzer = AIAnalysis(client)
processed_df = analyzer.process_dataframe(st.session_state['df'], topics)
extracted_df = extract_excerpts(processed_df)
|
| 480 |
|