Spaces:

frankai98
/

Tokentesting

Sleeping

App Files Community

frankai98 committed on Mar 28, 2025

Commit

f6e732f

verified ·

1 Parent(s): 03d0510

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -28

app.py CHANGED Viewed

@@ -115,37 +115,14 @@ Now produce the final report only, without reiterating these instructions or the
     ]
     return messages
-# A helper to extract the assistant's response
-def extract_assistant_response(output):
-    """
-    Extract only the content from the assistant's response.
-    Handles nested structure from the pipeline output.
-    """
-    try:
-        if isinstance(output, list) and len(output) > 0 and 'generated_text' in output[0]:
-            messages = output[0]['generated_text']
-            if isinstance(messages, list):
-                for message in messages:
-                    if isinstance(message, dict) and message.get('role') == 'assistant':
-                        return message.get('content', '')
-        if isinstance(output, list):
-            for item in output:
-                if isinstance(item, dict) and item.get('role') == 'assistant':
-                    return item.get('content', '')
-        print(f"DEBUG: Could not find assistant response in: {str(output)[:200]}...")
-        return ''
-    except Exception as e:
-        print(f"Error extracting assistant response: {e}")
-        return ''
 # Main Function Part:
 def main():
     # Let the user specify the column name for tweets text (defaulting to "content")
     tweets_column = st.text_input("Enter the column name for Tweets🐦:", value="content")
     # Input: Query question for analysis and CSV file upload for candidate tweets
-    query_input = st.text_area("Enter your query question❔ for analysis (this does not need to be part of the CSV):")
-    uploaded_file = st.file_uploader(f"Upload Tweets CSV File🗄️ (must contain a '{tweets_column}' column with preferably <500 tweets)", type=["csv"])
     candidate_docs = []
     if uploaded_file is not None:
@@ -263,7 +240,7 @@ def main():
             status_text.markdown("**📝 Preparing data for report generation...**")
             progress_bar.progress(75)
-            max_tweets = 500
             if len(scored_docs) > max_tweets:
                 sampled_docs = random.sample(scored_docs, max_tweets)
                 st.info(f"Sampling {max_tweets} out of {len(scored_docs)} tweets for report generation")
@@ -280,7 +257,7 @@ def main():
                         device="cuda" if torch.cuda.is_available() else -1,
                         torch_dtype=torch.bfloat16,
                     )
-                    result = pipe(prompt, max_new_tokens=256, return_full_text=False)
                     return result, None
                 except Exception as e:
                     return None, str(e)
@@ -295,7 +272,7 @@ def main():
                 report = "Error generating report. Please try again with fewer tweets."
             else:
                 report = raw_result[0][0]['generated_text']
-                # report = extract_assistant_response(raw_result)
             clear_gpu_memory()
             progress_bar.progress(100)
             status_text.success("**✅ Generation complete!**")

     ]
     return messages
 # Main Function Part:
 def main():
     # Let the user specify the column name for tweets text (defaulting to "content")
     tweets_column = st.text_input("Enter the column name for Tweets🐦:", value="content")
     # Input: Query question for analysis and CSV file upload for candidate tweets
+    query_input = st.text_area("Enter your query question❓for analysis (Format: How do these people feel about ...?) (this does not need to be part of the CSV):")
+    uploaded_file = st.file_uploader(f"Upload Tweets CSV File🗄️(must contain a '{tweets_column}' column with preferably <500 tweets)", type=["csv"])
     candidate_docs = []
     if uploaded_file is not None:
             status_text.markdown("**📝 Preparing data for report generation...**")
             progress_bar.progress(75)
+            max_tweets = 1000
             if len(scored_docs) > max_tweets:
                 sampled_docs = random.sample(scored_docs, max_tweets)
                 st.info(f"Sampling {max_tweets} out of {len(scored_docs)} tweets for report generation")
                         device="cuda" if torch.cuda.is_available() else -1,
                         torch_dtype=torch.bfloat16,
                     )
+                    result = pipe(prompt, max_new_tokens=400, return_full_text=False)
                     return result, None
                 except Exception as e:
                     return None, str(e)
                 report = "Error generating report. Please try again with fewer tweets."
             else:
                 report = raw_result[0][0]['generated_text']
             clear_gpu_memory()
             progress_bar.progress(100)
             status_text.success("**✅ Generation complete!**")