Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -115,37 +115,14 @@ Now produce the final report only, without reiterating these instructions or the
|
|
| 115 |
]
|
| 116 |
return messages
|
| 117 |
|
| 118 |
-
# A helper to extract the assistant's response
|
| 119 |
-
def extract_assistant_response(output):
|
| 120 |
-
"""
|
| 121 |
-
Extract only the content from the assistant's response.
|
| 122 |
-
Handles nested structure from the pipeline output.
|
| 123 |
-
"""
|
| 124 |
-
try:
|
| 125 |
-
if isinstance(output, list) and len(output) > 0 and 'generated_text' in output[0]:
|
| 126 |
-
messages = output[0]['generated_text']
|
| 127 |
-
if isinstance(messages, list):
|
| 128 |
-
for message in messages:
|
| 129 |
-
if isinstance(message, dict) and message.get('role') == 'assistant':
|
| 130 |
-
return message.get('content', '')
|
| 131 |
-
if isinstance(output, list):
|
| 132 |
-
for item in output:
|
| 133 |
-
if isinstance(item, dict) and item.get('role') == 'assistant':
|
| 134 |
-
return item.get('content', '')
|
| 135 |
-
print(f"DEBUG: Could not find assistant response in: {str(output)[:200]}...")
|
| 136 |
-
return ''
|
| 137 |
-
except Exception as e:
|
| 138 |
-
print(f"Error extracting assistant response: {e}")
|
| 139 |
-
return ''
|
| 140 |
-
|
| 141 |
# Main Function Part:
|
| 142 |
def main():
|
| 143 |
# Let the user specify the column name for tweets text (defaulting to "content")
|
| 144 |
tweets_column = st.text_input("Enter the column name for Tweets🐦:", value="content")
|
| 145 |
|
| 146 |
# Input: Query question for analysis and CSV file upload for candidate tweets
|
| 147 |
-
query_input = st.text_area("Enter your query question
|
| 148 |
-
uploaded_file = st.file_uploader(f"Upload Tweets CSV File🗄️
|
| 149 |
|
| 150 |
candidate_docs = []
|
| 151 |
if uploaded_file is not None:
|
|
@@ -263,7 +240,7 @@ def main():
|
|
| 263 |
status_text.markdown("**📝 Preparing data for report generation...**")
|
| 264 |
progress_bar.progress(75)
|
| 265 |
|
| 266 |
-
max_tweets =
|
| 267 |
if len(scored_docs) > max_tweets:
|
| 268 |
sampled_docs = random.sample(scored_docs, max_tweets)
|
| 269 |
st.info(f"Sampling {max_tweets} out of {len(scored_docs)} tweets for report generation")
|
|
@@ -280,7 +257,7 @@ def main():
|
|
| 280 |
device="cuda" if torch.cuda.is_available() else -1,
|
| 281 |
torch_dtype=torch.bfloat16,
|
| 282 |
)
|
| 283 |
-
result = pipe(prompt, max_new_tokens=
|
| 284 |
return result, None
|
| 285 |
except Exception as e:
|
| 286 |
return None, str(e)
|
|
@@ -295,7 +272,7 @@ def main():
|
|
| 295 |
report = "Error generating report. Please try again with fewer tweets."
|
| 296 |
else:
|
| 297 |
report = raw_result[0][0]['generated_text']
|
| 298 |
-
|
| 299 |
clear_gpu_memory()
|
| 300 |
progress_bar.progress(100)
|
| 301 |
status_text.success("**✅ Generation complete!**")
|
|
|
|
| 115 |
]
|
| 116 |
return messages
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
# Main Function Part:
|
| 119 |
def main():
|
| 120 |
# Let the user specify the column name for tweets text (defaulting to "content")
|
| 121 |
tweets_column = st.text_input("Enter the column name for Tweets🐦:", value="content")
|
| 122 |
|
| 123 |
# Input: Query question for analysis and CSV file upload for candidate tweets
|
| 124 |
+
query_input = st.text_area("Enter your query question❓for analysis (Format: How do these people feel about ...?) (this does not need to be part of the CSV):")
|
| 125 |
+
uploaded_file = st.file_uploader(f"Upload Tweets CSV File🗄️(must contain a '{tweets_column}' column with preferably <500 tweets)", type=["csv"])
|
| 126 |
|
| 127 |
candidate_docs = []
|
| 128 |
if uploaded_file is not None:
|
|
|
|
| 240 |
status_text.markdown("**📝 Preparing data for report generation...**")
|
| 241 |
progress_bar.progress(75)
|
| 242 |
|
| 243 |
+
max_tweets = 1000
|
| 244 |
if len(scored_docs) > max_tweets:
|
| 245 |
sampled_docs = random.sample(scored_docs, max_tweets)
|
| 246 |
st.info(f"Sampling {max_tweets} out of {len(scored_docs)} tweets for report generation")
|
|
|
|
| 257 |
device="cuda" if torch.cuda.is_available() else -1,
|
| 258 |
torch_dtype=torch.bfloat16,
|
| 259 |
)
|
| 260 |
+
result = pipe(prompt, max_new_tokens=400, return_full_text=False)
|
| 261 |
return result, None
|
| 262 |
except Exception as e:
|
| 263 |
return None, str(e)
|
|
|
|
| 272 |
report = "Error generating report. Please try again with fewer tweets."
|
| 273 |
else:
|
| 274 |
report = raw_result[0][0]['generated_text']
|
| 275 |
+
|
| 276 |
clear_gpu_memory()
|
| 277 |
progress_bar.progress(100)
|
| 278 |
status_text.success("**✅ Generation complete!**")
|