Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -356,44 +356,65 @@ def extract_name(email):
|
|
| 356 |
|
| 357 |
def clean_text(text):
|
| 358 |
"""
|
| 359 |
-
Clean text to remove broken formatting
|
|
|
|
| 360 |
"""
|
| 361 |
text = text.replace('\\n', '\n') # Normalize newlines
|
| 362 |
|
| 363 |
-
# Remove all HTML tags
|
| 364 |
text = re.sub(r'<[^>]*>', '', text)
|
|
|
|
|
|
|
|
|
|
| 365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
# Fix spacing issues between numbers and words
|
| 367 |
text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', r'\1 \2', text)
|
| 368 |
text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text)
|
| 369 |
text = re.sub(r'([a-zA-Z])\s*(\d)', r'\1 \2', text)
|
| 370 |
|
| 371 |
-
#
|
| 372 |
-
text = re.sub(r'\*+', '*', text) # Remove excess asterisks but keep single or double for Markdown
|
| 373 |
-
|
| 374 |
-
# Split text into paragraphs and clean each paragraph
|
| 375 |
paragraphs = text.split('\n\n')
|
| 376 |
cleaned_paragraphs = []
|
|
|
|
| 377 |
for paragraph in paragraphs:
|
| 378 |
lines = paragraph.split('\n')
|
| 379 |
cleaned_lines = []
|
|
|
|
| 380 |
for line in lines:
|
| 381 |
-
#
|
| 382 |
-
if line.strip().startswith('
|
| 383 |
cleaned_line = line.strip()
|
| 384 |
else:
|
| 385 |
-
#
|
| 386 |
-
cleaned_line = re.sub(r'
|
| 387 |
cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)
|
| 388 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
cleaned_lines.append(cleaned_line)
|
| 390 |
-
|
| 391 |
-
# Join lines
|
| 392 |
cleaned_paragraph = '\n'.join(cleaned_lines)
|
| 393 |
cleaned_paragraphs.append(cleaned_paragraph)
|
| 394 |
-
|
| 395 |
-
# Join
|
| 396 |
-
cleaned_text = '\n\n'.join(cleaned_paragraphs)
|
| 397 |
return cleaned_text
|
| 398 |
|
| 399 |
|
|
@@ -1135,12 +1156,11 @@ Before submitting any content, ensure it includes:
|
|
| 1135 |
|
| 1136 |
#### Report/Article/Write-up/ Blog
|
| 1137 |
- **Introduction**: "Here is a draft of your [Annual Report/Article/Write-up]. Feel free to suggest further refinements."
|
| 1138 |
-
- **Content**:
|
| 1139 |
- Give headlines conversational headings to structure content . Do not include source links within the content.
|
| 1140 |
- Write from the perspective of being part of the organization, using "we".
|
| 1141 |
- Maintain an active, engaging, and direct tone.
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
#### Social Media Posts
|
| 1145 |
- **Introduction Line**: "Here is a draft of your social media post. Feel free to suggest further refinements."
|
| 1146 |
- **Content**:
|
|
@@ -1817,7 +1837,7 @@ def handle_document_query(query):
|
|
| 1817 |
# Generate AI response with document context
|
| 1818 |
full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
|
| 1819 |
try:
|
| 1820 |
-
llm = ChatOpenAI(model="gpt-
|
| 1821 |
response = llm.invoke(full_prompt)
|
| 1822 |
return response.content
|
| 1823 |
except Exception as e:
|
|
@@ -2008,7 +2028,7 @@ def handle_prompt(prompt):
|
|
| 2008 |
" Use the following structure:"
|
| 2009 |
" -Heuristics: examples (e.g., social proof, authority, commitment)."
|
| 2010 |
" -Creative Techniques: examples (list only relevant marketing techniques without additional details)."
|
| 2011 |
-
"The final output must not include AI jargon or marketing buzzwords
|
| 2012 |
)
|
| 2013 |
else:
|
| 2014 |
appended_instructions = ""
|
|
|
|
| 356 |
|
| 357 |
def clean_text(text):
    """
    Clean generated text for display.

    Steps, in order:
      1. Turn literal backslash-n sequences into real newlines.
      2. Strip HTML tags.
      3. Percent-encode the URL of every Markdown link ``[text](http...)``.
      4. Insert spaces between digits and letters (e.g. ``5billion`` ->
         ``5 billion``).
      5. Per line: keep lines fully wrapped in ``**`` as-is (bold headings);
         otherwise drop asterisk/minus-sign artefacts and split camelCase
         joins (``helloWorld`` -> ``hello World``). Runs of whitespace are
         collapsed to a single space on every line.
      6. Drop empty paragraphs and re-join with blank lines.

    Args:
        text: Raw text to clean. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned text as a string.
    """
    # Imported locally so this function does not depend on a module-level
    # import of `quote`.
    from urllib.parse import quote

    if not text:
        return ""

    # NUL is used below as a placeholder delimiter; it has no place in display
    # text, so drop any stray occurrences up front.
    text = text.replace('\x00', '')

    text = text.replace('\\n', '\n')  # Normalize newlines

    # Remove all HTML tags
    text = re.sub(r'<[^>]*>', '', text)

    # Encode the URL of each Markdown link and park it behind a placeholder.
    # Without this, the spacing/camelCase/asterisk passes below would rewrite
    # the URL itself (e.g. "/v2a" -> "/v 2 a", "%2F" -> "%2 F") and break the
    # link. The link *text* is left in place so it is still cleaned as before.
    encoded_urls = []

    def stash_url(match):
        """Encode the matched link's URL and replace it with a placeholder."""
        # '%' and '#' are kept safe so already-encoded URLs are not
        # double-encoded and fragment anchors keep working.
        encoded_urls.append(quote(match.group(2), safe=":/?=&%#"))
        return f"[{match.group(1)}](\x00{len(encoded_urls) - 1}\x00)"

    text = re.sub(r'\[([^\]]+)\]\((https?://[^\s]+)\)', stash_url, text)

    # Fix spacing issues between numbers and words
    text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', r'\1 \2', text)
    text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text)
    text = re.sub(r'([a-zA-Z])\s*(\d)', r'\1 \2', text)

    cleaned_paragraphs = []
    for paragraph in text.split('\n\n'):
        cleaned_lines = []
        for line in paragraph.split('\n'):
            stripped = line.strip()
            # Preserve bold formatting for headings
            if stripped.startswith('**') and stripped.endswith('**'):
                cleaned_line = stripped
            else:
                # Remove asterisks and look-alike math symbols, then split
                # words that were glued together (lower-case followed by
                # upper-case).
                cleaned_line = re.sub(r'\*|\−|\∗', '', line)
                cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)

            # Collapse runs of whitespace. (Bullet lines need no special
            # handling: each already sits on its own line after the join.)
            cleaned_lines.append(re.sub(r'\s+', ' ', cleaned_line).strip())

        cleaned_paragraphs.append('\n'.join(cleaned_lines))

    # Join the paragraphs back together, skipping empty ones
    cleaned_text = '\n\n'.join(para for para in cleaned_paragraphs if para)

    # Put the encoded URLs back in place of their placeholders.
    return re.sub(
        r'\x00(\d+)\x00',
        lambda m: encoded_urls[int(m.group(1))],
        cleaned_text,
    )
|
| 419 |
|
| 420 |
|
|
|
|
| 1156 |
|
| 1157 |
#### Report/Article/Write-up/ Blog
|
| 1158 |
- **Introduction**: "Here is a draft of your [Annual Report/Article/Write-up]. Feel free to suggest further refinements."
|
| 1159 |
+
- **Content**: **Donot give source link in content**
|
| 1160 |
- Give headlines conversational headings to structure content . Do not include source links within the content.
|
| 1161 |
- Write from the perspective of being part of the organization, using "we".
|
| 1162 |
- Maintain an active, engaging, and direct tone.
|
| 1163 |
+
|
|
|
|
| 1164 |
#### Social Media Posts
|
| 1165 |
- **Introduction Line**: "Here is a draft of your social media post. Feel free to suggest further refinements."
|
| 1166 |
- **Content**:
|
|
|
|
| 1837 |
# Generate AI response with document context
|
| 1838 |
full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
|
| 1839 |
try:
|
| 1840 |
+
llm = ChatOpenAI(model="gpt-4o", temperature=0.5, api_key=openai_api_key)
|
| 1841 |
response = llm.invoke(full_prompt)
|
| 1842 |
return response.content
|
| 1843 |
except Exception as e:
|
|
|
|
| 2028 |
" Use the following structure:"
|
| 2029 |
" -Heuristics: examples (e.g., social proof, authority, commitment)."
|
| 2030 |
" -Creative Techniques: examples (list only relevant marketing techniques without additional details)."
|
| 2031 |
+
"The final output must not include AI jargon or marketing buzzwords and Give well title and 2-3 sub-headlines. Strictly interconnected sections having Flowing narrative and audience engagement at its peak to create an impactful and memorable experience.Avoid mentioning trustbucket names."
|
| 2032 |
)
|
| 2033 |
else:
|
| 2034 |
appended_instructions = ""
|