Spaces:
Sleeping
Sleeping
T-K-O-H committed on
Commit ·
42bec52
1
Parent(s): b7cd3e1
HuggingFace Fix PDF Edition
Browse files- README.md +20 -12
- app.py +92 -109
- requirements.txt +8 -7
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
|
@@ -9,11 +9,11 @@ app_file: app.py
|
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
#
|
| 13 |
|
| 14 |
-
Transform your
|
| 15 |
|
| 16 |
-
- Extracts
|
| 17 |
- Enhances content using AI
|
| 18 |
- Formats posts for LinkedIn
|
| 19 |
- Verifies content quality
|
|
@@ -21,7 +21,7 @@ Transform your YouTube videos into professional LinkedIn posts with AI-powered c
|
|
| 21 |
|
| 22 |
## Features
|
| 23 |
|
| 24 |
-
-
|
| 25 |
- ✨ AI-powered content enhancement
|
| 26 |
- π LinkedIn post formatting
|
| 27 |
- β Content verification
|
|
@@ -29,14 +29,22 @@ Transform your YouTube videos into professional LinkedIn posts with AI-powered c
|
|
| 29 |
|
| 30 |
## How to Use
|
| 31 |
|
| 32 |
-
1.
|
| 33 |
2. Click "Generate Post"
|
| 34 |
3. Review the enhanced content
|
| 35 |
4. Copy your LinkedIn-ready post
|
| 36 |
|
| 37 |
-
##
|
| 38 |
|
| 39 |
-
|
| 40 |
-
-
|
| 41 |
-
-
|
| 42 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: PDF to LinkedIn Post Converter
|
| 3 |
+
emoji: π
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
|
|
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# PDF to LinkedIn Post Converter
|
| 13 |
|
| 14 |
+
Transform your PDF documents into professional LinkedIn posts with AI-powered content enhancement. This application:
|
| 15 |
|
| 16 |
+
- Extracts content from PDF files
|
| 17 |
- Enhances content using AI
|
| 18 |
- Formats posts for LinkedIn
|
| 19 |
- Verifies content quality
|
|
|
|
| 21 |
|
| 22 |
## Features
|
| 23 |
|
| 24 |
+
- π PDF document processing
|
| 25 |
- ✨ AI-powered content enhancement
|
| 26 |
- π LinkedIn post formatting
|
| 27 |
- β Content verification
|
|
|
|
| 29 |
|
| 30 |
## How to Use
|
| 31 |
|
| 32 |
+
1. Upload a PDF file
|
| 33 |
2. Click "Generate Post"
|
| 34 |
3. Review the enhanced content
|
| 35 |
4. Copy your LinkedIn-ready post
|
| 36 |
|
| 37 |
+
## Tips for Best Results
|
| 38 |
|
| 39 |
+
- Use well-formatted PDFs with clear text
|
| 40 |
+
- Optimal length: 2-10 pages
|
| 41 |
+
- Ensure PDFs have readable text (not scanned images)
|
| 42 |
+
- Review and personalize the post before sharing
|
| 43 |
+
- Consider your target audience when selecting content
|
| 44 |
+
|
| 45 |
+
## Sample PDFs
|
| 46 |
+
|
| 47 |
+
Try these PDFs to test the application:
|
| 48 |
+
- Open AI PDF: https://example.com/open-ai.pdf
|
| 49 |
+
- Financial News PDF: https://example.com/financial-news.pdf
|
| 50 |
+
- PDF About AI: https://example.com/ai.pdf
|
app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
-
from youtube_transcript_api import YouTubeTranscriptApi
|
| 5 |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
| 6 |
from langchain.prompts import ChatPromptTemplate
|
| 7 |
from langchain_core.output_parsers import StrOutputParser
|
|
@@ -12,6 +11,7 @@ from langchain_chroma import Chroma
|
|
| 12 |
from langchain.schema import Document
|
| 13 |
from datetime import datetime
|
| 14 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
| 15 |
|
| 16 |
# Load environment variables
|
| 17 |
load_dotenv(verbose=True)
|
|
@@ -22,8 +22,8 @@ if not os.getenv("OPENAI_API_KEY"):
|
|
| 22 |
|
| 23 |
# Define state types
|
| 24 |
class ProcessState(TypedDict):
|
| 25 |
-
|
| 26 |
-
|
| 27 |
enhanced: str
|
| 28 |
linkedin_post: str
|
| 29 |
verification: dict
|
|
@@ -34,29 +34,28 @@ class ProcessState(TypedDict):
|
|
| 34 |
needs_improvement: bool
|
| 35 |
research_context: str
|
| 36 |
|
| 37 |
-
def
|
| 38 |
-
"""Extract
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
def
|
| 44 |
-
"""Get
|
| 45 |
try:
|
| 46 |
-
progress(0.25, desc="
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
state["
|
| 50 |
-
state["status"] = "✅ Transcript fetched"
|
| 51 |
return state
|
| 52 |
except Exception as e:
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
state["error"] = "⚠️ YouTube API rate limit reached. Please wait a few minutes and try again."
|
| 56 |
-
state["status"] = "β Rate limit exceeded"
|
| 57 |
-
else:
|
| 58 |
-
state["error"] = f"β οΈ Error fetching transcript: {str(e)}"
|
| 59 |
-
state["status"] = "β Failed to fetch transcript"
|
| 60 |
return state
|
| 61 |
|
| 62 |
def get_chroma_collection():
|
|
@@ -72,9 +71,9 @@ def get_chroma_collection():
|
|
| 72 |
raise Exception(f"Error creating Chroma collection: {str(e)}")
|
| 73 |
|
| 74 |
def enhance_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
|
| 75 |
-
"""Enhance the
|
| 76 |
try:
|
| 77 |
-
if not state["
|
| 78 |
return state
|
| 79 |
|
| 80 |
progress(0.50, desc="Enhancing content...")
|
|
@@ -82,14 +81,14 @@ def enhance_content(state: ProcessState, progress=gr.Progress()) -> ProcessState
|
|
| 82 |
# Get similar content from the vector store
|
| 83 |
collection = get_chroma_collection()
|
| 84 |
similar_docs = collection.similarity_search(
|
| 85 |
-
state["
|
| 86 |
k=3
|
| 87 |
)
|
| 88 |
|
| 89 |
# Initialize LLM for content generation
|
| 90 |
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
|
| 91 |
prompt = ChatPromptTemplate.from_messages([
|
| 92 |
-
("system", """You are an expert content enhancer. Transform this
|
| 93 |
|
| 94 |
1. Identify and emphasize key points
|
| 95 |
2. Add context and examples
|
|
@@ -97,8 +96,8 @@ def enhance_content(state: ProcessState, progress=gr.Progress()) -> ProcessState
|
|
| 97 |
4. Keep it concise (max 3000 characters)
|
| 98 |
5. Maintain factual accuracy
|
| 99 |
|
| 100 |
-
|
| 101 |
-
{
|
| 102 |
|
| 103 |
Similar Content for Context:
|
| 104 |
{similar_content}
|
|
@@ -108,7 +107,7 @@ Similar Content for Context:
|
|
| 108 |
|
| 109 |
chain = prompt | llm | StrOutputParser()
|
| 110 |
state["enhanced"] = chain.invoke({
|
| 111 |
-
"
|
| 112 |
"similar_content": "\n".join([doc.page_content for doc in similar_docs])
|
| 113 |
})
|
| 114 |
state["status"] = "✅ Content enhanced"
|
|
@@ -176,7 +175,7 @@ Remember: The goal is to make the content more engaging while keeping ALL the or
|
|
| 176 |
def verify_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
|
| 177 |
"""Verify the enhanced content against the original using semantic similarity."""
|
| 178 |
try:
|
| 179 |
-
if not state["enhanced"] or not state["
|
| 180 |
return state
|
| 181 |
|
| 182 |
progress(1.0, desc="Verifying content...")
|
|
@@ -195,7 +194,7 @@ def verify_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
|
|
| 195 |
if similar_docs:
|
| 196 |
# Chroma returns a list of Document objects with a score attribute
|
| 197 |
# But the default similarity_search does not return scores, so we just check if content is similar
|
| 198 |
-
similarity_score = 1.0 if similar_docs[0].page_content == state["
|
| 199 |
|
| 200 |
# Initialize LLM for verification
|
| 201 |
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
|
|
@@ -225,7 +224,7 @@ Semantic Similarity Score: {similarity_score}"""),
|
|
| 225 |
|
| 226 |
chain = prompt | llm | StrOutputParser()
|
| 227 |
verification_result = json.loads(chain.invoke({
|
| 228 |
-
"original": state["
|
| 229 |
"enhanced": state["enhanced"],
|
| 230 |
"similarity_score": similarity_score
|
| 231 |
}))
|
|
@@ -262,7 +261,7 @@ def create_workflow() -> StateGraph:
|
|
| 262 |
workflow = StateGraph(ProcessState)
|
| 263 |
|
| 264 |
# Add nodes
|
| 265 |
-
workflow.add_node("
|
| 266 |
workflow.add_node("enhance_content", enhance_content)
|
| 267 |
workflow.add_node("format_linkedin", format_linkedin_post)
|
| 268 |
workflow.add_node("verify_content", verify_content)
|
|
@@ -271,10 +270,10 @@ def create_workflow() -> StateGraph:
|
|
| 271 |
workflow.add_node("enhance_again", enhance_again)
|
| 272 |
|
| 273 |
# Set entry point
|
| 274 |
-
workflow.set_entry_point("
|
| 275 |
|
| 276 |
# Add edges for main flow
|
| 277 |
-
workflow.add_edge("
|
| 278 |
workflow.add_edge("enhance_content", "format_linkedin")
|
| 279 |
workflow.add_edge("format_linkedin", "verify_content")
|
| 280 |
workflow.add_edge("verify_content", "agent_decide")
|
|
@@ -295,7 +294,7 @@ def create_workflow() -> StateGraph:
|
|
| 295 |
|
| 296 |
# Add conditional edges for error handling
|
| 297 |
workflow.add_conditional_edges(
|
| 298 |
-
"
|
| 299 |
should_continue,
|
| 300 |
{
|
| 301 |
True: "enhance_content",
|
|
@@ -419,11 +418,11 @@ def process_from_stage(state: ProcessState, start_stage: str, progress=gr.Progre
|
|
| 419 |
# Select appropriate workflow based on stage
|
| 420 |
if start_stage == "enhance":
|
| 421 |
workflow = create_workflow()
|
| 422 |
-
if not state["
|
| 423 |
return (
|
| 424 |
-
"β οΈ No
|
| 425 |
-
"β Failed: No
|
| 426 |
-
state.get("
|
| 427 |
"",
|
| 428 |
"",
|
| 429 |
""
|
|
@@ -434,7 +433,7 @@ def process_from_stage(state: ProcessState, start_stage: str, progress=gr.Progre
|
|
| 434 |
return (
|
| 435 |
"β οΈ No enhanced content available to format",
|
| 436 |
"β Failed: No enhanced content",
|
| 437 |
-
state.get("
|
| 438 |
state.get("enhanced", ""),
|
| 439 |
"",
|
| 440 |
""
|
|
@@ -457,7 +456,7 @@ def process_from_stage(state: ProcessState, start_stage: str, progress=gr.Progre
|
|
| 457 |
return (
|
| 458 |
final_state.get("error", ""),
|
| 459 |
final_state.get("status", ""),
|
| 460 |
-
final_state.get("
|
| 461 |
final_state.get("enhanced", ""),
|
| 462 |
final_state.get("linkedin_post", ""),
|
| 463 |
verification_text
|
|
@@ -467,7 +466,7 @@ def process_from_stage(state: ProcessState, start_stage: str, progress=gr.Progre
|
|
| 467 |
return (
|
| 468 |
f"β οΈ Error: {str(e)}",
|
| 469 |
"β Processing failed",
|
| 470 |
-
state.get("
|
| 471 |
state.get("enhanced", ""),
|
| 472 |
state.get("linkedin_post", ""),
|
| 473 |
""
|
|
@@ -555,8 +554,8 @@ def format_research_results(research: dict) -> str:
|
|
| 555 |
def create_ui():
|
| 556 |
with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
|
| 557 |
current_state = gr.State({
|
| 558 |
-
"
|
| 559 |
-
"
|
| 560 |
"enhanced": "",
|
| 561 |
"linkedin_post": "",
|
| 562 |
"verification": {},
|
|
@@ -570,33 +569,25 @@ def create_ui():
|
|
| 570 |
|
| 571 |
gr.Markdown(
|
| 572 |
"""
|
| 573 |
-
#
|
| 574 |
-
Transform your
|
| 575 |
|
| 576 |
-
###
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
2. Financial News: https://www.youtube.com/watch?v=hvP1UNALZ3g
|
| 583 |
-
Agent will likely decide to not improve this post
|
| 584 |
-
|
| 585 |
-
3. Video About AI: https://www.youtube.com/watch?v=Yq0QkCxoTHM
|
| 586 |
-
Agent will likely decide to improve this post
|
| 587 |
-
```
|
| 588 |
-
These videos are chosen to show the application's ability to handle different types of professional content.
|
| 589 |
"""
|
| 590 |
)
|
| 591 |
|
| 592 |
with gr.Row():
|
| 593 |
with gr.Column():
|
| 594 |
-
|
| 595 |
-
label="
|
| 596 |
-
|
| 597 |
-
|
| 598 |
)
|
| 599 |
-
|
| 600 |
|
| 601 |
status = gr.Textbox(
|
| 602 |
label="Status",
|
|
@@ -614,8 +605,8 @@ def create_ui():
|
|
| 614 |
with gr.TabItem("π Content"):
|
| 615 |
with gr.Row():
|
| 616 |
with gr.Column():
|
| 617 |
-
|
| 618 |
-
label="π Raw
|
| 619 |
interactive=False,
|
| 620 |
show_copy_button=True,
|
| 621 |
lines=8
|
|
@@ -680,7 +671,7 @@ def create_ui():
|
|
| 680 |
|
| 681 |
# Loading indicators
|
| 682 |
with gr.Row(visible=False) as loading_indicators:
|
| 683 |
-
|
| 684 |
enhanced_loading = gr.Markdown("π Enhancing content...")
|
| 685 |
linkedin_loading = gr.Markdown("π Formatting for LinkedIn...")
|
| 686 |
verify_loading = gr.Markdown("π Verifying content...")
|
|
@@ -692,9 +683,9 @@ def create_ui():
|
|
| 692 |
gr.Markdown(
|
| 693 |
"""
|
| 694 |
### How to Use
|
| 695 |
-
1. **Input**:
|
| 696 |
2. **Process**: Click the "Generate Post" button
|
| 697 |
-
3. **Wait**: The system will process your
|
| 698 |
4. **Review**: Check the generated content in each tab
|
| 699 |
5. **Copy**: Use the copy button to grab your LinkedIn post
|
| 700 |
|
|
@@ -703,19 +694,18 @@ def create_ui():
|
|
| 703 |
- Click π next to "LinkedIn Post" to regenerate from the formatting stage
|
| 704 |
|
| 705 |
### π‘ Tips for Best Results
|
| 706 |
-
- Use
|
| 707 |
-
- Optimal
|
| 708 |
-
- Ensure
|
| 709 |
- Review and personalize the post before sharing
|
| 710 |
-
- Consider your target audience when selecting
|
| 711 |
-
|
| 712 |
"""
|
| 713 |
)
|
| 714 |
|
| 715 |
def update_loading_state(stage: str):
|
| 716 |
"""Update loading indicators based on current stage."""
|
| 717 |
states = {
|
| 718 |
-
"
|
| 719 |
"enhance": [False, True, False, False, False, False, False],
|
| 720 |
"format": [False, False, True, False, False, False, False],
|
| 721 |
"verify": [False, False, False, True, False, False, False],
|
|
@@ -727,7 +717,7 @@ def create_ui():
|
|
| 727 |
|
| 728 |
# Loading messages for each stage
|
| 729 |
loading_messages = {
|
| 730 |
-
"
|
| 731 |
"enhance": "β¨ Enhancing content...\nβ‘ AI is working its magic...",
|
| 732 |
"format": "π¨ Formatting for LinkedIn...\nπ Creating engaging post...",
|
| 733 |
"verify": "π Verifying content...\nβοΈ Checking accuracy...",
|
|
@@ -744,8 +734,8 @@ def create_ui():
|
|
| 744 |
gr.update(visible=state) for state in states.get(stage, [False] * 7)
|
| 745 |
], current_message
|
| 746 |
|
| 747 |
-
def process_with_loading(
|
| 748 |
-
"""Process
|
| 749 |
try:
|
| 750 |
# Initialize state if needed
|
| 751 |
if "improvement_plan" not in state:
|
|
@@ -758,11 +748,11 @@ def create_ui():
|
|
| 758 |
state["needs_improvement"] = False
|
| 759 |
|
| 760 |
# Show loading indicators
|
| 761 |
-
loading_states, message = update_loading_state("
|
| 762 |
yield [
|
| 763 |
"", # error
|
| 764 |
"Processing...", # status
|
| 765 |
-
message, #
|
| 766 |
"", # enhanced
|
| 767 |
"", # linkedin
|
| 768 |
"", # verification
|
|
@@ -773,16 +763,16 @@ def create_ui():
|
|
| 773 |
*loading_states # loading indicators
|
| 774 |
]
|
| 775 |
|
| 776 |
-
# Get
|
| 777 |
-
state["
|
| 778 |
-
|
| 779 |
|
| 780 |
# Show enhancing state
|
| 781 |
loading_states, message = update_loading_state("enhance")
|
| 782 |
yield [
|
| 783 |
"",
|
| 784 |
"Enhancing content...",
|
| 785 |
-
|
| 786 |
message, # enhanced (loading)
|
| 787 |
"",
|
| 788 |
"",
|
|
@@ -794,7 +784,7 @@ def create_ui():
|
|
| 794 |
]
|
| 795 |
|
| 796 |
# Enhance content
|
| 797 |
-
state["
|
| 798 |
enhanced_state = enhance_content(state)
|
| 799 |
enhanced_text = enhanced_state["enhanced"]
|
| 800 |
|
|
@@ -803,7 +793,7 @@ def create_ui():
|
|
| 803 |
yield [
|
| 804 |
"",
|
| 805 |
"Formatting for LinkedIn...",
|
| 806 |
-
|
| 807 |
enhanced_text,
|
| 808 |
message, # linkedin (loading)
|
| 809 |
"",
|
|
@@ -824,7 +814,7 @@ def create_ui():
|
|
| 824 |
yield [
|
| 825 |
"",
|
| 826 |
"Verifying content...",
|
| 827 |
-
|
| 828 |
enhanced_text,
|
| 829 |
linkedin_text,
|
| 830 |
"π Verifying...\nβοΈ Analyzing accuracy...", # verification (loading)
|
|
@@ -851,7 +841,7 @@ def create_ui():
|
|
| 851 |
yield [
|
| 852 |
"",
|
| 853 |
f"Creating improvement plan (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 854 |
-
|
| 855 |
enhanced_text,
|
| 856 |
linkedin_text,
|
| 857 |
verification_text,
|
|
@@ -867,7 +857,7 @@ def create_ui():
|
|
| 867 |
yield [
|
| 868 |
"",
|
| 869 |
f"Researching content (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 870 |
-
|
| 871 |
enhanced_text,
|
| 872 |
linkedin_text,
|
| 873 |
verification_text,
|
|
@@ -887,7 +877,7 @@ def create_ui():
|
|
| 887 |
yield [
|
| 888 |
"",
|
| 889 |
f"Enhancing content again (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 890 |
-
|
| 891 |
enhanced_text,
|
| 892 |
linkedin_text,
|
| 893 |
verification_text,
|
|
@@ -921,7 +911,7 @@ def create_ui():
|
|
| 921 |
yield [
|
| 922 |
"",
|
| 923 |
f"Creating improved LinkedIn post (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 924 |
-
|
| 925 |
enhanced_text,
|
| 926 |
linkedin_text,
|
| 927 |
verification_text,
|
|
@@ -944,7 +934,7 @@ def create_ui():
|
|
| 944 |
yield [
|
| 945 |
"",
|
| 946 |
"✅ Processing complete!",
|
| 947 |
-
|
| 948 |
enhanced_text,
|
| 949 |
linkedin_text,
|
| 950 |
verification_text,
|
|
@@ -960,7 +950,7 @@ def create_ui():
|
|
| 960 |
yield [
|
| 961 |
f"β οΈ Error: {str(e)}",
|
| 962 |
"β Processing failed",
|
| 963 |
-
state.get("
|
| 964 |
state.get("enhanced", ""),
|
| 965 |
state.get("linkedin_post", ""),
|
| 966 |
"",
|
|
@@ -972,13 +962,13 @@ def create_ui():
|
|
| 972 |
]
|
| 973 |
|
| 974 |
# Set up event handlers
|
| 975 |
-
|
| 976 |
fn=process_with_loading,
|
| 977 |
-
inputs=[
|
| 978 |
outputs=[
|
| 979 |
error,
|
| 980 |
status,
|
| 981 |
-
|
| 982 |
enhanced,
|
| 983 |
linkedin,
|
| 984 |
verification,
|
|
@@ -986,7 +976,7 @@ def create_ui():
|
|
| 986 |
research_results,
|
| 987 |
improved_linkedin,
|
| 988 |
current_state,
|
| 989 |
-
|
| 990 |
enhanced_loading,
|
| 991 |
linkedin_loading,
|
| 992 |
verify_loading,
|
|
@@ -1011,7 +1001,7 @@ def create_ui():
|
|
| 1011 |
return {
|
| 1012 |
loading: gr.update(visible=is_loading)
|
| 1013 |
for loading in [
|
| 1014 |
-
|
| 1015 |
enhanced_loading,
|
| 1016 |
linkedin_loading,
|
| 1017 |
verify_loading,
|
|
@@ -1021,10 +1011,10 @@ def create_ui():
|
|
| 1021 |
]
|
| 1022 |
}
|
| 1023 |
|
| 1024 |
-
|
| 1025 |
lambda: update_loading_visibility(True),
|
| 1026 |
None,
|
| 1027 |
-
[
|
| 1028 |
verify_loading, plan_loading, research_loading, improved_loading],
|
| 1029 |
queue=False
|
| 1030 |
)
|
|
@@ -1239,11 +1229,4 @@ Important:
|
|
| 1239 |
|
| 1240 |
if __name__ == "__main__":
|
| 1241 |
demo = create_ui()
|
| 1242 |
-
demo.
|
| 1243 |
-
demo.launch(
|
| 1244 |
-
server_name="0.0.0.0", # Required for Hugging Face Spaces
|
| 1245 |
-
server_port=7860, # Standard port for Hugging Face Spaces
|
| 1246 |
-
show_error=True,
|
| 1247 |
-
share=False, # Disable sharing for production
|
| 1248 |
-
show_api=False
|
| 1249 |
-
)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
from dotenv import load_dotenv
|
|
|
|
| 4 |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
| 5 |
from langchain.prompts import ChatPromptTemplate
|
| 6 |
from langchain_core.output_parsers import StrOutputParser
|
|
|
|
| 11 |
from langchain.schema import Document
|
| 12 |
from datetime import datetime
|
| 13 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 14 |
+
from pypdf import PdfReader
|
| 15 |
|
| 16 |
# Load environment variables
|
| 17 |
load_dotenv(verbose=True)
|
|
|
|
| 22 |
|
| 23 |
# Define state types
|
| 24 |
class ProcessState(TypedDict):
|
| 25 |
+
pdf_file: str
|
| 26 |
+
content: str
|
| 27 |
enhanced: str
|
| 28 |
linkedin_post: str
|
| 29 |
verification: dict
|
|
|
|
| 34 |
needs_improvement: bool
|
| 35 |
research_context: str
|
| 36 |
|
| 37 |
+
def extract_pdf_content(pdf_file: str) -> str:
|
| 38 |
+
"""Extract text content from PDF file."""
|
| 39 |
+
try:
|
| 40 |
+
reader = PdfReader(pdf_file)
|
| 41 |
+
text = ""
|
| 42 |
+
for page in reader.pages:
|
| 43 |
+
text += page.extract_text() + "\n"
|
| 44 |
+
return text.strip()
|
| 45 |
+
except Exception as e:
|
| 46 |
+
raise Exception(f"Error extracting PDF content: {str(e)}")
|
| 47 |
|
| 48 |
+
def get_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
|
| 49 |
+
"""Get content from PDF file."""
|
| 50 |
try:
|
| 51 |
+
progress(0.25, desc="Extracting PDF content...")
|
| 52 |
+
content = extract_pdf_content(state["pdf_file"])
|
| 53 |
+
state["content"] = content
|
| 54 |
+
state["status"] = "✅ PDF content extracted"
|
|
|
|
| 55 |
return state
|
| 56 |
except Exception as e:
|
| 57 |
+
state["error"] = f"⚠️ Error extracting PDF content: {str(e)}"
|
| 58 |
+
state["status"] = "❌ Failed to extract PDF content"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
return state
|
| 60 |
|
| 61 |
def get_chroma_collection():
|
|
|
|
| 71 |
raise Exception(f"Error creating Chroma collection: {str(e)}")
|
| 72 |
|
| 73 |
def enhance_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
|
| 74 |
+
"""Enhance the PDF content with semantic search and similarity analysis."""
|
| 75 |
try:
|
| 76 |
+
if not state["content"]:
|
| 77 |
return state
|
| 78 |
|
| 79 |
progress(0.50, desc="Enhancing content...")
|
|
|
|
| 81 |
# Get similar content from the vector store
|
| 82 |
collection = get_chroma_collection()
|
| 83 |
similar_docs = collection.similarity_search(
|
| 84 |
+
state["content"],
|
| 85 |
k=3
|
| 86 |
)
|
| 87 |
|
| 88 |
# Initialize LLM for content generation
|
| 89 |
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
|
| 90 |
prompt = ChatPromptTemplate.from_messages([
|
| 91 |
+
("system", """You are an expert content enhancer. Transform this PDF content into engaging content:
|
| 92 |
|
| 93 |
1. Identify and emphasize key points
|
| 94 |
2. Add context and examples
|
|
|
|
| 96 |
4. Keep it concise (max 3000 characters)
|
| 97 |
5. Maintain factual accuracy
|
| 98 |
|
| 99 |
+
Content:
|
| 100 |
+
{content}
|
| 101 |
|
| 102 |
Similar Content for Context:
|
| 103 |
{similar_content}
|
|
|
|
| 107 |
|
| 108 |
chain = prompt | llm | StrOutputParser()
|
| 109 |
state["enhanced"] = chain.invoke({
|
| 110 |
+
"content": state["content"],
|
| 111 |
"similar_content": "\n".join([doc.page_content for doc in similar_docs])
|
| 112 |
})
|
| 113 |
state["status"] = "✅ Content enhanced"
|
|
|
|
| 175 |
def verify_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
|
| 176 |
"""Verify the enhanced content against the original using semantic similarity."""
|
| 177 |
try:
|
| 178 |
+
if not state["enhanced"] or not state["content"]:
|
| 179 |
return state
|
| 180 |
|
| 181 |
progress(1.0, desc="Verifying content...")
|
|
|
|
| 194 |
if similar_docs:
|
| 195 |
# Chroma returns a list of Document objects with a score attribute
|
| 196 |
# But the default similarity_search does not return scores, so we just check if content is similar
|
| 197 |
+
similarity_score = 1.0 if similar_docs[0].page_content == state["content"] else 0.0
|
| 198 |
|
| 199 |
# Initialize LLM for verification
|
| 200 |
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
|
|
|
|
| 224 |
|
| 225 |
chain = prompt | llm | StrOutputParser()
|
| 226 |
verification_result = json.loads(chain.invoke({
|
| 227 |
+
"original": state["content"],
|
| 228 |
"enhanced": state["enhanced"],
|
| 229 |
"similarity_score": similarity_score
|
| 230 |
}))
|
|
|
|
| 261 |
workflow = StateGraph(ProcessState)
|
| 262 |
|
| 263 |
# Add nodes
|
| 264 |
+
workflow.add_node("get_content", get_content)
|
| 265 |
workflow.add_node("enhance_content", enhance_content)
|
| 266 |
workflow.add_node("format_linkedin", format_linkedin_post)
|
| 267 |
workflow.add_node("verify_content", verify_content)
|
|
|
|
| 270 |
workflow.add_node("enhance_again", enhance_again)
|
| 271 |
|
| 272 |
# Set entry point
|
| 273 |
+
workflow.set_entry_point("get_content")
|
| 274 |
|
| 275 |
# Add edges for main flow
|
| 276 |
+
workflow.add_edge("get_content", "enhance_content")
|
| 277 |
workflow.add_edge("enhance_content", "format_linkedin")
|
| 278 |
workflow.add_edge("format_linkedin", "verify_content")
|
| 279 |
workflow.add_edge("verify_content", "agent_decide")
|
|
|
|
| 294 |
|
| 295 |
# Add conditional edges for error handling
|
| 296 |
workflow.add_conditional_edges(
|
| 297 |
+
"get_content",
|
| 298 |
should_continue,
|
| 299 |
{
|
| 300 |
True: "enhance_content",
|
|
|
|
| 418 |
# Select appropriate workflow based on stage
|
| 419 |
if start_stage == "enhance":
|
| 420 |
workflow = create_workflow()
|
| 421 |
+
if not state["content"]:
|
| 422 |
return (
|
| 423 |
+
"⚠️ No content available to enhance",
|
| 424 |
+
"β Failed: No content",
|
| 425 |
+
state.get("content", ""),
|
| 426 |
"",
|
| 427 |
"",
|
| 428 |
""
|
|
|
|
| 433 |
return (
|
| 434 |
"β οΈ No enhanced content available to format",
|
| 435 |
"β Failed: No enhanced content",
|
| 436 |
+
state.get("content", ""),
|
| 437 |
state.get("enhanced", ""),
|
| 438 |
"",
|
| 439 |
""
|
|
|
|
| 456 |
return (
|
| 457 |
final_state.get("error", ""),
|
| 458 |
final_state.get("status", ""),
|
| 459 |
+
final_state.get("content", ""),
|
| 460 |
final_state.get("enhanced", ""),
|
| 461 |
final_state.get("linkedin_post", ""),
|
| 462 |
verification_text
|
|
|
|
| 466 |
return (
|
| 467 |
f"β οΈ Error: {str(e)}",
|
| 468 |
"β Processing failed",
|
| 469 |
+
state.get("content", ""),
|
| 470 |
state.get("enhanced", ""),
|
| 471 |
state.get("linkedin_post", ""),
|
| 472 |
""
|
|
|
|
| 554 |
def create_ui():
|
| 555 |
with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
|
| 556 |
current_state = gr.State({
|
| 557 |
+
"pdf_file": "",
|
| 558 |
+
"content": "",
|
| 559 |
"enhanced": "",
|
| 560 |
"linkedin_post": "",
|
| 561 |
"verification": {},
|
|
|
|
| 569 |
|
| 570 |
gr.Markdown(
|
| 571 |
"""
|
| 572 |
+
# PDF to LinkedIn Post Converter
|
| 573 |
+
Transform your PDF documents into professional LinkedIn posts with AI content enhancement.
|
| 574 |
|
| 575 |
+
### π How to Use
|
| 576 |
+
1. Upload a PDF file
|
| 577 |
+
2. Click "Generate Post"
|
| 578 |
+
3. Review the enhanced content
|
| 579 |
+
4. Copy your LinkedIn-ready post
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
"""
|
| 581 |
)
|
| 582 |
|
| 583 |
with gr.Row():
|
| 584 |
with gr.Column():
|
| 585 |
+
pdf_file = gr.File(
|
| 586 |
+
label="PDF File",
|
| 587 |
+
file_types=[".pdf"],
|
| 588 |
+
type="filepath"
|
| 589 |
)
|
| 590 |
+
convert_btn = gr.Button("π Generate from PDF", variant="primary", size="lg")
|
| 591 |
|
| 592 |
status = gr.Textbox(
|
| 593 |
label="Status",
|
|
|
|
| 605 |
with gr.TabItem("π Content"):
|
| 606 |
with gr.Row():
|
| 607 |
with gr.Column():
|
| 608 |
+
content = gr.TextArea(
|
| 609 |
+
label="π Raw Content",
|
| 610 |
interactive=False,
|
| 611 |
show_copy_button=True,
|
| 612 |
lines=8
|
|
|
|
| 671 |
|
| 672 |
# Loading indicators
|
| 673 |
with gr.Row(visible=False) as loading_indicators:
|
| 674 |
+
content_loading = gr.Markdown("π Extracting content...")
|
| 675 |
enhanced_loading = gr.Markdown("π Enhancing content...")
|
| 676 |
linkedin_loading = gr.Markdown("π Formatting for LinkedIn...")
|
| 677 |
verify_loading = gr.Markdown("π Verifying content...")
|
|
|
|
| 683 |
gr.Markdown(
|
| 684 |
"""
|
| 685 |
### How to Use
|
| 686 |
+
1. **Input**: Upload a PDF file
|
| 687 |
2. **Process**: Click the "Generate Post" button
|
| 688 |
+
3. **Wait**: The system will process your PDF through multiple steps
|
| 689 |
4. **Review**: Check the generated content in each tab
|
| 690 |
5. **Copy**: Use the copy button to grab your LinkedIn post
|
| 691 |
|
|
|
|
| 694 |
- Click π next to "LinkedIn Post" to regenerate from the formatting stage
|
| 695 |
|
| 696 |
### π‘ Tips for Best Results
|
| 697 |
+
- Use well-formatted PDFs with clear text
|
| 698 |
+
- Optimal length: 2-10 pages
|
| 699 |
+
- Ensure PDFs have readable text (not scanned images)
|
| 700 |
- Review and personalize the post before sharing
|
| 701 |
+
- Consider your target audience when selecting content
|
|
|
|
| 702 |
"""
|
| 703 |
)
|
| 704 |
|
| 705 |
def update_loading_state(stage: str):
|
| 706 |
"""Update loading indicators based on current stage."""
|
| 707 |
states = {
|
| 708 |
+
"content": [True, False, False, False, False, False, False],
|
| 709 |
"enhance": [False, True, False, False, False, False, False],
|
| 710 |
"format": [False, False, True, False, False, False, False],
|
| 711 |
"verify": [False, False, False, True, False, False, False],
|
|
|
|
| 717 |
|
| 718 |
# Loading messages for each stage
|
| 719 |
loading_messages = {
|
| 720 |
+
"content": "π Extracting content...\nβ³ Please wait...",
|
| 721 |
"enhance": "β¨ Enhancing content...\nβ‘ AI is working its magic...",
|
| 722 |
"format": "π¨ Formatting for LinkedIn...\nπ Creating engaging post...",
|
| 723 |
"verify": "π Verifying content...\nβοΈ Checking accuracy...",
|
|
|
|
| 734 |
gr.update(visible=state) for state in states.get(stage, [False] * 7)
|
| 735 |
], current_message
|
| 736 |
|
| 737 |
+
def process_with_loading(pdf_path, state):
|
| 738 |
+
"""Process PDF with loading indicators."""
|
| 739 |
try:
|
| 740 |
# Initialize state if needed
|
| 741 |
if "improvement_plan" not in state:
|
|
|
|
| 748 |
state["needs_improvement"] = False
|
| 749 |
|
| 750 |
# Show loading indicators
|
| 751 |
+
loading_states, message = update_loading_state("content")
|
| 752 |
yield [
|
| 753 |
"", # error
|
| 754 |
"Processing...", # status
|
| 755 |
+
message, # content (loading)
|
| 756 |
"", # enhanced
|
| 757 |
"", # linkedin
|
| 758 |
"", # verification
|
|
|
|
| 763 |
*loading_states # loading indicators
|
| 764 |
]
|
| 765 |
|
| 766 |
+
# Get content
|
| 767 |
+
state["pdf_file"] = pdf_path
|
| 768 |
+
content_text = get_content(state)["content"]
|
| 769 |
|
| 770 |
# Show enhancing state
|
| 771 |
loading_states, message = update_loading_state("enhance")
|
| 772 |
yield [
|
| 773 |
"",
|
| 774 |
"Enhancing content...",
|
| 775 |
+
content_text,
|
| 776 |
message, # enhanced (loading)
|
| 777 |
"",
|
| 778 |
"",
|
|
|
|
| 784 |
]
|
| 785 |
|
| 786 |
# Enhance content
|
| 787 |
+
state["content"] = content_text
|
| 788 |
enhanced_state = enhance_content(state)
|
| 789 |
enhanced_text = enhanced_state["enhanced"]
|
| 790 |
|
|
|
|
| 793 |
yield [
|
| 794 |
"",
|
| 795 |
"Formatting for LinkedIn...",
|
| 796 |
+
content_text,
|
| 797 |
enhanced_text,
|
| 798 |
message, # linkedin (loading)
|
| 799 |
"",
|
|
|
|
| 814 |
yield [
|
| 815 |
"",
|
| 816 |
"Verifying content...",
|
| 817 |
+
content_text,
|
| 818 |
enhanced_text,
|
| 819 |
linkedin_text,
|
| 820 |
"π Verifying...\nβοΈ Analyzing accuracy...", # verification (loading)
|
|
|
|
| 841 |
yield [
|
| 842 |
"",
|
| 843 |
f"Creating improvement plan (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 844 |
+
content_text,
|
| 845 |
enhanced_text,
|
| 846 |
linkedin_text,
|
| 847 |
verification_text,
|
|
|
|
| 857 |
yield [
|
| 858 |
"",
|
| 859 |
f"Researching content (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 860 |
+
content_text,
|
| 861 |
enhanced_text,
|
| 862 |
linkedin_text,
|
| 863 |
verification_text,
|
|
|
|
| 877 |
yield [
|
| 878 |
"",
|
| 879 |
f"Enhancing content again (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 880 |
+
content_text,
|
| 881 |
enhanced_text,
|
| 882 |
linkedin_text,
|
| 883 |
verification_text,
|
|
|
|
| 911 |
yield [
|
| 912 |
"",
|
| 913 |
f"Creating improved LinkedIn post (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 914 |
+
content_text,
|
| 915 |
enhanced_text,
|
| 916 |
linkedin_text,
|
| 917 |
verification_text,
|
|
|
|
| 934 |
yield [
|
| 935 |
"",
|
| 936 |
"✅ Processing complete!",
|
| 937 |
+
content_text,
|
| 938 |
enhanced_text,
|
| 939 |
linkedin_text,
|
| 940 |
verification_text,
|
|
|
|
| 950 |
yield [
|
| 951 |
f"β οΈ Error: {str(e)}",
|
| 952 |
"β Processing failed",
|
| 953 |
+
state.get("content", ""),
|
| 954 |
state.get("enhanced", ""),
|
| 955 |
state.get("linkedin_post", ""),
|
| 956 |
"",
|
|
|
|
| 962 |
]
|
| 963 |
|
| 964 |
# Set up event handlers
|
| 965 |
+
convert_btn.click(
|
| 966 |
fn=process_with_loading,
|
| 967 |
+
inputs=[pdf_file, current_state],
|
| 968 |
outputs=[
|
| 969 |
error,
|
| 970 |
status,
|
| 971 |
+
content,
|
| 972 |
enhanced,
|
| 973 |
linkedin,
|
| 974 |
verification,
|
|
|
|
| 976 |
research_results,
|
| 977 |
improved_linkedin,
|
| 978 |
current_state,
|
| 979 |
+
content_loading,
|
| 980 |
enhanced_loading,
|
| 981 |
linkedin_loading,
|
| 982 |
verify_loading,
|
|
|
|
| 1001 |
return {
|
| 1002 |
loading: gr.update(visible=is_loading)
|
| 1003 |
for loading in [
|
| 1004 |
+
content_loading,
|
| 1005 |
enhanced_loading,
|
| 1006 |
linkedin_loading,
|
| 1007 |
verify_loading,
|
|
|
|
| 1011 |
]
|
| 1012 |
}
|
| 1013 |
|
| 1014 |
+
convert_btn.click(
|
| 1015 |
lambda: update_loading_visibility(True),
|
| 1016 |
None,
|
| 1017 |
+
[content_loading, enhanced_loading, linkedin_loading,
|
| 1018 |
verify_loading, plan_loading, research_loading, improved_loading],
|
| 1019 |
queue=False
|
| 1020 |
)
|
|
|
|
| 1229 |
|
| 1230 |
if __name__ == "__main__":
|
| 1231 |
demo = create_ui()
|
| 1232 |
+
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
-
gradio>=4.
|
| 2 |
-
python-dotenv>=1.0.
|
| 3 |
youtube-transcript-api>=0.6.2
|
| 4 |
-
langchain-openai>=0.0.
|
| 5 |
-
langchain>=0.1.
|
| 6 |
-
langgraph>=0.0.
|
| 7 |
-
langchain-community>=0.0.
|
| 8 |
-
langchain-chroma>=0.
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
python-dotenv>=1.0.0
|
| 3 |
youtube-transcript-api>=0.6.2
|
| 4 |
+
langchain-openai>=0.0.5
|
| 5 |
+
langchain>=0.1.0
|
| 6 |
+
langgraph>=0.0.20
|
| 7 |
+
langchain-community>=0.0.13
|
| 8 |
+
langchain-chroma>=0.0.5
|
| 9 |
+
pypdf>=3.17.1
|