T-K-O-H committed on
Commit
42bec52
·
1 Parent(s): b7cd3e1

HuggingFace Fix PDF Edition

Browse files
Files changed (3) hide show
  1. README.md +20 -12
  2. app.py +92 -109
  3. requirements.txt +8 -7
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: YouTube to LinkedIn Post Converter
3
- emoji: 🎥
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
@@ -9,11 +9,11 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- # YouTube to LinkedIn Post Converter
13
 
14
- Transform your YouTube videos into professional LinkedIn posts with AI-powered content enhancement. This application:
15
 
16
- - Extracts transcripts from YouTube videos
17
  - Enhances content using AI
18
  - Formats posts for LinkedIn
19
  - Verifies content quality
@@ -21,7 +21,7 @@ Transform your YouTube videos into professional LinkedIn posts with AI-powered c
21
 
22
  ## Features
23
 
24
- - 🎥 YouTube video processing
25
  - ✨ AI-powered content enhancement
26
  - πŸ”— LinkedIn post formatting
27
  - βœ“ Content verification
@@ -29,14 +29,22 @@ Transform your YouTube videos into professional LinkedIn posts with AI-powered c
29
 
30
  ## How to Use
31
 
32
- 1. Enter a YouTube video URL
33
  2. Click "Generate Post"
34
  3. Review the enhanced content
35
  4. Copy your LinkedIn-ready post
36
 
37
- ## Sample Videos
38
 
39
- Try these videos to test the application:
40
- - Open AI video: https://www.youtube.com/watch?v=LsMxX86mm2Y
41
- - Financial News: https://www.youtube.com/watch?v=hvP1UNALZ3g
42
- - Video About AI: https://www.youtube.com/watch?v=Yq0QkCxoTHM
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: PDF to LinkedIn Post Converter
3
+ emoji: 📄
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
 
9
  pinned: false
10
  ---
11
 
12
+ # PDF to LinkedIn Post Converter
13
 
14
+ Transform your PDF documents into professional LinkedIn posts with AI-powered content enhancement. This application:
15
 
16
+ - Extracts content from PDF files
17
  - Enhances content using AI
18
  - Formats posts for LinkedIn
19
  - Verifies content quality
 
21
 
22
  ## Features
23
 
24
+ - 📄 PDF document processing
25
  - ✨ AI-powered content enhancement
26
  - πŸ”— LinkedIn post formatting
27
  - βœ“ Content verification
 
29
 
30
  ## How to Use
31
 
32
+ 1. Upload a PDF file
33
  2. Click "Generate Post"
34
  3. Review the enhanced content
35
  4. Copy your LinkedIn-ready post
36
 
37
+ ## Tips for Best Results
38
 
39
+ - Use well-formatted PDFs with clear text
40
+ - Optimal length: 2-10 pages
41
+ - Ensure PDFs have readable text (not scanned images)
42
+ - Review and personalize the post before sharing
43
+ - Consider your target audience when selecting content
44
+
45
+ ## Sample PDFs
46
+
47
+ Try PDFs like these to test the application (the links below are placeholders — substitute real, text-based PDF URLs):
48
+ - Open AI PDF: https://example.com/open-ai.pdf
49
+ - Financial News PDF: https://example.com/financial-news.pdf
50
+ - PDF About AI: https://example.com/ai.pdf
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  import os
3
  from dotenv import load_dotenv
4
- from youtube_transcript_api import YouTubeTranscriptApi
5
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
6
  from langchain.prompts import ChatPromptTemplate
7
  from langchain_core.output_parsers import StrOutputParser
@@ -12,6 +11,7 @@ from langchain_chroma import Chroma
12
  from langchain.schema import Document
13
  from datetime import datetime
14
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
15
 
16
  # Load environment variables
17
  load_dotenv(verbose=True)
@@ -22,8 +22,8 @@ if not os.getenv("OPENAI_API_KEY"):
22
 
23
  # Define state types
24
  class ProcessState(TypedDict):
25
- video_url: str
26
- transcript: str
27
  enhanced: str
28
  linkedin_post: str
29
  verification: dict
@@ -34,29 +34,28 @@ class ProcessState(TypedDict):
34
  needs_improvement: bool
35
  research_context: str
36
 
37
def extract_video_id(url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Handles both long-form watch URLs (https://www.youtube.com/watch?v=ID&t=5)
    and youtu.be short links (https://youtu.be/ID?t=5).

    Args:
        url: A YouTube watch URL or youtu.be short link.

    Returns:
        The video ID portion of the URL.
    """
    if "youtu.be" in url:
        # Short links may carry query parameters (e.g. ?t=30, ?si=...);
        # the original code returned them as part of the ID — strip them.
        return url.split("/")[-1].split("?")[0]
    return url.split("v=")[-1].split("&")[0]
 
 
 
 
 
42
 
43
def get_transcript(state: ProcessState, progress=gr.Progress()) -> ProcessState:
    """Fetch the transcript for the video referenced by state["video_url"].

    On success, stores the space-joined transcript text in state["transcript"]
    and a success status; on failure, records state["error"] and a failure
    status (with a dedicated message for YouTube rate limiting). Always
    returns the (mutated) state.
    """
    try:
        progress(0.25, desc="Fetching transcript...")
        segments = YouTubeTranscriptApi.get_transcript(
            extract_video_id(state["video_url"])
        )
        state["transcript"] = " ".join(seg["text"] for seg in segments)
        state["status"] = "βœ… Transcript fetched"
    except Exception as e:
        # Distinguish rate-limit failures so the user knows to retry later.
        lowered = str(e).lower()
        if "too many requests" in lowered or "429" in lowered:
            state["error"] = "⚠️ YouTube API rate limit reached. Please wait a few minutes and try again."
            state["status"] = "❌ Rate limit exceeded"
        else:
            state["error"] = f"⚠️ Error fetching transcript: {str(e)}"
            state["status"] = "❌ Failed to fetch transcript"
    return state
61
 
62
  def get_chroma_collection():
@@ -72,9 +71,9 @@ def get_chroma_collection():
72
  raise Exception(f"Error creating Chroma collection: {str(e)}")
73
 
74
  def enhance_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
75
- """Enhance the transcript content with semantic search and similarity analysis."""
76
  try:
77
- if not state["transcript"]:
78
  return state
79
 
80
  progress(0.50, desc="Enhancing content...")
@@ -82,14 +81,14 @@ def enhance_content(state: ProcessState, progress=gr.Progress()) -> ProcessState
82
  # Get similar content from the vector store
83
  collection = get_chroma_collection()
84
  similar_docs = collection.similarity_search(
85
- state["transcript"],
86
  k=3
87
  )
88
 
89
  # Initialize LLM for content generation
90
  llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
91
  prompt = ChatPromptTemplate.from_messages([
92
- ("system", """You are an expert content enhancer. Transform this transcript into engaging content:
93
 
94
  1. Identify and emphasize key points
95
  2. Add context and examples
@@ -97,8 +96,8 @@ def enhance_content(state: ProcessState, progress=gr.Progress()) -> ProcessState
97
  4. Keep it concise (max 3000 characters)
98
  5. Maintain factual accuracy
99
 
100
- Transcript:
101
- {transcript}
102
 
103
  Similar Content for Context:
104
  {similar_content}
@@ -108,7 +107,7 @@ Similar Content for Context:
108
 
109
  chain = prompt | llm | StrOutputParser()
110
  state["enhanced"] = chain.invoke({
111
- "transcript": state["transcript"],
112
  "similar_content": "\n".join([doc.page_content for doc in similar_docs])
113
  })
114
  state["status"] = "βœ… Content enhanced"
@@ -176,7 +175,7 @@ Remember: The goal is to make the content more engaging while keeping ALL the or
176
  def verify_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
177
  """Verify the enhanced content against the original using semantic similarity."""
178
  try:
179
- if not state["enhanced"] or not state["transcript"]:
180
  return state
181
 
182
  progress(1.0, desc="Verifying content...")
@@ -195,7 +194,7 @@ def verify_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
195
  if similar_docs:
196
  # Chroma returns a list of Document objects with a score attribute
197
  # But the default similarity_search does not return scores, so we just check if content is similar
198
- similarity_score = 1.0 if similar_docs[0].page_content == state["transcript"] else 0.0
199
 
200
  # Initialize LLM for verification
201
  llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
@@ -225,7 +224,7 @@ Semantic Similarity Score: {similarity_score}"""),
225
 
226
  chain = prompt | llm | StrOutputParser()
227
  verification_result = json.loads(chain.invoke({
228
- "original": state["transcript"],
229
  "enhanced": state["enhanced"],
230
  "similarity_score": similarity_score
231
  }))
@@ -262,7 +261,7 @@ def create_workflow() -> StateGraph:
262
  workflow = StateGraph(ProcessState)
263
 
264
  # Add nodes
265
- workflow.add_node("get_transcript", get_transcript)
266
  workflow.add_node("enhance_content", enhance_content)
267
  workflow.add_node("format_linkedin", format_linkedin_post)
268
  workflow.add_node("verify_content", verify_content)
@@ -271,10 +270,10 @@ def create_workflow() -> StateGraph:
271
  workflow.add_node("enhance_again", enhance_again)
272
 
273
  # Set entry point
274
- workflow.set_entry_point("get_transcript")
275
 
276
  # Add edges for main flow
277
- workflow.add_edge("get_transcript", "enhance_content")
278
  workflow.add_edge("enhance_content", "format_linkedin")
279
  workflow.add_edge("format_linkedin", "verify_content")
280
  workflow.add_edge("verify_content", "agent_decide")
@@ -295,7 +294,7 @@ def create_workflow() -> StateGraph:
295
 
296
  # Add conditional edges for error handling
297
  workflow.add_conditional_edges(
298
- "get_transcript",
299
  should_continue,
300
  {
301
  True: "enhance_content",
@@ -419,11 +418,11 @@ def process_from_stage(state: ProcessState, start_stage: str, progress=gr.Progre
419
  # Select appropriate workflow based on stage
420
  if start_stage == "enhance":
421
  workflow = create_workflow()
422
- if not state["transcript"]:
423
  return (
424
- "⚠️ No transcript available to enhance",
425
- "❌ Failed: No transcript",
426
- state.get("transcript", ""),
427
  "",
428
  "",
429
  ""
@@ -434,7 +433,7 @@ def process_from_stage(state: ProcessState, start_stage: str, progress=gr.Progre
434
  return (
435
  "⚠️ No enhanced content available to format",
436
  "❌ Failed: No enhanced content",
437
- state.get("transcript", ""),
438
  state.get("enhanced", ""),
439
  "",
440
  ""
@@ -457,7 +456,7 @@ def process_from_stage(state: ProcessState, start_stage: str, progress=gr.Progre
457
  return (
458
  final_state.get("error", ""),
459
  final_state.get("status", ""),
460
- final_state.get("transcript", ""),
461
  final_state.get("enhanced", ""),
462
  final_state.get("linkedin_post", ""),
463
  verification_text
@@ -467,7 +466,7 @@ def process_from_stage(state: ProcessState, start_stage: str, progress=gr.Progre
467
  return (
468
  f"⚠️ Error: {str(e)}",
469
  "❌ Processing failed",
470
- state.get("transcript", ""),
471
  state.get("enhanced", ""),
472
  state.get("linkedin_post", ""),
473
  ""
@@ -555,8 +554,8 @@ def format_research_results(research: dict) -> str:
555
  def create_ui():
556
  with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
557
  current_state = gr.State({
558
- "video_url": "",
559
- "transcript": "",
560
  "enhanced": "",
561
  "linkedin_post": "",
562
  "verification": {},
@@ -570,33 +569,25 @@ def create_ui():
570
 
571
  gr.Markdown(
572
  """
573
- # YouTube to LinkedIn Post Converter
574
- Transform your YouTube videos into professional LinkedIn posts with AI content enhancement.
575
 
576
- ### 🎬 Sample Videos to Try
577
- Copy any of these URLs to test the application:
578
- ```
579
- 1. Open AI video: https://www.youtube.com/watch?v=LsMxX86mm2Y
580
- Agent will likely find high quality initial content and not improve
581
-
582
- 2. Financial News: https://www.youtube.com/watch?v=hvP1UNALZ3g
583
- Agent will likely decide to not improve this post
584
-
585
- 3. Video About AI: https://www.youtube.com/watch?v=Yq0QkCxoTHM
586
- Agent will likely decide to improve this post
587
- ```
588
- These videos are chosen to show the application's ability to handle different types of professional content.
589
  """
590
  )
591
 
592
  with gr.Row():
593
  with gr.Column():
594
- video_url = gr.Textbox(
595
- label="YouTube URL",
596
- placeholder="https://www.youtube.com/watch?v=e1GJ5tZePjk",
597
- show_label=True
598
  )
599
- youtube_convert_btn = gr.Button("πŸš€ Generate from YouTube", variant="primary", size="lg")
600
 
601
  status = gr.Textbox(
602
  label="Status",
@@ -614,8 +605,8 @@ def create_ui():
614
  with gr.TabItem("πŸ“ Content"):
615
  with gr.Row():
616
  with gr.Column():
617
- transcript = gr.TextArea(
618
- label="πŸ“„ Raw Transcript",
619
  interactive=False,
620
  show_copy_button=True,
621
  lines=8
@@ -680,7 +671,7 @@ def create_ui():
680
 
681
  # Loading indicators
682
  with gr.Row(visible=False) as loading_indicators:
683
- transcript_loading = gr.Markdown("πŸ”„ Fetching transcript...")
684
  enhanced_loading = gr.Markdown("πŸ”„ Enhancing content...")
685
  linkedin_loading = gr.Markdown("πŸ”„ Formatting for LinkedIn...")
686
  verify_loading = gr.Markdown("πŸ”„ Verifying content...")
@@ -692,9 +683,9 @@ def create_ui():
692
  gr.Markdown(
693
  """
694
  ### How to Use
695
- 1. **Input**: Paste a YouTube video URL in the input field
696
  2. **Process**: Click the "Generate Post" button
697
- 3. **Wait**: The system will process your video through multiple steps
698
  4. **Review**: Check the generated content in each tab
699
  5. **Copy**: Use the copy button to grab your LinkedIn post
700
 
@@ -703,19 +694,18 @@ def create_ui():
703
  - Click πŸ”„ next to "LinkedIn Post" to regenerate from the formatting stage
704
 
705
  ### πŸ’‘ Tips for Best Results
706
- - Use videos with clear English audio
707
- - Optimal video length: 5-15 minutes
708
- - Ensure videos have accurate captions
709
  - Review and personalize the post before sharing
710
- - Consider your target audience when selecting videos
711
-
712
  """
713
  )
714
 
715
  def update_loading_state(stage: str):
716
  """Update loading indicators based on current stage."""
717
  states = {
718
- "transcript": [True, False, False, False, False, False, False],
719
  "enhance": [False, True, False, False, False, False, False],
720
  "format": [False, False, True, False, False, False, False],
721
  "verify": [False, False, False, True, False, False, False],
@@ -727,7 +717,7 @@ def create_ui():
727
 
728
  # Loading messages for each stage
729
  loading_messages = {
730
- "transcript": "πŸ”„ Fetching transcript...\n⏳ Please wait...",
731
  "enhance": "✨ Enhancing content...\n⚑ AI is working its magic...",
732
  "format": "🎨 Formatting for LinkedIn...\nπŸ“ Creating engaging post...",
733
  "verify": "πŸ” Verifying content...\nβš–οΈ Checking accuracy...",
@@ -744,8 +734,8 @@ def create_ui():
744
  gr.update(visible=state) for state in states.get(stage, [False] * 7)
745
  ], current_message
746
 
747
- def process_with_loading(url, state):
748
- """Process video with loading indicators."""
749
  try:
750
  # Initialize state if needed
751
  if "improvement_plan" not in state:
@@ -758,11 +748,11 @@ def create_ui():
758
  state["needs_improvement"] = False
759
 
760
  # Show loading indicators
761
- loading_states, message = update_loading_state("transcript")
762
  yield [
763
  "", # error
764
  "Processing...", # status
765
- message, # transcript (loading)
766
  "", # enhanced
767
  "", # linkedin
768
  "", # verification
@@ -773,16 +763,16 @@ def create_ui():
773
  *loading_states # loading indicators
774
  ]
775
 
776
- # Get transcript
777
- state["video_url"] = url
778
- transcript_text = get_transcript(state)["transcript"]
779
 
780
  # Show enhancing state
781
  loading_states, message = update_loading_state("enhance")
782
  yield [
783
  "",
784
  "Enhancing content...",
785
- transcript_text,
786
  message, # enhanced (loading)
787
  "",
788
  "",
@@ -794,7 +784,7 @@ def create_ui():
794
  ]
795
 
796
  # Enhance content
797
- state["transcript"] = transcript_text
798
  enhanced_state = enhance_content(state)
799
  enhanced_text = enhanced_state["enhanced"]
800
 
@@ -803,7 +793,7 @@ def create_ui():
803
  yield [
804
  "",
805
  "Formatting for LinkedIn...",
806
- transcript_text,
807
  enhanced_text,
808
  message, # linkedin (loading)
809
  "",
@@ -824,7 +814,7 @@ def create_ui():
824
  yield [
825
  "",
826
  "Verifying content...",
827
- transcript_text,
828
  enhanced_text,
829
  linkedin_text,
830
  "πŸ” Verifying...\nβš–οΈ Analyzing accuracy...", # verification (loading)
@@ -851,7 +841,7 @@ def create_ui():
851
  yield [
852
  "",
853
  f"Creating improvement plan (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
854
- transcript_text,
855
  enhanced_text,
856
  linkedin_text,
857
  verification_text,
@@ -867,7 +857,7 @@ def create_ui():
867
  yield [
868
  "",
869
  f"Researching content (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
870
- transcript_text,
871
  enhanced_text,
872
  linkedin_text,
873
  verification_text,
@@ -887,7 +877,7 @@ def create_ui():
887
  yield [
888
  "",
889
  f"Enhancing content again (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
890
- transcript_text,
891
  enhanced_text,
892
  linkedin_text,
893
  verification_text,
@@ -921,7 +911,7 @@ def create_ui():
921
  yield [
922
  "",
923
  f"Creating improved LinkedIn post (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
924
- transcript_text,
925
  enhanced_text,
926
  linkedin_text,
927
  verification_text,
@@ -944,7 +934,7 @@ def create_ui():
944
  yield [
945
  "",
946
  "βœ… Processing complete!",
947
- transcript_text,
948
  enhanced_text,
949
  linkedin_text,
950
  verification_text,
@@ -960,7 +950,7 @@ def create_ui():
960
  yield [
961
  f"⚠️ Error: {str(e)}",
962
  "❌ Processing failed",
963
- state.get("transcript", ""),
964
  state.get("enhanced", ""),
965
  state.get("linkedin_post", ""),
966
  "",
@@ -972,13 +962,13 @@ def create_ui():
972
  ]
973
 
974
  # Set up event handlers
975
- youtube_convert_btn.click(
976
  fn=process_with_loading,
977
- inputs=[video_url, current_state],
978
  outputs=[
979
  error,
980
  status,
981
- transcript,
982
  enhanced,
983
  linkedin,
984
  verification,
@@ -986,7 +976,7 @@ def create_ui():
986
  research_results,
987
  improved_linkedin,
988
  current_state,
989
- transcript_loading,
990
  enhanced_loading,
991
  linkedin_loading,
992
  verify_loading,
@@ -1011,7 +1001,7 @@ def create_ui():
1011
  return {
1012
  loading: gr.update(visible=is_loading)
1013
  for loading in [
1014
- transcript_loading,
1015
  enhanced_loading,
1016
  linkedin_loading,
1017
  verify_loading,
@@ -1021,10 +1011,10 @@ def create_ui():
1021
  ]
1022
  }
1023
 
1024
- youtube_convert_btn.click(
1025
  lambda: update_loading_visibility(True),
1026
  None,
1027
- [transcript_loading, enhanced_loading, linkedin_loading,
1028
  verify_loading, plan_loading, research_loading, improved_loading],
1029
  queue=False
1030
  )
@@ -1239,11 +1229,4 @@ Important:
1239
 
1240
  if __name__ == "__main__":
1241
  demo = create_ui()
1242
- demo.queue() # Enable queuing for better handling of concurrent requests
1243
- demo.launch(
1244
- server_name="0.0.0.0", # Required for Hugging Face Spaces
1245
- server_port=7860, # Standard port for Hugging Face Spaces
1246
- show_error=True,
1247
- share=False, # Disable sharing for production
1248
- show_api=False
1249
- )
 
1
  import gradio as gr
2
  import os
3
  from dotenv import load_dotenv
 
4
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
5
  from langchain.prompts import ChatPromptTemplate
6
  from langchain_core.output_parsers import StrOutputParser
 
11
  from langchain.schema import Document
12
  from datetime import datetime
13
  from langchain.text_splitter import RecursiveCharacterTextSplitter
14
+ from pypdf import PdfReader
15
 
16
  # Load environment variables
17
  load_dotenv(verbose=True)
 
22
 
23
  # Define state types
24
  class ProcessState(TypedDict):
25
+ pdf_file: str
26
+ content: str
27
  enhanced: str
28
  linkedin_post: str
29
  verification: dict
 
34
  needs_improvement: bool
35
  research_context: str
36
 
37
def extract_pdf_content(pdf_file: str) -> str:
    """Extract the text content of a PDF file.

    Args:
        pdf_file: Filesystem path to the PDF to read.

    Returns:
        The text of all pages, one page per line, stripped of surrounding
        whitespace.

    Raises:
        Exception: Wrapping the underlying pypdf error if the file cannot
            be read or parsed.
    """
    try:
        reader = PdfReader(pdf_file)
        pages_text = []
        for page in reader.pages:
            # pypdf's extract_text() can return None for pages with no
            # extractable text (e.g. scanned images); the original
            # `text += page.extract_text() + "\n"` raised TypeError there.
            pages_text.append(page.extract_text() or "")
        return "\n".join(pages_text).strip()
    except Exception as e:
        # Chain the cause so the original pypdf traceback is preserved.
        raise Exception(f"Error extracting PDF content: {str(e)}") from e
47
 
48
def get_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
    """Populate state["content"] with the text of the PDF in state["pdf_file"].

    On success, sets a success status; on failure, records state["error"]
    and a failure status instead. Always returns the (mutated) state.
    """
    try:
        progress(0.25, desc="Extracting PDF content...")
        state["content"] = extract_pdf_content(state["pdf_file"])
        state["status"] = "βœ… PDF content extracted"
    except Exception as e:
        state["error"] = f"⚠️ Error extracting PDF content: {str(e)}"
        state["status"] = "❌ Failed to extract PDF content"
    return state
60
 
61
  def get_chroma_collection():
 
71
  raise Exception(f"Error creating Chroma collection: {str(e)}")
72
 
73
  def enhance_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
74
+ """Enhance the PDF content with semantic search and similarity analysis."""
75
  try:
76
+ if not state["content"]:
77
  return state
78
 
79
  progress(0.50, desc="Enhancing content...")
 
81
  # Get similar content from the vector store
82
  collection = get_chroma_collection()
83
  similar_docs = collection.similarity_search(
84
+ state["content"],
85
  k=3
86
  )
87
 
88
  # Initialize LLM for content generation
89
  llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
90
  prompt = ChatPromptTemplate.from_messages([
91
+ ("system", """You are an expert content enhancer. Transform this PDF content into engaging content:
92
 
93
  1. Identify and emphasize key points
94
  2. Add context and examples
 
96
  4. Keep it concise (max 3000 characters)
97
  5. Maintain factual accuracy
98
 
99
+ Content:
100
+ {content}
101
 
102
  Similar Content for Context:
103
  {similar_content}
 
107
 
108
  chain = prompt | llm | StrOutputParser()
109
  state["enhanced"] = chain.invoke({
110
+ "content": state["content"],
111
  "similar_content": "\n".join([doc.page_content for doc in similar_docs])
112
  })
113
  state["status"] = "βœ… Content enhanced"
 
175
  def verify_content(state: ProcessState, progress=gr.Progress()) -> ProcessState:
176
  """Verify the enhanced content against the original using semantic similarity."""
177
  try:
178
+ if not state["enhanced"] or not state["content"]:
179
  return state
180
 
181
  progress(1.0, desc="Verifying content...")
 
194
  if similar_docs:
195
  # Chroma returns a list of Document objects with a score attribute
196
  # But the default similarity_search does not return scores, so we just check if content is similar
197
+ similarity_score = 1.0 if similar_docs[0].page_content == state["content"] else 0.0
198
 
199
  # Initialize LLM for verification
200
  llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
 
224
 
225
  chain = prompt | llm | StrOutputParser()
226
  verification_result = json.loads(chain.invoke({
227
+ "original": state["content"],
228
  "enhanced": state["enhanced"],
229
  "similarity_score": similarity_score
230
  }))
 
261
  workflow = StateGraph(ProcessState)
262
 
263
  # Add nodes
264
+ workflow.add_node("get_content", get_content)
265
  workflow.add_node("enhance_content", enhance_content)
266
  workflow.add_node("format_linkedin", format_linkedin_post)
267
  workflow.add_node("verify_content", verify_content)
 
270
  workflow.add_node("enhance_again", enhance_again)
271
 
272
  # Set entry point
273
+ workflow.set_entry_point("get_content")
274
 
275
  # Add edges for main flow
276
+ workflow.add_edge("get_content", "enhance_content")
277
  workflow.add_edge("enhance_content", "format_linkedin")
278
  workflow.add_edge("format_linkedin", "verify_content")
279
  workflow.add_edge("verify_content", "agent_decide")
 
294
 
295
  # Add conditional edges for error handling
296
  workflow.add_conditional_edges(
297
+ "get_content",
298
  should_continue,
299
  {
300
  True: "enhance_content",
 
418
  # Select appropriate workflow based on stage
419
  if start_stage == "enhance":
420
  workflow = create_workflow()
421
+ if not state["content"]:
422
  return (
423
+ "⚠️ No content available to enhance",
424
+ "❌ Failed: No content",
425
+ state.get("content", ""),
426
  "",
427
  "",
428
  ""
 
433
  return (
434
  "⚠️ No enhanced content available to format",
435
  "❌ Failed: No enhanced content",
436
+ state.get("content", ""),
437
  state.get("enhanced", ""),
438
  "",
439
  ""
 
456
  return (
457
  final_state.get("error", ""),
458
  final_state.get("status", ""),
459
+ final_state.get("content", ""),
460
  final_state.get("enhanced", ""),
461
  final_state.get("linkedin_post", ""),
462
  verification_text
 
466
  return (
467
  f"⚠️ Error: {str(e)}",
468
  "❌ Processing failed",
469
+ state.get("content", ""),
470
  state.get("enhanced", ""),
471
  state.get("linkedin_post", ""),
472
  ""
 
554
  def create_ui():
555
  with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
556
  current_state = gr.State({
557
+ "pdf_file": "",
558
+ "content": "",
559
  "enhanced": "",
560
  "linkedin_post": "",
561
  "verification": {},
 
569
 
570
  gr.Markdown(
571
  """
572
+ # PDF to LinkedIn Post Converter
573
+ Transform your PDF documents into professional LinkedIn posts with AI content enhancement.
574
 
575
+ ### πŸ“„ How to Use
576
+ 1. Upload a PDF file
577
+ 2. Click "Generate Post"
578
+ 3. Review the enhanced content
579
+ 4. Copy your LinkedIn-ready post
 
 
 
 
 
 
 
 
580
  """
581
  )
582
 
583
  with gr.Row():
584
  with gr.Column():
585
+ pdf_file = gr.File(
586
+ label="PDF File",
587
+ file_types=[".pdf"],
588
+ type="filepath"
589
  )
590
+ convert_btn = gr.Button("πŸš€ Generate from PDF", variant="primary", size="lg")
591
 
592
  status = gr.Textbox(
593
  label="Status",
 
605
  with gr.TabItem("πŸ“ Content"):
606
  with gr.Row():
607
  with gr.Column():
608
+ content = gr.TextArea(
609
+ label="πŸ“„ Raw Content",
610
  interactive=False,
611
  show_copy_button=True,
612
  lines=8
 
671
 
672
  # Loading indicators
673
  with gr.Row(visible=False) as loading_indicators:
674
+ content_loading = gr.Markdown("πŸ”„ Extracting content...")
675
  enhanced_loading = gr.Markdown("πŸ”„ Enhancing content...")
676
  linkedin_loading = gr.Markdown("πŸ”„ Formatting for LinkedIn...")
677
  verify_loading = gr.Markdown("πŸ”„ Verifying content...")
 
683
  gr.Markdown(
684
  """
685
  ### How to Use
686
+ 1. **Input**: Upload a PDF file
687
  2. **Process**: Click the "Generate Post" button
688
+ 3. **Wait**: The system will process your PDF through multiple steps
689
  4. **Review**: Check the generated content in each tab
690
  5. **Copy**: Use the copy button to grab your LinkedIn post
691
 
 
694
  - Click πŸ”„ next to "LinkedIn Post" to regenerate from the formatting stage
695
 
696
  ### πŸ’‘ Tips for Best Results
697
+ - Use well-formatted PDFs with clear text
698
+ - Optimal length: 2-10 pages
699
+ - Ensure PDFs have readable text (not scanned images)
700
  - Review and personalize the post before sharing
701
+ - Consider your target audience when selecting content
 
702
  """
703
  )
704
 
705
  def update_loading_state(stage: str):
706
  """Update loading indicators based on current stage."""
707
  states = {
708
+ "content": [True, False, False, False, False, False, False],
709
  "enhance": [False, True, False, False, False, False, False],
710
  "format": [False, False, True, False, False, False, False],
711
  "verify": [False, False, False, True, False, False, False],
 
717
 
718
  # Loading messages for each stage
719
  loading_messages = {
720
+ "content": "πŸ”„ Extracting content...\n⏳ Please wait...",
721
  "enhance": "✨ Enhancing content...\n⚑ AI is working its magic...",
722
  "format": "🎨 Formatting for LinkedIn...\nπŸ“ Creating engaging post...",
723
  "verify": "πŸ” Verifying content...\nβš–οΈ Checking accuracy...",
 
734
  gr.update(visible=state) for state in states.get(stage, [False] * 7)
735
  ], current_message
736
 
737
+ def process_with_loading(pdf_path, state):
738
+ """Process PDF with loading indicators."""
739
  try:
740
  # Initialize state if needed
741
  if "improvement_plan" not in state:
 
748
  state["needs_improvement"] = False
749
 
750
  # Show loading indicators
751
+ loading_states, message = update_loading_state("content")
752
  yield [
753
  "", # error
754
  "Processing...", # status
755
+ message, # content (loading)
756
  "", # enhanced
757
  "", # linkedin
758
  "", # verification
 
763
  *loading_states # loading indicators
764
  ]
765
 
766
+ # Get content
767
+ state["pdf_file"] = pdf_path
768
+ content_text = get_content(state)["content"]
769
 
770
  # Show enhancing state
771
  loading_states, message = update_loading_state("enhance")
772
  yield [
773
  "",
774
  "Enhancing content...",
775
+ content_text,
776
  message, # enhanced (loading)
777
  "",
778
  "",
 
784
  ]
785
 
786
  # Enhance content
787
+ state["content"] = content_text
788
  enhanced_state = enhance_content(state)
789
  enhanced_text = enhanced_state["enhanced"]
790
 
 
793
  yield [
794
  "",
795
  "Formatting for LinkedIn...",
796
+ content_text,
797
  enhanced_text,
798
  message, # linkedin (loading)
799
  "",
 
814
  yield [
815
  "",
816
  "Verifying content...",
817
+ content_text,
818
  enhanced_text,
819
  linkedin_text,
820
  "πŸ” Verifying...\nβš–οΈ Analyzing accuracy...", # verification (loading)
 
841
  yield [
842
  "",
843
  f"Creating improvement plan (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
844
+ content_text,
845
  enhanced_text,
846
  linkedin_text,
847
  verification_text,
 
857
  yield [
858
  "",
859
  f"Researching content (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
860
+ content_text,
861
  enhanced_text,
862
  linkedin_text,
863
  verification_text,
 
877
  yield [
878
  "",
879
  f"Enhancing content again (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
880
+ content_text,
881
  enhanced_text,
882
  linkedin_text,
883
  verification_text,
 
911
  yield [
912
  "",
913
  f"Creating improved LinkedIn post (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
914
+ content_text,
915
  enhanced_text,
916
  linkedin_text,
917
  verification_text,
 
934
  yield [
935
  "",
936
  "βœ… Processing complete!",
937
+ content_text,
938
  enhanced_text,
939
  linkedin_text,
940
  verification_text,
 
950
  yield [
951
  f"⚠️ Error: {str(e)}",
952
  "❌ Processing failed",
953
+ state.get("content", ""),
954
  state.get("enhanced", ""),
955
  state.get("linkedin_post", ""),
956
  "",
 
962
  ]
963
 
964
  # Set up event handlers
965
+ convert_btn.click(
966
  fn=process_with_loading,
967
+ inputs=[pdf_file, current_state],
968
  outputs=[
969
  error,
970
  status,
971
+ content,
972
  enhanced,
973
  linkedin,
974
  verification,
 
976
  research_results,
977
  improved_linkedin,
978
  current_state,
979
+ content_loading,
980
  enhanced_loading,
981
  linkedin_loading,
982
  verify_loading,
 
1001
  return {
1002
  loading: gr.update(visible=is_loading)
1003
  for loading in [
1004
+ content_loading,
1005
  enhanced_loading,
1006
  linkedin_loading,
1007
  verify_loading,
 
1011
  ]
1012
  }
1013
 
1014
+ convert_btn.click(
1015
  lambda: update_loading_visibility(True),
1016
  None,
1017
+ [content_loading, enhanced_loading, linkedin_loading,
1018
  verify_loading, plan_loading, research_loading, improved_loading],
1019
  queue=False
1020
  )
 
1229
 
1230
  if __name__ == "__main__":
1231
  demo = create_ui()
1232
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,8 +1,9 @@
1
- gradio>=4.19.2
2
- python-dotenv>=1.0.1
3
  youtube-transcript-api>=0.6.2
4
- langchain-openai>=0.0.8
5
- langchain>=0.1.9
6
- langgraph>=0.0.27
7
- langchain-community>=0.0.27
8
- langchain-chroma>=0.1.4
 
 
1
+ gradio>=4.0.0
2
+ python-dotenv>=1.0.0
3
  youtube-transcript-api>=0.6.2
4
+ langchain-openai>=0.0.5
5
+ langchain>=0.1.0
6
+ langgraph>=0.0.20
7
+ langchain-community>=0.0.13
8
+ langchain-chroma>=0.0.5
9
+ pypdf>=3.17.1