suryanshp1 commited on
Commit
f8c9c57
·
verified ·
1 Parent(s): fc9f941

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +936 -936
app.py CHANGED
@@ -1,936 +1,936 @@
1
- from langchain.agents import AgentExecutor, create_tool_calling_agent
2
- from langchain.prompts import PromptTemplate
3
- from langchain.output_parsers import PydanticOutputParser
4
- from langchain.text_splitter import CharacterTextSplitter
5
- from datetime import datetime
6
- from langchain.chains import LLMChain
7
- from pydantic import BaseModel, Field, validator
8
- from typing import Optional
9
- from langchain_groq import ChatGroq
10
- from atproto import Client, models
11
- from cloudinary.uploader import upload
12
- import cloudinary
13
- import traceback
14
- import logging
15
- import os
16
- from dotenv import load_dotenv
17
- import datetime
18
- from fpdf import FPDF
19
- from docx import Document
20
- import gc
21
- import time
22
- import json
23
- import random
24
- import schedule
25
- import markdown2
26
- import ast
27
- from logging.handlers import RotatingFileHandler
28
- from http.server import SimpleHTTPRequestHandler, HTTPServer
29
- from threading import Thread
30
-
31
load_dotenv()

# Ensure the log directory exists and is writable
LOG_DIR = '/app/logs'
os.makedirs(LOG_DIR, exist_ok=True)

# Create log file path
log_file = os.path.join(LOG_DIR, 'app.log')

# Rotate logs so disk usage stays bounded: at most 5 files x 5 MB.
rotating_handler = RotatingFileHandler(
    filename=log_file,
    maxBytes=5 * 1024 * 1024,  # 5 MB per log file
    backupCount=5,  # Keep the last 5 log files
)

# Configure the logging format
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
rotating_handler.setFormatter(formatter)

# Set up the logger.
# BUG FIX: the level was logging.CRITICAL, which silently discarded every
# logger.info()/logger.error()/logger.exception() call in this module
# (including the "Bot has started" line just below). INFO matches the
# original comment's stated intent of a "minimum logging level".
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)  # Set the minimum logging level
logger.addHandler(rotating_handler)

# Example usage
logger.info("Bot has started successfully.")
57
-
58
-
59
class Config:
    """Central place for environment-driven settings shared across the bot."""

    # Groq API keys — one per agent so request volume is spread across keys.
    GROQ_API_KEY1 = os.environ.get("GROQ_API_KEY1")
    GROQ_API_KEY2 = os.environ.get("GROQ_API_KEY2")
    GROQ_API_KEY3 = os.environ.get("GROQ_API_KEY3")

    # Bluesky credentials for the bot account.
    BLUESKY_USERNAME = os.environ.get("BLUESKY_USERNAME")
    BLUESKY_PASSWORD = os.environ.get("BLUESKY_PASSWORD")

    # Local directory where generated ebooks are written before upload.
    EBOOK_OUTPUT_DIR = "ebooks"

    # Cloudinary upload credentials.
    CLOUDINARY_CLOUD_NAME = os.environ.get("CLOUDINARY_CLOUD_NAME")
    CLOUDINARY_API_SECRET = os.environ.get("CLOUDINARY_API_SECRET")
    CLOUDINARY_API_KEY = os.environ.get("CLOUDINARY_API_KEY")
69
-
70
-
71
# Log in to Bluesky once at import time; `client` is shared module-wide by
# the mention handler and the suggestion bot.
client = Client()
client.login(Config.BLUESKY_USERNAME, Config.BLUESKY_PASSWORD)

# Configure the Cloudinary SDK globally for ebook uploads/deletes.
cloudinary.config(
    cloud_name=Config.CLOUDINARY_CLOUD_NAME,
    api_key=Config.CLOUDINARY_API_KEY,
    api_secret=Config.CLOUDINARY_API_SECRET,
)
79
-
80
-
81
class HTML2PDF(FPDF):
    """FPDF subclass with helpers used by `generate_ebook` to render chapters.

    `chapter_title` draws a heading with a separator line; `chapter_body`
    converts Markdown chapter text to HTML and renders it.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Default colours used by the chapter helpers.
        self.chapter_title_color = (0, 0, 0)  # Black
        self.text_color = (50, 50, 50)  # Dark gray for better readability

    def chapter_title(self, chapter_number, chapter_title):
        """Render a chapter heading with multi-line support.

        Caller is expected to have started a page already (generate_ebook
        calls add_page() before this).
        """
        # Set font for chapter title
        self.set_font(family="Helvetica", style="B", size=18)
        self.set_text_color(0, 0, 0)

        # Add a subtle top border above the heading
        self.set_draw_color(200, 200, 200)
        self.line(10, self.get_y() - 5, 200, self.get_y() - 5)

        # Use multi_cell for multi-line support.
        # CONSISTENCY FIX: use the `text` keyword as the rest of this file
        # does (see generate_ebook); `txt` is the deprecated fpdf2 spelling.
        self.multi_cell(
            w=0,   # Full width
            h=10,  # Cell height
            text=f"Chapter {chapter_number}: {chapter_title}",
            align='L',  # Left alignment
        )

        # Add some spacing after the title
        self.ln(10)

    def chapter_body(self, content):
        """Render chapter body text (Markdown converted to HTML)."""
        try:
            # FPDF's core fonts only support latin-1; replace anything else
            # so rendering cannot raise UnicodeEncodeError.
            content = content.encode('latin-1', 'replace').decode('latin-1')

            # Convert Markdown to HTML
            html_content = markdown2.markdown(content)

            # Body font and colour
            self.set_font(family="Helvetica", size=11)
            self.set_text_color(50, 50, 50)

            # Wrap content in a div for better rendering, then let fpdf2
            # render the HTML directly.
            full_html = f'<div style="line-height: 1.5;">{html_content}</div>'
            self.write_html(full_html)

        except Exception as e:
            # Render the error into the PDF rather than aborting generation.
            self.multi_cell(0, 10, f"Error processing chapter content: {str(e)}")

        # Add spacing after chapter
        self.ln(10)
138
-
139
# Define the output schema
class EbookInstructions(BaseModel):
    """Structured ebook-generation instructions parsed from a user's mention."""

    # Output document format; "word"/"document" requests are folded into "doc".
    format: str = Field(
        default="pdf",
        description="Output format of the document (text/doc/pdf). here word, document format also comes under doc."
    )
    # Chapter count, clamped to [1, 20] by validate_chapters below.
    chapters: int = Field(
        default=10,
        description="Number of chapters in the ebook (max 20). if chapter is more than 20, it will be reduced to 20."
    )
    # Free-text style name, normalised by validate_writing_style below.
    writing_style: str = Field(
        default="normal",
        description="Writing style of the ebook"
    )
    # 1 = the mention is a general question (reply with a short suggestion);
    # 0 = the mention is an ebook-generation request.
    is_suggestion: int = Field(
        default=0,
        description="Whether to suggest something (1) or not (0). If user wants suggestion or if it is not related to ebook generation, it should be 1. only if it is related to ebook generation, it should be 0."
    )
    # Ebook title; required when is_suggestion == 0 (see validate_title).
    title: Optional[str] = Field(
        default=None,
        description="Title of the ebook"
    )

    # Custom validators (pydantic v1 style)
    @validator('chapters')
    def validate_chapters(cls, v):
        # Ensure chapters is between 1 and 20 (clamp, don't reject).
        return max(1, min(v, 20))

    @validator('writing_style')
    def validate_writing_style(cls, v):
        # Validate writing style; unknown styles fall back to "Normal".
        valid_styles = [
            "Narrative", "Expository", "Professional", "Classy",
            "Persuasive", "Descriptive", "Conversational", "Satirical",
            "Technical", "Inspirational", "Analytical", "Epic", "Normal"
        ]
        return v.capitalize() if v.capitalize() in valid_styles else "Normal"

    @validator('title')
    def validate_title(cls, v, values):
        # If not a suggestion, title cannot be None.
        # NOTE: relies on pydantic v1 field order — is_suggestion is declared
        # before title, so it is already present in `values` here.
        if values.get('is_suggestion') == 0 and not v:
            raise ValueError("Title is required for ebook generation")
        return v
184
-
185
def extract_ebook_instructions(user_instruction: str) -> EbookInstructions:
    """
    Extract ebook generation instructions from user input using Groq and LangChain.

    :param user_instruction: User's input instruction for ebook generation
    :return: Parsed EbookInstructions object; defaults if parsing fails
    """
    # Initialize Groq LLM.
    # CONSISTENCY FIX: read the key from Config like the rest of the module
    # instead of calling os.getenv() directly here.
    llm = ChatGroq(
        temperature=0.2,
        model_name="llama3-70b-8192",
        api_key=Config.GROQ_API_KEY1,
    )

    # Parser that turns the model's JSON answer into an EbookInstructions object.
    parser = PydanticOutputParser(pydantic_object=EbookInstructions)

    prompt_template = PromptTemplate(
        template="You are an ebook generation or suggestion agent. Extract the following information from the user instruction:\n{format_instructions}\n\nUser Instruction: {user_instruction}\n\nOutput should be valid json only.",
        input_variables=["user_instruction"],
        partial_variables={
            "format_instructions": parser.get_format_instructions()
        }
    )

    # LCEL pipeline: prompt -> llm -> parser
    chain = prompt_template | llm | parser

    try:
        return chain.invoke({"user_instruction": user_instruction})
    except Exception as e:
        logger.error(f"Error extracting instructions: {e}")
        # Fall back to safe defaults rather than crashing the mention handler.
        return EbookInstructions()
221
-
222
-
223
def upload_to_cloudinary(file_path: str, topic: str) -> tuple:
    """
    Upload a file to Cloudinary and return (secure_url, public_id).

    :param file_path: Local path of the generated ebook file.
    :param topic: Ebook topic; used to build a readable public_id and as a tag.
    :raises Exception: re-raises any Cloudinary error after logging it.
    """
    try:
        folder_name = "ebooks"
        # Slugified topic + unix timestamp keeps ids unique and readable.
        # BUG FIX: the folder prefix was previously baked into public_id AND
        # passed via folder=, which makes Cloudinary store the asset under
        # "ebooks/ebooks/...". Pass the folder only once (via folder=).
        public_id = f"{topic.lower().replace(' ', '_')}_{int(time.time())}"

        # Upload the file
        result = upload(
            file_path,
            resource_type="raw",  # PDF/DOCX/TXT are "raw" (non-image) assets
            public_id=public_id,
            folder=folder_name,
            tags=[topic, "ebook"],
            overwrite=True,
            access_mode="public",
        )

        # Use the values Cloudinary reports back (public_id now includes folder).
        url = result["secure_url"]
        public_id = result["public_id"]

        logger.info(f"File uploaded successfully to Cloudinary. URL: {url}")
        return url, public_id

    except Exception as e:
        logger.exception(f"Error uploading to Cloudinary: {str(e)}")
        raise e
256
-
257
-
258
def delete_from_cloudinary(public_id: str):
    """
    Delete a raw asset from Cloudinary by its public_id (best effort).

    Errors are logged and swallowed: deletion failures must not break the
    caller's own error-handling path (see reply_with_bot).
    """
    try:
        # BUG FIX: `cloudinary.api` is a submodule that `import cloudinary`
        # alone does not guarantee to load; import it explicitly so the
        # attribute access below cannot raise AttributeError.
        import cloudinary.api

        result = cloudinary.api.delete_resources([public_id], resource_type="raw")
        logger.info(f"File deleted from Cloudinary: {result}")
    except Exception as e:
        logger.exception(f"Error deleting from Cloudinary: {str(e)}")
267
-
268
-
269
class EditorAgent:
    """Agent that proof-reads a chapter and enforces the requested writing style.

    Uses a tool-calling agent with an empty tool list, so the LLM simply
    follows the editing prompt and returns the corrected chapter text.
    """

    def __init__(self):
        # Editing prompt. {agent_scratchpad} is required by
        # create_tool_calling_agent even though no tools are provided.
        self.prompt = PromptTemplate(
            input_variables=["content", "writing_style"],
            template="""You are an expert eBook content editor. Your task is to review and correct the given chapter content to improve its overall quality.

**Editing Instructions**:

1. **Grammar and Spelling**: Correct any grammatical errors, spelling mistakes, or awkward phrasing.
2. **Clarity**: Ensure the content is easy to understand. Simplify complex sentences where necessary and break down any overly complicated ideas.
3. **Structure**: Ensure that the chapter has a logical flow with proper transitions between paragraphs. Add or adjust headings, subheadings, and sections if needed for better readability.
4. **Tone**: Ensure the writing has a formal yet accessible tone, suitable for a general audience, such as students, researchers, or general readers.
5. **Conciseness**: Eliminate redundancy and unnecessary filler content while keeping the chapter comprehensive.
6. **Consistency**: Maintain consistent terminology, style, and formatting throughout the content.
7. **Focus**: Ensure that the content stays on topic and avoids unnecessary digressions.
8. **Writing Style**: The writing style of the content should be {writing_style}.

Input:
- **Chapter Content**: {content}

**Output Requirements**:
- Your output response must contain only edited chapter content.
- In output,do not include any additional/extra information in the content e.g 'Here is the corrected chapter content:'.

Note: don't include any explanation from your side.
Ignore below instructions
\n\n placeholder:{agent_scratchpad}
""",
        )

        # Dedicated API key so editing traffic is isolated from other agents.
        self.llm = ChatGroq(api_key=Config.GROQ_API_KEY3, model="llama3-8b-8192")
        self.tools = []

    def edit(self, content, writing_style):
        """Return the edited chapter text; log and re-raise on failure.

        :param content: Raw chapter text produced by the writer agent.
        :param writing_style: Style name to enforce (e.g. "Normal").
        """
        try:
            # Build a fresh agent/executor per call (stateless usage).
            research_agent = create_tool_calling_agent(
                self.llm, self.tools, self.prompt
            )
            research_agent_executor = AgentExecutor(
                agent=research_agent, tools=self.tools
            )
            response = research_agent_executor.invoke({"content": content, "writing_style": writing_style})
            edited_content = response.get("output")

            return edited_content

        except Exception as e:
            logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
            raise e
319
-
320
-
321
class ResearcherAgent:
    """Agent that produces the chapter-title outline for an ebook topic."""

    def __init__(self):
        # Outline prompt; the model must answer with a Python list literal so
        # research() can parse it with ast.literal_eval.
        self.prompt = PromptTemplate(
            input_variables=["chapters", "topic", "writing_style"],
            template="""Your task is to generate a detailed outline with exactly {chapters} chapter titles on the given topic: "{topic}" using the writing style: "{writing_style}".

**Output Constraints**:
- The output must be a valid Python list containing exactly {chapters} chapter titles.
- Each title should be a properly formatted string without any special keywords, symbols, or unnecessary punctuation.
- Do not include any text outside of the Python list (e.g., no preamble, explanations, or trailing comments).

**Output Format Example**:
['Title1', 'Title2', 'Title3', 'Title4', 'Title5', 'Title6', 'Title7', 'Title8', 'Title9', 'Title10', 'Title11', 'Title12']

**Additional Instructions**:
- Ensure the chapter titles are meaningful and follow a logical progression related to the topic.
- Strictly return the output as a Python list. **No extra text**.

Ignore the text below:
placeholder:{agent_scratchpad}""",
        )

        self.llm = ChatGroq(api_key=Config.GROQ_API_KEY1, model="llama3-8b-8192")
        self.tools = []

    def research(self, extracted_instructions):
        """Return a list of chapter titles for the requested ebook.

        :param extracted_instructions: dict with "title", "chapters" and
            "writing_style" keys (from EbookInstructions).
        :raises Exception: if the LLM call fails or its output is not a
            valid Python list literal (ast.literal_eval raises).
        """
        try:
            # Build a fresh agent/executor per call (stateless usage).
            research_agent = create_tool_calling_agent(
                self.llm, self.tools, self.prompt
            )
            research_agent_executor = AgentExecutor(
                agent=research_agent, tools=self.tools
            )
            response = research_agent_executor.invoke({"topic": extracted_instructions.get("title"), "chapters": extracted_instructions.get("chapters"), "writing_style": extracted_instructions.get("writing_style")})
            response = response.get("output")

            # literal_eval (not eval) safely parses the list literal.
            chapters = ast.literal_eval(response)

            return chapters

        except Exception as e:
            logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
            raise e
366
-
367
-
368
class WriterAgent:
    """Agent that writes the full content for a single chapter."""

    def __init__(self):
        # BUG FIX: input_variables previously read
        # ["topic", "chapter" "writing_style"] — the missing comma made Python
        # concatenate the last two strings into "chapterwriting_style", so the
        # declared variables did not match the {chapter} and {writing_style}
        # placeholders used in the template.
        self.prompt = PromptTemplate(
            input_variables=["topic", "chapter", "writing_style"],
            template="""You are an expert eBook writer. Your task is to write a detailed, well-structured, and engaging chapter for an eBook based on the given chapter title and overall topic of the book. The chapter should be informative, easy to understand, and provide in-depth coverage of the subject matter.

Here are your instructions:

1. The content should be structured with proper headings, subheadings, and sections.
2. The writing should be in a clear and engaging style, suitable for a broad audience such as students, researchers, or curious readers.
3. Ensure the chapter content stays focused on the chapter title, but it should also be aligned with the overall topic of the book.
4. Provide relevant examples, explanations, and any necessary background information to make the content more comprehensive and easier to understand.
5. The length of the chapter should be around 1500-2000 words.
6. Use a formal yet approachable tone. Avoid overly technical jargon unless it's explained well.
7. Writing style should be {writing_style}.

Inputs:
- **Topic**: {topic}
- **Chapter Title**: {chapter}

Now, based on the provided inputs, write the complete content for this chapter. The content should be returned as a single string of text.

Note: Don't include chapter number and chapter title in the content. Also don't include your instructions in the content.
\n\n placeholder:{agent_scratchpad}
""",
        )

        # Dedicated API key so writing traffic is isolated from other agents.
        self.llm = ChatGroq(api_key=Config.GROQ_API_KEY2, model="Llama-3.1-8b-instant")
        self.tools = []

    def write(self, chapter, extracted_instructions):
        """Return the generated body text for one chapter.

        :param chapter: Chapter title produced by ResearcherAgent.
        :param extracted_instructions: dict with "title" and "writing_style".
        :raises Exception: if the LLM call fails (logged and re-raised).
        """
        try:
            # Build a fresh agent/executor per call (stateless usage).
            research_agent = create_tool_calling_agent(
                self.llm, self.tools, self.prompt
            )
            research_agent_executor = AgentExecutor(
                agent=research_agent, tools=self.tools
            )
            response = research_agent_executor.invoke(
                {"topic": extracted_instructions.get("title"), "chapter": chapter, "writing_style": extracted_instructions.get("writing_style")}
            )
            chapter_content = response.get("output")

            return chapter_content

        except Exception as e:
            logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
            raise e
417
-
418
-
419
def _ebook_base_filename(topic):
    """Build a unique output path stem ebooks/<topic>_<timestamp> (no extension)."""
    os.makedirs("ebooks", exist_ok=True)
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"ebooks/{topic.replace(' ', '_')}_{timestamp}"


def _write_pdf_ebook(topic, chapters_content, chapters, base_filename):
    """Render cover page, table of contents and chapters into a PDF file."""
    pdf = HTML2PDF("P", "mm", "A4")
    pdf.set_auto_page_break(auto=True, margin=15)

    # Cover Page with improved styling
    pdf.add_page()
    pdf.set_font(family="Helvetica", style="B", size=30)
    pdf.set_text_color(0, 0, 0)
    pdf.set_y(pdf.h / 2 - 15)

    # Centered title with subtle background
    pdf.set_fill_color(240, 240, 240)
    pdf.cell(
        w=pdf.w - 20, h=20, text=topic, align="C",
        fill=True, new_x="LMARGIN", new_y="NEXT"
    )

    # Table of Contents with improved layout
    pdf.add_page()
    pdf.set_font("Helvetica", "B", 16)
    pdf.set_text_color(0, 0, 0)
    pdf.cell(0, 10, text="Table of Contents", new_x="LMARGIN", new_y="NEXT")
    pdf.ln(5)

    pdf.set_font("Helvetica", size=12)
    pdf.set_text_color(50, 50, 50)
    for i, chapter_title in enumerate(chapters):
        # Dots pad the space between chapter title and the page edge.
        pdf.cell(
            0, 10,
            text=f"Chapter {i+1}: {chapter_title} " + "." * 50,
            new_x="LMARGIN",
            new_y="NEXT"
        )

    # One chapter per page
    for i, content in enumerate(chapters_content):
        pdf.add_page()
        pdf.chapter_title(i + 1, chapters[i])
        pdf.chapter_body(content)

    pdf_file_path = f"{base_filename}.pdf"
    pdf.output(pdf_file_path)
    return pdf_file_path


def _write_doc_ebook(topic, chapters_content, chapters, base_filename):
    """Write title, table of contents and chapters into a .docx file."""
    doc = Document()
    doc.add_heading(topic, level=1)

    # Table of Contents
    doc.add_heading("Table of Contents", level=2)
    for i, chapter_title in enumerate(chapters):
        doc.add_paragraph(f"Chapter {i+1}: {chapter_title}")

    # Chapters
    for i, content in enumerate(chapters_content):
        doc.add_heading(f"Chapter {i+1}: {chapters[i]}", level=2)
        doc.add_paragraph(content)

    doc_file_path = f"{base_filename}.docx"
    doc.save(doc_file_path)
    return doc_file_path


def _write_text_ebook(topic, chapters_content, chapters, base_filename):
    """Write title, table of contents and chapters into a plain-text file."""
    txt_file_path = f"{base_filename}.txt"
    with open(txt_file_path, "w", encoding="utf-8") as txt_file:
        # Title
        txt_file.write(f"{topic}\n\n")

        # Table of Contents
        txt_file.write("Table of Contents\n")
        for i, chapter_title in enumerate(chapters):
            txt_file.write(f"Chapter {i+1}: {chapter_title}\n")
        txt_file.write("\n")

        # Chapters
        for i, content in enumerate(chapters_content):
            txt_file.write(f"Chapter {i+1}: {chapters[i]}\n")
            txt_file.write(f"{content}\n\n")

    return txt_file_path


def generate_ebook(topic, chapters_content, chapters, output_format="pdf"):
    """Generate an ebook file in the requested format and return its path.

    Decomposed into one private writer per format; behavior and the public
    signature are unchanged.

    :param topic: Book title (also used to build the output filename).
    :param chapters_content: List of chapter body strings.
    :param chapters: List of chapter title strings (parallel to contents).
    :param output_format: "pdf", "doc" or "text" (case-insensitive).
    :raises ValueError: for an unsupported output format.
    """
    try:
        base_filename = _ebook_base_filename(topic)
        fmt = output_format.lower()

        if fmt == "pdf":
            return _write_pdf_ebook(topic, chapters_content, chapters, base_filename)
        elif fmt == "doc":
            return _write_doc_ebook(topic, chapters_content, chapters, base_filename)
        elif fmt == "text":
            return _write_text_ebook(topic, chapters_content, chapters, base_filename)
        else:
            raise ValueError(f"Unsupported output format: {output_format}")

    except Exception as e:
        logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
        raise e
520
-
521
-
522
class EbookGenerator:
    """Pipeline facade: research -> write -> edit -> render to a file."""

    @staticmethod
    def generate_ebook_task(extracted_instructions):
        """Run the full ebook pipeline and return the generated file path.

        :param extracted_instructions: dict derived from EbookInstructions
            (keys: "title", "chapters", "writing_style", "format", ...).
        :raises Exception: if instructions are missing or any stage fails.
        """
        try:
            if not extracted_instructions:
                raise Exception("Data not provided")

            logger.info("Initializing agents...")
            researcher = ResearcherAgent()
            writer = WriterAgent()
            editor = EditorAgent()

            logger.info("Researching the topic...")
            chapter_titles = researcher.research(extracted_instructions)

            logger.info("Writing content for each chapter...")
            drafts = [
                writer.write(title, extracted_instructions)
                for title in chapter_titles
            ]

            logger.info("Editing each chapter for grammar and consistency...")
            style = extracted_instructions.get("writing_style")
            edited = [editor.edit(draft, style) for draft in drafts]

            logger.info("Decoding text to latin-1...")
            # FPDF's core fonts are latin-1 only; drop unencodable characters.
            edited = [
                text.encode("latin-1", "ignore").decode("latin-1")
                for text in edited
            ]

            logger.info("Generating PDF with all content...")
            file_path = generate_ebook(
                extracted_instructions.get("title"),
                edited,
                chapter_titles,
                extracted_instructions.get("format"),
            )

            logger.info("Task completed.")

            # Free the large chapter strings eagerly; this runs long-lived.
            collected = gc.collect()
            logger.info(f"Garbage collector: collected {collected} objects.")
            return file_path

        except Exception as e:
            logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
            raise e
569
-
570
-
571
def reply_with_bot(mention: dict):
    """Reply to a mention with a PDF eBook.

    Two paths: if the mention is classified as a general question
    (is_suggestion == 1) the bot replies with a short LLM answer; otherwise
    it generates an ebook, uploads it to Cloudinary and replies with the
    download link. All errors are logged, never propagated to the caller.
    """
    try:
        logger.info("Processing mention reply_with_pdf ...")
        # Extract topic from the mention text
        text = mention.record.text
        words = text.split()
        mentioned_text = " ".join(words[1:])  # Assuming the topic is after the bot mention
        suggested_text = None
        public_id = None

        # Parse the free-text request into structured instructions.
        result = extract_ebook_instructions(mentioned_text)
        extracted_instructions = json.loads(result.model_dump_json())
        logger.info(f"Topic: {extracted_instructions.get('title')}")

        if extracted_instructions.get("is_suggestion") == 1:

            # Initialize Groq LLM for the short-answer path.
            llm = ChatGroq(api_key=os.getenv("GROQ_API_KEY1"), model_name="mixtral-8x7b-32768")

            # Create a prompt template
            prompt_template = PromptTemplate(
                input_variables=["question"],
                template="Drawing from the depths of human knowledge and wisdom, craft a thoughtful, illuminating response to the following inquiry that captures its essence with eloquence and brevity: {question} Distill the profound into a spark of insight that ignites understanding. \n Note: Provide a concise and to-the-point answer to the following question in no more than 280 characters."
            )

            # Create an LLM chain
            chain = LLMChain(llm=llm, prompt=prompt_template)

            # Generate response
            suggested_text = chain.run(mentioned_text)

            # Ensure response is within Bluesky's character limit.
            if len(suggested_text) > 280:
                # If too long, split and take the first part
                splitter = CharacterTextSplitter(
                    separator="\n",
                    chunk_size=280,
                    chunk_overlap=0,
                    length_function=len,
                )
                chunks = splitter.split_text(suggested_text)
                suggested_text = chunks[0]
        else:
            # Generate content and create ebook file
            file_path = EbookGenerator.generate_ebook_task(extracted_instructions)
            logger.info(f"file path: {file_path}")

            # Upload PDF to Cloudinary
            download_url, public_id = upload_to_cloudinary(file_path, extracted_instructions.get('title'))

            logger.info(f"PDF uploaded to: {download_url}")

        try:
            # Create proper reply reference (mention is both root and parent:
            # the bot replies directly to the mentioning post).
            reply_ref = {
                "root": {"cid": mention.cid, "uri": mention.uri},
                "parent": {"cid": mention.cid, "uri": mention.uri},
            }

            if not suggested_text:
                # Ebook path: embed an external-link card for the download.
                embed = models.AppBskyEmbedExternal.Main(
                    external=models.AppBskyEmbedExternal.External(
                        title=f"Ebook: {extracted_instructions.get('title')}",
                        description="Click to download your generated ebook",
                        uri=download_url,
                        thumb=None,
                    )
                )

                # Reply to the mention with the download link
                client.post(
                    text=f"🤖 Here's your ebook about {extracted_instructions.get('title')}! 📚\nClick here to download: {download_url}",
                    reply_to=reply_ref,
                    embed=embed,
                )

                logger.info(f"PDF link sent successfully!")

                # Clean up local file; the Cloudinary copy is the durable one.
                if os.path.exists(file_path):
                    os.remove(file_path)
                    logger.info(f"Local PDF file cleaned up: {file_path}")

            else:
                # Suggestion path: plain-text reply, no embed.
                client.post(
                    text=f"{suggested_text}",
                    reply_to=reply_ref,
                )

        except Exception as e:
            # If posting fails, clean up the uploaded file from Cloudinary.
            # NOTE(review): public_id is None on the suggestion path;
            # delete_from_cloudinary logs-and-swallows, so this is harmless.
            delete_from_cloudinary(public_id)
            raise e

    except Exception as e:
        logger.exception(f"Error in reply_with_pdf: {str(e)}")
        print(f"Error: {str(e)}")
672
-
673
-
674
- """
675
- Bluesky Book Suggestion thread Bot
676
- """
677
class BlueskyBookSuggestionBot:
    """Scheduled bot that posts book-discussion threads on Bluesky.

    Generates a 10-part thread with an LLM and posts it as a reply chain at
    a fixed interval (driven by the module-level `schedule` loop in main()).
    """

    def __init__(self, post_interval_minutes=30):
        # Reuse the module-level, already-authenticated Bluesky client.
        self.bluesky_client = client

        # Initialize Groq LLM (higher temperature for more creative threads).
        self.llm = ChatGroq(
            temperature=0.7,
            model_name="llama-3.3-70b-versatile",
            groq_api_key=os.getenv('GROQ_API_KEY1')
        )

        # Minutes between scheduled posts.
        self.post_interval = post_interval_minutes

        # History of posted threads with (simulated) engagement numbers.
        self.post_engagement_history = []

    def generate_book_topic(self):
        """Generate an engaging book-related topic.

        Returns the raw LLM message; its .content is expected to be a Python
        list literal of exactly 10 thread-part strings (parsed downstream by
        create_threaded_post via ast.literal_eval).
        """
        topic_prompt = PromptTemplate(
            input_variables=['genre'],
            template="""
You are a book suggestion twitter bot. Generate a provocative, boundary-pushing book discussion topic in the {genre} genre.
Create a thread that:
- Challenges conventional wisdom
- Reveals uncomfortable truths
- Sparks intense intellectual debate
- Uses a raw, unfiltered, and unapologetic tone
- Combines intellectual depth with viral potential

RULES:
- Be brutally honest
- Use sharp, confrontational language
- Expose hidden narratives
- Craft a hook that demands attention
- Make people uncomfortable enough to engage
- Synthesize complex ideas into razor-sharp statements
- Each thread part should be a punch to intellectual complacency

Tone: Uncompromising. Cerebral. Confrontational.
Goal: Intellectual provocation that breaks echo chambers

**Output Format Instructions**:
- The output must be a valid Python list that contains exactly 10 thread parts as comma separated strings.
- Each thread part should be a properly formatted python string without any special keywords, symbols, contractions or unnecessary punctuation.
- Ensure threads are meaningful and follow a logical progression.

**output Format must be like below list example**:
['thread part 1', 'thread part 2', 'thread part 3', 'thread part 4', 'thread part 5', 'thread part 6', 'thread part 7', 'thread part 8', 'thread part 9', 'thread part 10']

Note: don't include any explanation from your side.
"""
        )

        # Rotate through genres to keep content diverse
        genres = [
            # Technology
            'technology',
            'artificial intelligence',
            'cybersecurity',
            'computer science',
            'digital innovation',
            'emerging technologies',

            # Science
            'science',
            'astronomy',
            'biology',
            'physics',
            'environmental science',
            'neuroscience',
            'quantum physics',
            'climate science',

            # Mathematics
            'mathematics',
            'applied mathematics',
            'data science',
            'cryptography',
            'statistical analysis',
            'computational mathematics',

            # Philosophy
            'philosophy',
            'ethics',
            'political philosophy',
            'existentialism',
            'epistemology',
            'philosophy of science',
            'logic',

            # History
            'history',
            'world history',
            'military history',
            'cultural history',
            'ancient civilizations',
            'modern history',
            'diplomatic history',
            'social movements',

            # Autobiography
            'autobiography',
            'scientific biography',
            'tech innovators',
            'political leadership',
            'explorers and pioneers',
            'intellectual memoirs',
            'social activists'
        ]

        # Create chain (LCEL: prompt piped into the LLM)
        topic_chain = topic_prompt | self.llm
        genre = random.choice(genres)

        # Generate topic
        topic_response = topic_chain.invoke({"genre": genre})
        return topic_response

    def create_threaded_post(self, main_topic):
        """Create a threaded post with multiple parts.

        :param main_topic: LLM message whose .content is a list literal.
        :return: dict with the root post (thread_posts is intentionally
            left empty), or None if posting fails.
        """
        try:
            # Parse the topic into a list of thread parts
            topic_parts = ast.literal_eval(main_topic.content)

            if not isinstance(topic_parts, list) or len(topic_parts) < 1:
                raise ValueError("Generated topic must be a list with at least one part.")

            # Post the root thread (first part)
            root_post = self.bluesky_client.send_post(text=topic_parts[0])

            # Initialize parent references for threading
            parent_uri = root_post.uri
            parent_cid = root_post.cid

            # Iterate over the remaining parts and post them as replies,
            # each chained to the previous one (root stays constant).
            for part in topic_parts[1:]:
                if part.strip():  # Skip empty parts
                    thread_post = self.bluesky_client.send_post(
                        text=part,
                        reply_to={
                            'root': {'uri': root_post.uri, 'cid': root_post.cid},
                            'parent': {'uri': parent_uri, 'cid': parent_cid}
                        }
                    )

                    # Update parent references for the next post in the thread
                    parent_uri = thread_post.uri
                    parent_cid = thread_post.cid

            return {"root_post": root_post, "thread_posts": []}

        except Exception as e:
            print(f"Error creating threaded post: {e}")
            return None

    def run_post_cycle(self):
        """
        Single post cycle for the bot.
        Generates topic, creates post, and tracks engagement.
        """
        try:
            # Generate book topic
            book_topic = self.generate_book_topic()

            # Create and post threaded content
            posted_thread = self.create_threaded_post(book_topic)

            if posted_thread:
                # Track engagement (simulated for this example)
                self.post_engagement_history.append({
                    'likes': random.randint(10, 100),
                    'post': posted_thread
                })

            print(f"Successfully posted book discussion thread at {datetime.datetime.now()}")

        except Exception as e:
            print(f"Error in post cycle: {e}")

    def start(self):
        """Start the bot's posting schedule.

        Only registers the job; main()'s loop must call
        schedule.run_pending() for cycles to actually fire.
        """
        # Schedule the post cycle
        schedule.every(self.post_interval).minutes.do(self.run_post_cycle)

        print(f"Book Suggestion Bot started. Posting every {self.post_interval} minutes.")

    def stop(self):
        """Stops all scheduled jobs."""
        schedule.clear()
        print("Book Suggestion Bot stopped.")
870
-
871
def main():
    """Main bot loop to monitor and reply to mentions.

    Also drives the scheduled book-suggestion bot via schedule.run_pending().
    Runs until interrupted; all exceptions stop the bot and are logged.
    """
    # ROBUSTNESS FIX: define `bot` before the try so the except handlers can
    # always reference it. Previously, if BlueskyBookSuggestionBot() (or
    # anything before it) raised, bot.stop() in the handler raised
    # UnboundLocalError and masked the real error.
    bot = None
    try:
        print("Bot started. Listening for mentions...")
        # Keep track of processed notifications
        processed_notifications = set()
        last_processed_time = datetime.datetime.now(datetime.timezone.utc)

        # Initialize the bot with 30-minute intervals
        bot = BlueskyBookSuggestionBot(post_interval_minutes=30)
        # Start the bot (registers its scheduled job)
        bot.start()

        while True:

            notifications = client.app.bsky.notification.list_notifications().notifications

            for note in notifications:
                # Only handle fresh mentions newer than startup that we have
                # not already replied to.
                if (
                    note.reason == "mention"
                    and note.uri not in processed_notifications
                    and datetime.datetime.strptime(
                        note.indexed_at, "%Y-%m-%dT%H:%M:%S.%fZ"
                    ).replace(tzinfo=datetime.timezone.utc)
                    > last_processed_time
                ):
                    reply_with_bot(note)
                    processed_notifications.add(note.uri)

            # Limit processed notifications to prevent memory growth
            if len(processed_notifications) > 1000:
                processed_notifications = set(list(processed_notifications)[-500:])

            # Fire any due scheduled posts (book-suggestion threads).
            schedule.run_pending()

            time.sleep(5)  # Adjust the sleep duration as needed

    except KeyboardInterrupt as e:
        if bot:
            bot.stop()
        logger.exception(f"Error in main (): {e} | Traceback: {traceback.format_exc()}")
    except Exception as e:
        if bot:
            bot.stop()
        logger.exception(f"Error in main (): {e} | Traceback: {traceback.format_exc()}")
914
-
915
- # HTTP server
916
class MyHandler(SimpleHTTPRequestHandler):
    """Minimal health-check handler: every GET returns a plain-text status."""

    def do_GET(self):
        # Same fixed response for any path — this endpoint only proves liveness.
        body = b"The bot is running on port 7860"
        self.send_response(200)
        self.send_header("Content-type", "text/plain")
        self.end_headers()
        self.wfile.write(body)
923
-
924
def run_http_server():
    """Serve the health-check endpoint on port 7860 (blocks forever)."""
    httpd = HTTPServer(('', 7860), MyHandler)
    print("HTTP server is running on port 7860")
    httpd.serve_forever()
928
-
929
- if __name__ == "__main__":
930
- # main()
931
- # Start the bot in a separate thread
932
- bot_thread = Thread(target=main, daemon=True)
933
- bot_thread.start()
934
-
935
- # Start the HTTP server in the main thread
936
- run_http_server()
 
1
+ from langchain.agents import AgentExecutor, create_tool_calling_agent
2
+ from langchain.prompts import PromptTemplate
3
+ from langchain.output_parsers import PydanticOutputParser
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from datetime import datetime
6
+ from langchain.chains import LLMChain
7
+ from pydantic import BaseModel, Field, validator
8
+ from typing import Optional
9
+ from langchain_groq import ChatGroq
10
+ from atproto import Client, models
11
+ from cloudinary.uploader import upload
12
+ import cloudinary
13
+ import traceback
14
+ import logging
15
+ import os
16
+ from dotenv import load_dotenv
17
+ import datetime
18
+ from fpdf import FPDF
19
+ from docx import Document
20
+ import gc
21
+ import time
22
+ import json
23
+ import random
24
+ import schedule
25
+ import markdown2
26
+ import ast
27
+ from logging.handlers import RotatingFileHandler
28
+ from http.server import SimpleHTTPRequestHandler, HTTPServer
29
+ from threading import Thread
30
+
31
load_dotenv()

# Ensure the log directory exists and is writable
LOG_DIR = '/app/logs'
os.makedirs(LOG_DIR, exist_ok=True)

# Create log file path
log_file = os.path.join(LOG_DIR, 'app.log')

# Rotate logs so a long-running container never fills its disk.
rotating_handler = RotatingFileHandler(
    filename=log_file,
    maxBytes=5 * 1024 * 1024,  # 5 MB per log file
    backupCount=5,  # Keep the last 5 log files
)

# Configure the logging format
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
rotating_handler.setFormatter(formatter)

# Set up the logger.
# Bug fix: the level was CRITICAL, which silently discarded every
# logger.info()/logger.error()/logger.exception() call in this module —
# including the startup message below. INFO matches how the logger is
# actually used throughout the file.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(rotating_handler)

# Example usage
logger.info("Bot has started successfully.")
57
+
58
+
59
class Config:
    """Central place for environment-driven configuration.

    All values are read once at import time via ``os.getenv`` and may be
    ``None`` when the corresponding environment variable is not set.
    """

    # Three Groq keys so each agent (researcher=1, writer=2, editor=3)
    # uses its own key, spreading per-key rate limits.
    GROQ_API_KEY1 = os.getenv("GROQ_API_KEY1")
    GROQ_API_KEY2 = os.getenv("GROQ_API_KEY2")
    GROQ_API_KEY3 = os.getenv("GROQ_API_KEY3")
    # Bluesky account used for posting threads and answering mentions.
    BLUESKY_USERNAME = os.getenv("BLUESKY_USERNAME")
    BLUESKY_PASSWORD = os.getenv("BLUESKY_PASSWORD")
    # Local directory where generated ebooks are written before upload.
    EBOOK_OUTPUT_DIR = "ebooks"
    # Cloudinary credentials for hosting generated ebook files.
    CLOUDINARY_CLOUD_NAME = os.getenv("CLOUDINARY_CLOUD_NAME")
    CLOUDINARY_API_SECRET = os.getenv("CLOUDINARY_API_SECRET")
    CLOUDINARY_API_KEY = os.getenv("CLOUDINARY_API_KEY")
69
+
70
+
71
# Log in to Bluesky once at import time; this client is shared module-wide
# by reply_with_bot(), main() and BlueskyBookSuggestionBot.
client = Client()
client.login(Config.BLUESKY_USERNAME, Config.BLUESKY_PASSWORD)

# Configure Cloudinary, used to host generated ebook files for download.
cloudinary.config(
    cloud_name=Config.CLOUDINARY_CLOUD_NAME,
    api_key=Config.CLOUDINARY_API_KEY,
    api_secret=Config.CLOUDINARY_API_SECRET,
)
79
+
80
+
81
class HTML2PDF(FPDF):
    """FPDF subclass that renders chapter titles and Markdown chapter bodies.

    Chapter bodies are converted from Markdown to HTML and rendered with
    FPDF's ``write_html`` so headings/lists inside the generated content
    survive into the PDF.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Set some default styles
        self.chapter_title_color = (0, 0, 0)  # Black
        self.text_color = (50, 50, 50)  # Dark gray for better readability

    def chapter_title(self, chapter_number, chapter_title):
        """Render a chapter heading (multi-line safe) with a subtle top rule.

        :param chapter_number: 1-based chapter index shown in the heading.
        :param chapter_title: Chapter title text.
        """
        # Set font for chapter title
        self.set_font(family="Helvetica", style="B", size=18)
        self.set_text_color(0, 0, 0)

        # Add a subtle top border
        self.set_draw_color(200, 200, 200)
        self.line(10, self.get_y() - 5, 200, self.get_y() - 5)

        # Use multi_cell for multi-line support.
        # Consistency fix: use the modern ``text=`` keyword (as the rest of
        # this file already does for pdf.cell) instead of the deprecated
        # fpdf2 ``txt=`` keyword.
        self.multi_cell(
            w=0,  # Full width
            h=10,  # Cell height
            text=f"Chapter {chapter_number}: {chapter_title}",
            align='L'  # Left alignment
        )

        # Add some spacing after the title
        self.ln(10)

    def chapter_body(self, content):
        """Render one chapter's Markdown content.

        Rendering errors are written into the PDF instead of aborting the
        whole generation run.
        """
        try:
            # Ensure content is representable in FPDF's latin-1 core fonts
            content = content.encode('latin-1', 'replace').decode('latin-1')

            # Convert Markdown to HTML
            html_content = markdown2.markdown(content)

            # Use Helvetica font
            self.set_font(family="Helvetica", size=11)

            # Set text color
            self.set_text_color(50, 50, 50)

            # Wrap content in a div so line spacing applies to the whole body
            full_html = f'<div style="line-height: 1.5;">{html_content}</div>'
            self.write_html(full_html)

        except Exception as e:
            # Error handling: surface the problem inside the document itself
            self.multi_cell(0, 10, f"Error processing chapter content: {str(e)}")

        # Add spacing after chapter
        self.ln(10)
138
+
139
+ # Define the output schema
140
class EbookInstructions(BaseModel):
    """Structured instructions parsed from a user's mention text.

    ``is_suggestion == 1`` routes the request to the short-answer path in
    ``reply_with_bot``; ``0`` triggers full ebook generation.

    NOTE(review): uses the pydantic v1 ``@validator`` API — confirm the
    pinned pydantic version before upgrading to v2 (``field_validator``).
    """

    format: str = Field(
        default="pdf",
        description="Output format of the document (text/doc/pdf). here word, document format also comes under doc."
    )
    chapters: int = Field(
        default=10,
        description="Number of chapters in the ebook (max 20). if chapter is more than 20, it will be reduced to 20."
    )
    writing_style: str = Field(
        default="normal",
        description="Writing style of the ebook"
    )
    is_suggestion: int = Field(
        default=0,
        description="Whether to suggest something (1) or not (0). If user wants suggestion or if it is not related to ebook generation, it should be 1. only if it is related to ebook generation, it should be 0."
    )
    title: Optional[str] = Field(
        default=None,
        description="Title of the ebook"
    )

    # Custom validators
    @validator('chapters')
    def validate_chapters(cls, v):
        # Clamp chapters into the inclusive range [1, 20]
        return max(1, min(v, 20))

    @validator('writing_style')
    def validate_writing_style(cls, v):
        # Normalize to a known style; anything unrecognized becomes "Normal"
        valid_styles = [
            "Narrative", "Expository", "Professional", "Classy",
            "Persuasive", "Descriptive", "Conversational", "Satirical",
            "Technical", "Inspirational", "Analytical", "Epic", "Normal"
        ]
        return v.capitalize() if v.capitalize() in valid_styles else "Normal"

    @validator('title')
    def validate_title(cls, v, values):
        # If not a suggestion, title cannot be None — ebook generation needs it
        if values.get('is_suggestion') == 0 and not v:
            raise ValueError("Title is required for ebook generation")
        return v
184
+
185
def extract_ebook_instructions(user_instruction: str) -> EbookInstructions:
    """
    Extract ebook generation instructions from user input using Groq and LangChain.

    :param user_instruction: User's input instruction for ebook generation
    :return: Parsed EbookInstructions object (defaults on parse failure)
    """
    # Parser that turns the model's JSON reply into an EbookInstructions object
    output_parser = PydanticOutputParser(pydantic_object=EbookInstructions)

    # Low-temperature Groq model for deterministic field extraction
    extraction_llm = ChatGroq(
        temperature=0.2,
        model_name="llama3-70b-8192",
        api_key=os.getenv("GROQ_API_KEY1"),
    )

    extraction_prompt = PromptTemplate(
        template="You are an ebook generation or suggestion agent. Extract the following information from the user instruction:\n{format_instructions}\n\nUser Instruction: {user_instruction}\n\nOutput should be valid json only.",
        input_variables=["user_instruction"],
        partial_variables={
            "format_instructions": output_parser.get_format_instructions(),
        },
    )

    # prompt -> llm -> parser pipeline
    pipeline = extraction_prompt | extraction_llm | output_parser

    try:
        return pipeline.invoke({"user_instruction": user_instruction})
    except Exception as e:
        logger.error(f"Error extracting instructions: {e}")
        # Fall back to safe defaults when the model output cannot be parsed
        return EbookInstructions()
221
+
222
+
223
def upload_to_cloudinary(file_path: str, topic: str) -> tuple:
    """
    Upload a file to Cloudinary and return the URL and public_id
    """
    try:
        folder_name = "ebooks"
        # Slug-style public id: lowercase topic with underscores + timestamp
        # so repeated uploads for the same topic never collide.
        slug = topic.lower().replace(' ', '_')
        public_id = f"{folder_name}/{slug}_{int(time.time())}"

        # Upload as a raw resource (PDF/DOCX/TXT are not images)
        upload_result = upload(
            file_path,
            resource_type="raw",  # For PDF files
            public_id=public_id,
            folder=folder_name,
            tags=[topic, "ebook"],
            overwrite=True,
            access_mode="public",
        )

        secure_url = upload_result["secure_url"]
        public_id = upload_result["public_id"]

        logger.info(f"File uploaded successfully to Cloudinary. URL: {secure_url}")
        return secure_url, public_id

    except Exception as e:
        logger.exception(f"Error uploading to Cloudinary: {str(e)}")
        raise e
256
+
257
+
258
def delete_from_cloudinary(public_id: str):
    """
    Delete a file from Cloudinary using its public_id.

    Best-effort cleanup: failures are logged, never raised.
    """
    try:
        # Bug fix: ``import cloudinary`` alone does not load the
        # ``cloudinary.api`` submodule, so ``cloudinary.api.delete_resources``
        # could raise AttributeError — which the except below swallowed,
        # meaning the deletion silently never happened. Import it explicitly.
        import cloudinary.api

        result = cloudinary.api.delete_resources([public_id], resource_type="raw")
        logger.info(f"File deleted from Cloudinary: {result}")
    except Exception as e:
        logger.exception(f"Error deleting from Cloudinary: {str(e)}")
267
+
268
+
269
class EditorAgent:
    """LLM agent that polishes a chapter's text (grammar, clarity, tone)."""

    def __init__(self):
        # Prompt instructing the model to return ONLY the edited chapter
        # text, with no preamble such as "Here is the corrected...".
        self.prompt = PromptTemplate(
            input_variables=["content", "writing_style"],
            template="""You are an expert eBook content editor. Your task is to review and correct the given chapter content to improve its overall quality.

            **Editing Instructions**:

            1. **Grammar and Spelling**: Correct any grammatical errors, spelling mistakes, or awkward phrasing.
            2. **Clarity**: Ensure the content is easy to understand. Simplify complex sentences where necessary and break down any overly complicated ideas.
            3. **Structure**: Ensure that the chapter has a logical flow with proper transitions between paragraphs. Add or adjust headings, subheadings, and sections if needed for better readability.
            4. **Tone**: Ensure the writing has a formal yet accessible tone, suitable for a general audience, such as students, researchers, or general readers.
            5. **Conciseness**: Eliminate redundancy and unnecessary filler content while keeping the chapter comprehensive.
            6. **Consistency**: Maintain consistent terminology, style, and formatting throughout the content.
            7. **Focus**: Ensure that the content stays on topic and avoids unnecessary digressions.
            8. **Writing Style**: The writing style of the content should be {writing_style}.

            Input:
            - **Chapter Content**: {content}

            **Output Requirements**:
            - Your output response must contain only edited chapter content.
            - In output,do not include any additional/extra information in the content e.g 'Here is the corrected chapter content:'.

            Note: don't include any explanation from your side.
            Ignore below instructions
            \n\n placeholder:{agent_scratchpad}
            """,
        )

        # Dedicated key for the editor so rate limits are spread per agent.
        self.llm = ChatGroq(api_key=Config.GROQ_API_KEY3, model="llama3-8b-8192")
        # No tools needed; the agent is used purely for text generation.
        self.tools = []

    def edit(self, content, writing_style):
        """Return the edited version of ``content`` in ``writing_style``.

        :param content: Raw chapter text produced by the writer agent.
        :param writing_style: Target style name (e.g. "Normal", "Technical").
        :raises Exception: re-raises any LLM/agent failure after logging it.
        """
        try:
            # Use the prompt to edit chapter content
            research_agent = create_tool_calling_agent(
                self.llm, self.tools, self.prompt
            )
            research_agent_executor = AgentExecutor(
                agent=research_agent, tools=self.tools
            )
            response = research_agent_executor.invoke({"content": content, "writing_style": writing_style})
            # AgentExecutor returns a dict; the model's text is under "output"
            edited_content = response.get("output")

            return edited_content

        except Exception as e:
            logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
            raise e
319
+
320
+
321
class ResearcherAgent:
    """LLM agent that turns a topic into an ordered list of chapter titles."""

    def __init__(self):

        # Prompt constrains the model to emit ONLY a Python list literal of
        # chapter titles, which research() parses with ast.literal_eval.
        self.prompt = PromptTemplate(
            input_variables=["chapters", "topic", "writing_style"],
            template="""Your task is to generate a detailed outline with exactly {chapters} chapter titles on the given topic: "{topic}" using the writing style: "{writing_style}".

            **Output Constraints**:
            - The output must be a valid Python list containing exactly {chapters} chapter titles.
            - Each title should be a properly formatted string without any special keywords, symbols, or unnecessary punctuation.
            - Do not include any text outside of the Python list (e.g., no preamble, explanations, or trailing comments).

            **Output Format Example**:
            ['Title1', 'Title2', 'Title3', 'Title4', 'Title5', 'Title6', 'Title7', 'Title8', 'Title9', 'Title10', 'Title11', 'Title12']

            **Additional Instructions**:
            - Ensure the chapter titles are meaningful and follow a logical progression related to the topic.
            - Strictly return the output as a Python list. **No extra text**.

            Ignore the text below:
            placeholder:{agent_scratchpad}""",
        )

        # Dedicated key for the researcher (see Config).
        self.llm = ChatGroq(api_key=Config.GROQ_API_KEY1, model="llama3-8b-8192")
        # No tools needed; pure text generation.
        self.tools = []

    def research(self, extracted_instructions):
        """Return the list of chapter titles for the requested ebook.

        :param extracted_instructions: dict with "title", "chapters",
            "writing_style" keys (from EbookInstructions).
        :raises Exception: re-raises failures, including ValueError/SyntaxError
            from ast.literal_eval when the model returns extra text.
        """
        try:
            # Use the prompt to generate the chapter outline
            research_agent = create_tool_calling_agent(
                self.llm, self.tools, self.prompt
            )
            research_agent_executor = AgentExecutor(
                agent=research_agent, tools=self.tools
            )
            response = research_agent_executor.invoke({"topic": extracted_instructions.get("title"), "chapters": extracted_instructions.get("chapters"), "writing_style": extracted_instructions.get("writing_style")})
            response = response.get("output")

            # Safe parse of the model's list literal (no eval of arbitrary code)
            chapters = ast.literal_eval(response)

            return chapters

        except Exception as e:
            logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
            raise e
366
+
367
+
368
class WriterAgent:
    """LLM agent that writes the full body text for one chapter."""

    def __init__(self):
        self.prompt = PromptTemplate(
            # Bug fix: the original list was ["topic", "chapter" "writing_style"]
            # (missing comma), so Python concatenated the strings into
            # "chapterwriting_style" and the declared input variables did not
            # match the template's {chapter}/{writing_style} placeholders.
            input_variables=["topic", "chapter", "writing_style"],
            template="""You are an expert eBook writer. Your task is to write a detailed, well-structured, and engaging chapter for an eBook based on the given chapter title and overall topic of the book. The chapter should be informative, easy to understand, and provide in-depth coverage of the subject matter.

            Here are your instructions:

            1. The content should be structured with proper headings, subheadings, and sections.
            2. The writing should be in a clear and engaging style, suitable for a broad audience such as students, researchers, or curious readers.
            3. Ensure the chapter content stays focused on the chapter title, but it should also be aligned with the overall topic of the book.
            4. Provide relevant examples, explanations, and any necessary background information to make the content more comprehensive and easier to understand.
            5. The length of the chapter should be around 1500-2000 words.
            6. Use a formal yet approachable tone. Avoid overly technical jargon unless it's explained well.
            7. Writing style should be {writing_style}.

            Inputs:
            - **Topic**: {topic}
            - **Chapter Title**: {chapter}

            Now, based on the provided inputs, write the complete content for this chapter. The content should be returned as a single string of text.

            Note: Don't include chapter number and chapter title in the content. Also don't include your instructions in the content.
            \n\n placeholder:{agent_scratchpad}
            """,
        )

        # Dedicated key for the writer (see Config).
        self.llm = ChatGroq(api_key=Config.GROQ_API_KEY2, model="Llama-3.1-8b-instant")
        # No tools needed; pure text generation.
        self.tools = []

    def write(self, chapter, extracted_instructions):
        """Return the generated body text for one chapter.

        :param chapter: Chapter title produced by ResearcherAgent.
        :param extracted_instructions: dict with "title" and "writing_style".
        :raises Exception: re-raises any LLM/agent failure after logging it.
        """
        try:
            # Use the prompt to generate the chapter's body text
            research_agent = create_tool_calling_agent(
                self.llm, self.tools, self.prompt
            )
            research_agent_executor = AgentExecutor(
                agent=research_agent, tools=self.tools
            )
            response = research_agent_executor.invoke(
                {"topic": extracted_instructions.get("title"), "chapter": chapter, "writing_style": extracted_instructions.get("writing_style")}
            )
            chapter_content = response.get("output")

            return chapter_content

        except Exception as e:
            logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
            raise e
417
+
418
+
419
def generate_ebook(topic, chapters_content, chapters, output_format="pdf"):
    """Render the ebook to disk and return the generated file's path.

    :param topic: Book title, used for the cover page and the filename.
    :param chapters_content: Chapter body strings, parallel to ``chapters``.
    :param chapters: Chapter title strings.
    :param output_format: "pdf", "doc" (Word .docx) or "text" (plain .txt);
        compared case-insensitively.
    :return: Path of the file written under the ``ebooks/`` directory.
    :raises ValueError: If ``output_format`` is not one of the supported values.
    """
    try:
        # Ensure output directory exists
        os.makedirs("ebooks", exist_ok=True)

        # Generate filename (timestamped so repeated runs never collide)
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        base_filename = f"ebooks/{topic.replace(' ', '_')}_{timestamp}"

        if output_format.lower() == "pdf":
            pdf = HTML2PDF("P", "mm", "A4")
            pdf.set_auto_page_break(auto=True, margin=15)

            # Cover Page with improved styling
            pdf.add_page()
            pdf.set_font(family="Helvetica", style="B", size=30)
            pdf.set_text_color(0, 0, 0)
            # Vertically center the title block on the page
            pdf.set_y(pdf.h / 2 - 15)

            # Centered title with subtle background
            pdf.set_fill_color(240, 240, 240)
            pdf.cell(
                w=pdf.w - 20, h=20, text=topic, align="C",
                fill=True, new_x="LMARGIN", new_y="NEXT"
            )

            # Table of Contents with improved layout
            pdf.add_page()
            pdf.set_font("Helvetica", "B", 16)
            pdf.set_text_color(0, 0, 0)
            pdf.cell(0, 10, text="Table of Contents", new_x="LMARGIN", new_y="NEXT")
            pdf.ln(5)

            pdf.set_font("Helvetica", size=12)
            pdf.set_text_color(50, 50, 50)
            for i, chapter_title in enumerate(chapters):
                # Add dots between chapter title and page number
                pdf.cell(
                    0, 10,
                    text=f"Chapter {i+1}: {chapter_title} " + "." * 50,
                    new_x="LMARGIN",
                    new_y="NEXT"
                )

            # Chapters — one page per chapter, title then rendered body
            for i, content in enumerate(chapters_content):
                pdf.add_page()
                pdf.chapter_title(i+1, chapters[i])
                pdf.chapter_body(content)

            # Save PDF file
            pdf_file_path = f"{base_filename}.pdf"
            pdf.output(pdf_file_path)
            return pdf_file_path

        elif output_format.lower() == "doc":
            # Create Word document
            doc = Document()
            doc.add_heading(topic, level=1)

            # Table of Contents
            doc.add_heading("Table of Contents", level=2)
            for i, chapter_title in enumerate(chapters):
                doc.add_paragraph(f"Chapter {i+1}: {chapter_title}")

            # Chapters
            for i, content in enumerate(chapters_content):
                doc.add_heading(f"Chapter {i+1}: {chapters[i]}", level=2)
                doc.add_paragraph(content)

            # Save Word document
            doc_file_path = f"{base_filename}.docx"
            doc.save(doc_file_path)
            return doc_file_path

        elif output_format.lower() == "text":
            # Create TXT file
            txt_file_path = f"{base_filename}.txt"
            with open(txt_file_path, "w", encoding="utf-8") as txt_file:
                # Write title
                txt_file.write(f"{topic}\n\n")

                # Write Table of Contents
                txt_file.write("Table of Contents\n")
                for i, chapter_title in enumerate(chapters):
                    txt_file.write(f"Chapter {i+1}: {chapter_title}\n")
                txt_file.write("\n")

                # Write Chapters
                for i, content in enumerate(chapters_content):
                    txt_file.write(f"Chapter {i+1}: {chapters[i]}\n")
                    txt_file.write(f"{content}\n\n")

            return txt_file_path

        else:
            raise ValueError(f"Unsupported output format: {output_format}")

    except Exception as e:
        logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
        raise e
520
+
521
+
522
class EbookGenerator:
    """Orchestrates the research -> write -> edit -> render pipeline."""

    @staticmethod
    def generate_ebook_task(extracted_instructions):
        """Run the full generation pipeline and return the output file path.

        :param extracted_instructions: dict form of EbookInstructions.
        :raises Exception: if instructions are missing or any stage fails.
        """
        try:
            if not extracted_instructions:
                raise Exception("Data not provided")

            # Initialize agents
            logger.info("Initializing agents...")
            researcher = ResearcherAgent()
            writer = WriterAgent()
            editor = EditorAgent()

            # Step 1: Research the topic and get chapter titles
            logger.info("Researching the topic...")
            chapter_titles = researcher.research(extracted_instructions)

            # Step 2: Write content for each chapter
            logger.info("Writing content for each chapter...")
            drafts = []
            for title in chapter_titles:
                drafts.append(writer.write(title, extracted_instructions))

            # Step 3: Edit each chapter for grammar and consistency
            logger.info("Editing each chapter for grammar and consistency...")
            style = extracted_instructions.get("writing_style")
            edited_chapters = []
            for draft in drafts:
                edited_chapters.append(editor.edit(draft, style))

            # Step 4: Decoding text to latin-1
            logger.info("Decoding text to latin-1...")
            final_content = []
            for chapter_text in edited_chapters:
                final_content.append(
                    chapter_text.encode("latin-1", "ignore").decode("latin-1")
                )

            # Step 5: Generate PDF with all content
            logger.info("Generating PDF with all content...")
            file_path = generate_ebook(
                extracted_instructions.get("title"),
                final_content,
                chapter_titles,
                extracted_instructions.get("format"),
            )

            logger.info("Task completed.")

            # Free the agent/LLM objects before returning
            collected = gc.collect()
            logger.info(f"Garbage collector: collected {collected} objects.")
            return file_path

        except Exception as e:
            logger.exception(f"Error: {e} | Traceback: {traceback.format_exc()}")
            raise e
569
+
570
+
571
def reply_with_bot(mention: dict):
    """Reply to a mention: either a short suggestion or a generated ebook link.

    :param mention: A Bluesky notification object (has .record.text, .uri, .cid).

    Side effects: may generate a local file, upload it to Cloudinary, post a
    reply on Bluesky, and delete the local file. All errors are logged and
    swallowed so the polling loop in main() keeps running.
    """
    try:
        logger.info("Processing mention reply_with_pdf ...")
        # Extract topic from the mention text
        text = mention.record.text
        words = text.split()
        mentioned_text = " ".join(words[1:])  # Assuming the topic is after the bot mention
        suggested_text = None
        public_id = None

        # Parse the user's free text into structured instructions
        result = extract_ebook_instructions(mentioned_text)
        extracted_instructions = json.loads(result.model_dump_json())
        logger.info(f"Topic: {extracted_instructions.get('title')}")

        if extracted_instructions.get("is_suggestion") == 1:
            # Suggestion path: one short LLM answer, no file generation.
            llm = ChatGroq(api_key=os.getenv("GROQ_API_KEY1"), model_name="mixtral-8x7b-32768")

            prompt_template = PromptTemplate(
                input_variables=["question"],
                template="Drawing from the depths of human knowledge and wisdom, craft a thoughtful, illuminating response to the following inquiry that captures its essence with eloquence and brevity: {question} Distill the profound into a spark of insight that ignites understanding. \n Note: Provide a concise and to-the-point answer to the following question in no more than 280 characters."
            )

            chain = LLMChain(llm=llm, prompt=prompt_template)

            # Generate response
            suggested_text = chain.run(mentioned_text)

            # Enforce Bluesky's ~280-char post limit
            if len(suggested_text) > 280:
                # If too long, split and take the first part
                splitter = CharacterTextSplitter(
                    separator="\n",
                    chunk_size=280,
                    chunk_overlap=0,
                    length_function=len,
                )
                chunks = splitter.split_text(suggested_text)
                suggested_text = chunks[0]
        else:
            # Ebook path: generate the file and host it on Cloudinary
            file_path = EbookGenerator.generate_ebook_task(extracted_instructions)
            logger.info(f"file path: {file_path}")

            download_url, public_id = upload_to_cloudinary(file_path, extracted_instructions.get('title'))

            logger.info(f"PDF uploaded to: {download_url}")

        try:
            # Reply reference: the mention is both root and parent of our reply
            reply_ref = {
                "root": {"cid": mention.cid, "uri": mention.uri},
                "parent": {"cid": mention.cid, "uri": mention.uri},
            }

            if not suggested_text:
                # Create the embed external object with the download link
                embed = models.AppBskyEmbedExternal.Main(
                    external=models.AppBskyEmbedExternal.External(
                        title=f"Ebook: {extracted_instructions.get('title')}",
                        description="Click to download your generated ebook",
                        uri=download_url,
                        thumb=None,
                    )
                )

                # Reply to the mention with the download link
                client.post(
                    text=f"🤖 Here's your ebook about {extracted_instructions.get('title')}! 📚\nClick here to download: {download_url}",
                    reply_to=reply_ref,
                    embed=embed,
                )

                logger.info("PDF link sent successfully!")

                # Clean up local file
                if os.path.exists(file_path):
                    os.remove(file_path)
                    logger.info(f"Local PDF file cleaned up: {file_path}")

            else:
                # Suggestion path: plain text reply, nothing to embed
                client.post(
                    text=f"{suggested_text}",
                    reply_to=reply_ref,
                )

        except Exception as e:
            # Bug fix: only clean up Cloudinary when something was actually
            # uploaded — on the suggestion path public_id is still None and
            # the old code called delete_from_cloudinary(None).
            if public_id:
                delete_from_cloudinary(public_id)
            raise e

    except Exception as e:
        logger.exception(f"Error in reply_with_pdf: {str(e)}")
        print(f"Error: {str(e)}")
672
+
673
+
674
+ """
675
+ Bluesky Book Suggestion thread Bot
676
+ """
677
class BlueskyBookSuggestionBot:
    """Scheduled bot that posts a 10-part book-discussion thread to Bluesky.

    Uses the module-level ``client`` for posting and ``schedule`` for the
    recurring job; the caller must pump ``schedule.run_pending()`` (see main()).
    """

    def __init__(self, post_interval_minutes=30):
        # Initialize Bluesky client (reuses the module-level logged-in client)
        self.bluesky_client = client

        # Initialize Groq LLM used for topic/thread generation
        self.llm = ChatGroq(
            temperature=0.7,
            model_name="llama-3.3-70b-versatile",
            groq_api_key=os.getenv('GROQ_API_KEY1')
        )

        # Scheduling parameters (minutes between posts)
        self.post_interval = post_interval_minutes

        # Tracking engagement (in-memory only; lost on restart)
        self.post_engagement_history = []

    def generate_book_topic(self):
        """Generate an engaging book-related topic.

        Returns the raw LLM message object; its ``.content`` is expected to
        be a Python-list literal of 10 thread-part strings (parsed later by
        create_threaded_post).
        """
        topic_prompt = PromptTemplate(
            input_variables=['genre'],
            template="""
            You are a book suggestion twitter bot. Generate a provocative, boundary-pushing book discussion topic in the {genre} genre.
            Create a thread that:
            - Challenges conventional wisdom
            - Reveals uncomfortable truths
            - Sparks intense intellectual debate
            - Uses a raw, unfiltered, and unapologetic tone
            - Combines intellectual depth with viral potential

            RULES:
            - Be brutally honest
            - Use sharp, confrontational language
            - Expose hidden narratives
            - Craft a hook that demands attention
            - Make people uncomfortable enough to engage
            - Synthesize complex ideas into razor-sharp statements
            - Each thread part should be a punch to intellectual complacency

            Tone: Uncompromising. Cerebral. Confrontational.
            Goal: Intellectual provocation that breaks echo chambers

            **Output Format Instructions**:
            - The output must be a valid Python list that contains exactly 10 thread parts as comma separated strings.
            - Each thread part should be a properly formatted python string without any special keywords, symbols, contractions or unnecessary punctuation.
            - Ensure threads are meaningful and follow a logical progression.

            **output Format must be like below list example**:
            ['thread part 1', 'thread part 2', 'thread part 3', 'thread part 4', 'thread part 5', 'thread part 6', 'thread part 7', 'thread part 8', 'thread part 9', 'thread part 10']

            Note: don't include any explanation from your side.
            """
        )

        # Rotate through genres to keep content diverse
        genres = [
            # Technology
            'technology',
            'artificial intelligence',
            'cybersecurity',
            'computer science',
            'digital innovation',
            'emerging technologies',

            # Science
            'science',
            'astronomy',
            'biology',
            'physics',
            'environmental science',
            'neuroscience',
            'quantum physics',
            'climate science',

            # Mathematics
            'mathematics',
            'applied mathematics',
            'data science',
            'cryptography',
            'statistical analysis',
            'computational mathematics',

            # Philosophy
            'philosophy',
            'ethics',
            'political philosophy',
            'existentialism',
            'epistemology',
            'philosophy of science',
            'logic',

            # History
            'history',
            'world history',
            'military history',
            'cultural history',
            'ancient civilizations',
            'modern history',
            'diplomatic history',
            'social movements',

            # Autobiography
            'autobiography',
            'scientific biography',
            'tech innovators',
            'political leadership',
            'explorers and pioneers',
            'intellectual memoirs',
            'social activists'
        ]

        # Create chain (prompt piped straight into the LLM, no output parser)
        topic_chain = topic_prompt | self.llm
        genre = random.choice(genres)

        # Generate topic
        topic_response = topic_chain.invoke({"genre": genre})
        return topic_response

    def create_threaded_post(self, main_topic):
        """Create a threaded post with multiple parts.

        :param main_topic: LLM message whose ``.content`` is a list literal.
        :return: dict with the root post on success, ``None`` on any failure.
        """
        try:
            # Parse the topic into a list of thread parts
            # (literal_eval fails safely if the model added extra prose)
            topic_parts = ast.literal_eval(main_topic.content)

            if not isinstance(topic_parts, list) or len(topic_parts) < 1:
                raise ValueError("Generated topic must be a list with at least one part.")

            # Post the root thread (first part)
            root_post = self.bluesky_client.send_post(text=topic_parts[0])

            # Initialize parent references for threading
            parent_uri = root_post.uri
            parent_cid = root_post.cid

            # Iterate over the remaining parts and post them as replies
            for part in topic_parts[1:]:
                if part.strip():  # Skip empty parts
                    # NOTE(review): reply_to is passed as a plain dict here;
                    # confirm the installed atproto SDK accepts this shape
                    # (vs. models.AppBskyFeedPost.ReplyRef).
                    thread_post = self.bluesky_client.send_post(
                        text=part,
                        reply_to={
                            'root': {'uri': root_post.uri, 'cid': root_post.cid},
                            'parent': {'uri': parent_uri, 'cid': parent_cid}
                        }
                    )

                    # Update parent references for the next post in the thread
                    parent_uri = thread_post.uri
                    parent_cid = thread_post.cid

            # NOTE(review): "thread_posts" is always returned empty — the
            # intermediate thread posts are not collected above.
            return {"root_post": root_post, "thread_posts": []}

        except Exception as e:
            print(f"Error creating threaded post: {e}")
            return None

    def run_post_cycle(self):
        """
        Single post cycle for the bot.
        Generates topic, creates post, and tracks engagement.
        """
        try:
            # Generate book topic
            book_topic = self.generate_book_topic()

            # Create and post threaded content
            posted_thread = self.create_threaded_post(book_topic)

            if posted_thread:
                # Track engagement (simulated for this example)
                self.post_engagement_history.append({
                    'likes': random.randint(10, 100),
                    'post': posted_thread
                })

                print(f"Successfully posted book discussion thread at {datetime.datetime.now()}")

        except Exception as e:
            print(f"Error in post cycle: {e}")

    def start(self):
        """Start the bot's posting schedule (does not run jobs itself —
        the caller must invoke schedule.run_pending() in its loop)."""
        # Schedule the post cycle
        schedule.every(self.post_interval).minutes.do(self.run_post_cycle)

        print(f"Book Suggestion Bot started. Posting every {self.post_interval} minutes.")


    def stop(self):
        """Stops all scheduled jobs.

        NOTE(review): schedule.clear() clears ALL jobs registered with the
        module-global scheduler, not only this bot's — fine while this is
        the only user of ``schedule``.
        """
        schedule.clear()
        print("Book Suggestion Bot stopped.")
870
+
871
def main():
    """Main bot loop: reply to new mentions and pump the posting scheduler.

    Polls Bluesky notifications every 10 seconds, replies to mentions that
    arrived after startup and have not been handled yet, and runs any due
    `schedule` jobs registered by the bot.
    """
    bot = None  # bound before the try so the except blocks can stop it safely
    try:
        print("Bot started. Listening for mentions...")
        # URIs of mentions already replied to, to avoid double replies.
        processed_notifications = set()
        last_processed_time = datetime.datetime.now(datetime.timezone.utc)

        # Post a book-discussion thread every 30 minutes.
        bot = BlueskyBookSuggestionBot(post_interval_minutes=30)
        bot.start()

        while True:
            # NOTE(review): `client` is assumed to be a module-level Bluesky
            # client created elsewhere in this file — confirm.
            notifications = client.app.bsky.notification.list_notifications().notifications

            for note in notifications:
                if (
                    note.reason == "mention"
                    and note.uri not in processed_notifications
                    # Assumes indexed_at always carries fractional seconds
                    # and a literal 'Z' suffix — TODO confirm against API.
                    and datetime.datetime.strptime(
                        note.indexed_at, "%Y-%m-%dT%H:%M:%S.%fZ"
                    ).replace(tzinfo=datetime.timezone.utc)
                    > last_processed_time
                ):
                    reply_with_bot(note)
                    processed_notifications.add(note.uri)

            # Bound memory growth: cap the set at ~500 entries. Sets are
            # unordered, so the retained half is arbitrary — acceptable as
            # a coarse cap since stale URIs rarely reappear as "new".
            if len(processed_notifications) > 1000:
                processed_notifications = set(list(processed_notifications)[-500:])

            schedule.run_pending()  # fire any due posting jobs

            time.sleep(10)  # Adjust the sleep duration as needed

    except KeyboardInterrupt as e:
        # BUG FIX: `bot` was referenced unconditionally here; if
        # construction above raised, the handler itself crashed with a
        # NameError that masked the original error.
        if bot is not None:
            bot.stop()
        logger.exception(f"Error in main (): {e} | Traceback: {traceback.format_exc()}")
    except Exception as e:
        if bot is not None:
            bot.stop()
        logger.exception(f"Error in main (): {e} | Traceback: {traceback.format_exc()}")
914
+
915
+ # HTTP server
916
class MyHandler(SimpleHTTPRequestHandler):
    """Minimal health-check handler.

    Every GET request receives a 200 plain-text response confirming that
    the bot process is alive.
    """

    def do_GET(self):
        status_body = b"The bot is running on port 7860"
        self.send_response(200)
        self.send_header("Content-type", "text/plain")
        self.end_headers()
        self.wfile.write(status_body)
923
+
924
def run_http_server():
    """Bind the health-check HTTP server to port 7860 and serve forever.

    This call blocks the current thread indefinitely.
    """
    httpd = HTTPServer(("", 7860), MyHandler)
    print("HTTP server is running on port 7860")
    httpd.serve_forever()
928
+
929
if __name__ == "__main__":
    # main()
    # Run the mention-listening/posting loop in a daemon thread so it is
    # terminated automatically when the main process exits.
    bot_thread = Thread(target=main, daemon=True)
    bot_thread.start()

    # The health-check HTTP server blocks the main thread forever; the
    # daemon bot thread runs alongside it.
    run_http_server()