Spaces:

jsemrau
/

Postwriter

Runtime error

App Files Files Community

jsemrau committed on Sep 17, 2025

Commit

ed6e021

1 Parent(s): e63c60b

split utils and ui

Browse files

Files changed (2) hide show

app.py +77 -92
utils.py +93 -0

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ from dotenv import load_dotenv
 # Load environment variables from .env file
 load_dotenv()
 news_selector=2
 # Set up logging
@@ -77,6 +77,77 @@ openai_key=os.getenv('OPENAI')
 DEFAULT_INTERESTS = os.getenv('INTERESTS', 'cognition, sentience, finance, investing, orchestration')
 USE_LOCAL_MODELS = os.getenv('USE_LOCAL_MODELS', 'false').lower() == 'true'
 def check_environment():
     """Check if required environment variables are set"""
     if not HF_TOKEN:
@@ -203,92 +274,6 @@ def initialize_editor():
     except Exception as e:
         return f"âŒ Error initializing editor: {str(e)}"
-def clean_url(url):
-    """Clean tracking parameters from URLs"""
-    url = url.split('&')[0]
-    url= url.rstrip('/')
-    # Decode the path to fix encoded '?' or '=' that belong to the path, not query
-    fixed_url = urllib.parse.unquote(url)
-    return fixed_url
-def get_body(url):
-    """Extract article content from URL"""
-    body_text = ""
-    try:
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
-            "Accept-Language": "en-US,en;q=0.5",
-            "Accept-Encoding": "gzip, deflate",
-            "Connection": "keep-alive",
-            "Upgrade-Insecure-Requests": "1",
-        }
-        headers = {
-                   "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
-                  }
-        response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-        if url.endswith(".pdf") or "arxiv.org/pdf/" in url:
-            # PDF content
-            with BytesIO(response.content) as f:
-                reader = PdfReader(f)
-                text_parts = []
-                for page in reader.pages:
-                    text_parts.append(page.extract_text() or "")
-                body_text = "\n".join(text_parts)
-        else:
-            # HTML content
-            soup = BeautifulSoup(response.text, "html.parser")
-            paragraphs = soup.find_all(["p"])
-            body_text = " ".join([p.get_text(strip=True) for p in paragraphs])
-    except Exception as e:
-        print(f"Failed to fetch {url}: {e}")
-    return body_text
-def ner_tagger(text, model):
-    """Extract named entities from text"""
-    labels = ["Source", "Financial Metric", "Date", "Organization", "Person", "Product", "Percentage", "Monetary Value", "Duration"]
-    entities = model.predict_entities(text, labels, threshold=0.1)
-    return entities
-def remove_duplicate_relationships(data: str) -> str:
-    """Remove duplicate relationships from knowledge graph"""
-    lines = data.splitlines()
-    triples = []
-    subject = None
-    for line in lines:
-        parts = [part.strip() for part in line.split("-->")]
-        if len(parts) != 3:
-            continue
-        else:
-            if len(parts[0]) > 0:
-                subject = parts[0]
-            predicate = parts[1]
-            obj = parts[2]
-        triples.append((subject, predicate, obj))
-    unique_triples = sorted(set(triples))
-    grouped = defaultdict(list)
-    for subj, pred, obj in unique_triples:
-        grouped[subj].append(f"    -->{pred}--> {obj}")
-    output_lines = []
-    for subj in grouped:
-        output_lines.append(subj)
-        output_lines.extend(grouped[subj])
-    return '\n'.join(output_lines)
 def edit_single_article(post, edit_prompt):
     """Edit a single news article and generate LinkedIn post"""
     global editor_agent
@@ -657,8 +642,8 @@ def clear_work_queue():
 # Gradio Interface
 def create_interface():
     """Create the Gradio interface"""
-    #, theme=gr.themes.Soft()
-    with gr.Blocks(title="Post Generator") as app:
         gr.Markdown("#Post Generator")
         gr.Markdown("Generate engaging LinkedIn posts from recent news articles using AI agents and NER analysis.")
@@ -1010,9 +995,9 @@ if __name__ == "__main__":
     #Initialize the model
-    print("Starting to initialize models")
-    initialize_models()
-    print("Models have been initialized")
     # Create and launch the app
     app = create_interface()

 # Load environment variables from .env file
 load_dotenv()
+from utils import clean_url, get_body,ner_tagger,remove_duplicate_relationships
 news_selector=2
 # Set up logging
 DEFAULT_INTERESTS = os.getenv('INTERESTS', 'cognition, sentience, finance, investing, orchestration')
 USE_LOCAL_MODELS = os.getenv('USE_LOCAL_MODELS', 'false').lower() == 'true'
# Initialise the NER model and the LLM agents once, at import time.
#
# This code runs at module level, so it must not contain `return`
# (a `return` outside a function is a SyntaxError that prevents the whole
# module from being imported); the outcome is reported with print() instead.
if not HF_TOKEN:
    # Without a token there is nothing useful to attempt: skip login and
    # model construction instead of calling login() with an empty token.
    print("❌ HuggingFace token not found. Please check your .env file.")
else:
    try:
        # Login to HuggingFace
        login(HF_TOKEN, add_to_git_credential=False)

        # Initialize NER model
        print("Initialize NER")
        ner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
        print("Initialized NER")

        # Engine shared by the data agent and the writer agent.
        llm_engine = InferenceClientModel(
            api_key=HF_TOKEN,
            model_id="Qwen/Qwen3-Coder-480B-A35B-Instruct",
            timeout=3000,
            provider="fireworks-ai",
            temperature=0.25,
        )

        # Single-step data analysis agent.
        agent = CodeAgent(
            model=llm_engine,
            tools=[],
            add_base_tools=False,
            name="data_agent",
            description="Runs data analysis for you.",
            max_steps=1,
        )

        # Agent that drafts the LinkedIn post.
        writer_agent = CodeAgent(
            model=llm_engine,
            tools=[],
            add_base_tools=False,
            name="writer_agent",
            description="Write an engaging and creative LinkedIn post.",
            max_steps=5,
        )

        # Same model as llm_engine, configured with a higher temperature
        # for the editor agent.
        writer_engine = InferenceClientModel(
            api_key=HF_TOKEN,
            model_id="Qwen/Qwen3-Coder-480B-A35B-Instruct",
            timeout=3000,
            provider="fireworks-ai",
            temperature=0.4,
        )

        # Agent that edits an existing post.
        editor_agent = CodeAgent(
            model=writer_engine,
            tools=[],
            add_base_tools=False,
            name="editor_agent",
            description="Edits LinkedIn post.",
            max_steps=5,
        )

        print("✅ Models initialized successfully!")
    except Exception as e:
        # Best-effort start-up: report the failure and let the module finish
        # importing. The names assigned above may be left unset in that case.
        print(f"❌ Error initializing models: {str(e)}")
 def check_environment():
     """Check if required environment variables are set"""
     if not HF_TOKEN:
     except Exception as e:
         return f"âŒ Error initializing editor: {str(e)}"
 def edit_single_article(post, edit_prompt):
     """Edit a single news article and generate LinkedIn post"""
     global editor_agent
 # Gradio Interface
 def create_interface():
     """Create the Gradio interface"""
+    with gr.Blocks(title="Post Generator", theme=gr.themes.Soft()) as app:
         gr.Markdown("#Post Generator")
         gr.Markdown("Generate engaging LinkedIn posts from recent news articles using AI agents and NER analysis.")
     #Initialize the model
+    #print("Starting to initialize models")
+    #initialize_models()
+    #print("Models have been initialized")
     # Create and launch the app
     app = create_interface()

utils.py ADDED Viewed

	@@ -0,0 +1,93 @@

+from tldextract import extract
+from urllib.parse import quote_plus
+import requests
+from bs4 import BeautifulSoup
+from io import BytesIO
+from PyPDF2 import PdfReader
+import urllib.parse
def clean_url(url):
    """Return *url* without trailing parameters, slashes or percent-encoding.

    The URL is cut at the first ``&`` (everything after it is discarded),
    surrounding whitespace and trailing ``/`` characters are removed, and the
    result is percent-decoded so that an encoded ``?`` or ``=`` that belongs
    to the path is restored.

    Args:
        url: The URL to clean. ``None`` and empty strings are accepted.

    Returns:
        The cleaned URL, or ``""`` when no URL was supplied.
    """
    if not url:
        # A missing link would otherwise fail with AttributeError on None.
        return ""
    # Strip whitespace before trimming slashes; otherwise a trailing space
    # hides the final "/" from rstrip().
    trimmed = url.split('&')[0].strip().rstrip('/')
    return urllib.parse.unquote(trimmed)
def get_body(url, timeout=10):
    """Download *url* and return its readable text.

    PDF responses are read page by page with ``PdfReader``; anything else is
    parsed as HTML and the text of all ``<p>`` elements is joined with spaces.

    Args:
        url: Address of the article or PDF to fetch.
        timeout: Seconds to wait for the HTTP request (default 10).

    Returns:
        The extracted text. An empty string is returned when *url* is empty
        or when fetching/parsing fails for any reason (the failure is
        printed, never raised).
    """
    if not url:
        return ""

    # Only a User-Agent header is sent. The original code first built a larger
    # header dict and immediately overwrote it with this one, so this is the
    # header set that was actually in effect.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    body_text = ""
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # Detect PDFs from the URL (case-insensitively, and ignoring any query
        # string) or from the Content-Type the server reported.
        lowered_url = url.lower()
        url_path = urllib.parse.urlparse(url).path.lower()
        content_type = response.headers.get("Content-Type", "").lower()
        is_pdf = (
            lowered_url.endswith(".pdf")
            or url_path.endswith(".pdf")
            or "arxiv.org/pdf/" in url
            or "application/pdf" in content_type
        )

        if is_pdf:
            # PDF content
            with BytesIO(response.content) as f:
                reader = PdfReader(f)
                # extract_text() may yield nothing for a page; use "" then.
                body_text = "\n".join(
                    page.extract_text() or "" for page in reader.pages
                )
        else:
            # HTML content
            soup = BeautifulSoup(response.text, "html.parser")
            paragraphs = soup.find_all(["p"])
            body_text = " ".join(p.get_text(strip=True) for p in paragraphs)
    except Exception as e:
        # Deliberate best-effort: network, HTTP and parsing errors all end up
        # here so one bad article cannot abort the caller.
        print(f"Failed to fetch {url}: {e}")
    return body_text
def ner_tagger(text, model, labels=None, threshold=0.1):
    """Extract named entities from *text* using *model*.

    Args:
        text: The text to analyse.
        model: Object exposing ``predict_entities(text, labels, threshold=...)``.
        labels: Entity labels to look for. Defaults to the built-in list of
            finance/news oriented labels.
        threshold: Threshold passed through to ``predict_entities``
            (default 0.1).

    Returns:
        Whatever ``model.predict_entities`` returns.
    """
    if labels is None:
        labels = [
            "Source",
            "Financial Metric",
            "Date",
            "Organization",
            "Person",
            "Product",
            "Percentage",
            "Monetary Value",
            "Duration",
        ]
    return model.predict_entities(text, labels, threshold=threshold)
def remove_duplicate_relationships(data: str) -> str:
    """Remove duplicate relationships from a textual knowledge graph.

    Each input line is split on ``-->``. Only lines with exactly three parts
    (``subject --> predicate --> object``) are used. A line whose subject part
    is empty inherits the subject of the most recent line that had one.

    Args:
        data: Knowledge-graph text, one relationship per line.

    Returns:
        The unique relationships, sorted and grouped by subject: each subject
        on its own line followed by one indented ``    -->predicate--> object``
        line per relationship. Returns ``""`` when nothing could be parsed.
    """
    unique_triples = set()
    subject = None
    for line in data.splitlines():
        parts = [part.strip() for part in line.split("-->")]
        if len(parts) != 3:
            continue
        if parts[0]:
            subject = parts[0]
        if subject is None:
            # Continuation line before any subject was seen: it cannot be
            # attributed to anything, and a None subject would break both
            # the sort and the final join.
            continue
        unique_triples.add((subject, parts[1], parts[2]))

    # A plain dict keeps insertion order, so subjects come out in sorted
    # order without needing collections.defaultdict (which this module never
    # imported).
    grouped = {}
    for subj, pred, obj in sorted(unique_triples):
        grouped.setdefault(subj, []).append(f"    -->{pred}--> {obj}")

    output_lines = []
    for subj, relations in grouped.items():
        output_lines.append(subj)
        output_lines.extend(relations)
    return '\n'.join(output_lines)