Spaces:

HOKGroup
/

marketing_chat

Sleeping

App Files Files Community

ashkoff committed on Mar 26, 2025

Commit

aea562e

1 Parent(s): e910624

updates

Browse files

Files changed (2) hide show

.gitignore +4 -0
app-ref.py +0 -358

.gitignore CHANGED Viewed

@@ -4,3 +4,7 @@ uv.lock
 .python-version
 __pycache__
 *.log

 .python-version
 __pycache__
 *.log
+.vscode
+.cursor
+*.idml
+*.csv

app-ref.py DELETED Viewed

@@ -1,358 +0,0 @@
-from dotenv import load_dotenv
-import os
-from datetime import datetime
-from simple_idml import idml
-import re
-import shutil
-import tempfile
-import json
-import gradio as gr
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_core.output_parsers import JsonOutputParser
-from langchain_core.prompts import PromptTemplate
-import docx2txt
-# Load variables from a local .env file into the process environment.
-load_dotenv()
-# NOTE(review): the return value is discarded, so this call neither validates
-# nor stores the key; presumably the Gemini client reads GOOGLE_API_KEY from
-# the environment on its own - confirm.
-os.getenv("GOOGLE_API_KEY")
-def read_docx(path):
-    """Return the text that docx2txt extracts from the document at ``path``."""
-    return docx2txt.process(path)
-def process_transcription(file, marketing_prompt, metrics_prompt):
-    """
-    Generate marketing copy and project metrics from a DOCX transcription.
-    Args:
-        file: Transcription document handed to read_docx().
-        marketing_prompt: Guidelines substituted into the marketing-copy prompt.
-        metrics_prompt: Extra instructions substituted into the metrics prompt.
-    Returns:
-        The parsed metrics output with the generated marketing text stored
-        under the "description" key.
-    """
-    # Read the uploaded file
-    transcription_text = read_docx(file)
-    # Initialize parser and model
-    parser = JsonOutputParser()
-    model = ChatGoogleGenerativeAI(
-        model="gemini-2.0-flash",
-        temperature=0,
-        max_tokens=None,
-        timeout=None,
-        max_retries=2,
-    )
-    # Process marketing copy
-    marketing_prompt_template = PromptTemplate(
-        template="""Transform the following transcription into engaging marketing copy.
-Follow these guidelines only and only give the body text: {marketing_prompt}
-Transcription:
-{transcription_text}
-Generate marketing copy that is compelling and aligned with the provided guidelines.
-Focus on key benefits, unique selling points, and engaging narrative.""",
-        input_variables=["transcription_text", "marketing_prompt"],
-    )
-    # The marketing chain has no output parser: the raw model message is kept
-    # and its .content is read further down.
-    marketing_chain = marketing_prompt_template | model
-    marketing_result = marketing_chain.invoke(
-        {"transcription_text": transcription_text, "marketing_prompt": marketing_prompt}
-    )
-    # Process project metrics
-    metrics_prompt_template = PromptTemplate(
-        template="""Extract project metrics and statistics from the following transcription.
-Focus on these aspects: metrics should only and only be the in proper format avoid adding any description or other things if there is nothing can be found do not put in the output
-for consultants and and project team only and only output a result if there is a first name and last or a entity name can be found. avoid general name such as structural consultants, lighting consultant etc.  outputs should be a list of strings for the names
- only use these keys when possible and relevant location, project_name, size, height, number_of_floors, completion_date, client_name, project_team_members, external_consultants
-{metrics_prompt}
-Transcription:
-{transcription_text}
-Generate a JSON object containing the extracted metrics and statistics.
-Be specific and quantitative where possible.""",
-        input_variables=["transcription_text", "metrics_prompt"],
-    )
-    # Unlike the marketing chain, the metrics chain pipes the model output
-    # through the JSON parser.
-    metrics_chain = metrics_prompt_template | model | parser
-    metrics_result = metrics_chain.invoke(
-        {"transcription_text": transcription_text, "metrics_prompt": metrics_prompt}
-    )
-    # Attach the marketing text to the metrics so callers get a single object.
-    # NOTE(review): item assignment assumes the parser returned a JSON object
-    # (dict); a top-level JSON array would fail here - confirm.
-    metrics_result["description"] = marketing_result.content
-    return metrics_result
-def find_story_files(idml_package, tag_patterns):
-    """
-    Find the story files whose XML matches each tag pattern.
-    Args:
-        idml_package: An open IDML package exposing ``namelist()`` and
-            ``open(name)`` (the zipfile-style interface used below).
-        tag_patterns: Iterable of regex pattern strings to search for.
-    Returns:
-        dict: Maps every pattern to the list of "Stories/" member names whose
-        content matches it (an empty list when nothing matches).
-    """
-    # Materialize once so a one-shot iterable can feed both dicts below.
-    tag_patterns = list(tag_patterns)
-    compiled_patterns = {pattern: re.compile(pattern) for pattern in tag_patterns}
-    tag_to_story = {pattern: [] for pattern in tag_patterns}
-    stories = [name for name in idml_package.namelist() if name.startswith("Stories/")]
-    for story_path in stories:
-        try:
-            # Close the member handle deterministically instead of leaving it
-            # to the garbage collector.
-            with idml_package.open(story_path) as story_file:
-                content = story_file.read().decode("utf-8")
-        except Exception as e:
-            # Best effort: an unreadable story is reported and skipped so the
-            # remaining stories are still scanned.
-            print(f"Error reading {story_path}: {e}")
-            continue
-        for pattern, regex in compiled_patterns.items():
-            if regex.search(content):
-                tag_to_story[pattern].append(story_path)
-    return tag_to_story
-def replace_content(xml_content, tag_pattern, replacements):
-    """
-    Replace placeholder tags in story XML with actual data.
-    The n-th match of ``tag_pattern`` receives the n-th replacement. Matches
-    left without a replacement are removed together with a directly following
-    ``<Br />``. Replacements left without a match are appended, each followed
-    by a ``<Br />``, right after the last match.
-    Args:
-        xml_content: The XML content to modify
-        tag_pattern: The regex pattern to match tags
-        replacements: List of replacement values; each is converted to text
-            and XML-escaped before insertion
-    Returns:
-        str: Updated XML content (unchanged when the pattern never matches)
-    """
-    # Function-scope import keeps this block self-contained.
-    from xml.sax.saxutils import escape
-    matches = list(re.finditer(tag_pattern, xml_content))
-    if not matches:
-        return xml_content
-    br_regex = re.compile(r"\s*<Br />")
-    last_index = len(matches) - 1
-    pieces = []
-    cursor = 0
-    # Build the result front to back from slices of the original text, so every
-    # offset refers to the untouched input and never drifts after an edit.
-    for index, match in enumerate(matches):
-        pieces.append(xml_content[cursor : match.start()])
-        cursor = match.end()
-        if index < len(replacements):
-            # Escape so values containing &, < or > cannot break the XML.
-            pieces.append(f"<Content>{escape(str(replacements[index]))}</Content>")
-        else:
-            # Unused placeholder: drop it and the line break that directly
-            # follows it (anchored match, so nothing in between is deleted).
-            br_match = br_regex.match(xml_content, cursor)
-            if br_match:
-                cursor = br_match.end()
-        if index == last_index:
-            # Surplus values go immediately after the last placeholder.
-            for item in replacements[len(matches) :]:
-                pieces.append(f"\n<Content>{escape(str(item))}</Content>\n<Br />")
-    pieces.append(xml_content[cursor:])
-    return "".join(pieces)
-def update_idml_content(idml_path, replacements_json):
-    """
-    Update IDML content with replacements from JSON
-    Works on a temporary copy of the template: the package is extracted, every
-    story file matching a tag pattern is rewritten through replace_content(),
-    and the extracted tree is zipped into a new timestamped .idml file placed
-    in the directory of this module.
-    Args:
-        idml_path: Path to the IDML file
-        replacements_json: JSON string or dict with tag patterns and replacements
-    Returns:
-        str: Path to the updated IDML file
-    """
-    # Parse JSON if it's a string
-    if isinstance(replacements_json, str):
-        replacements = json.loads(replacements_json)
-    else:
-        replacements = replacements_json
-    # Directory containing this module; the output file is written there
-    app_dir = os.path.dirname(os.path.abspath(__file__))
-    # Create a temporary directory (removed automatically when the block exits)
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Create a copy of the IDML file to work with
-        temp_idml = os.path.join(temp_dir, "temp.idml")
-        shutil.copy2(idml_path, temp_idml)
-        with idml.IDMLPackage(temp_idml) as working_idml:
-            # Find all story files containing our tags
-            tag_patterns = list(replacements.keys())
-            tag_to_story = find_story_files(working_idml, tag_patterns)
-            # Extract the IDML
-            extract_dir = os.path.join(temp_dir, "extracted")
-            os.makedirs(extract_dir, exist_ok=True)
-            working_idml.extractall(extract_dir)
-            # Process each tag pattern
-            for tag_pattern, replacement_values in replacements.items():
-                story_files = tag_to_story.get(tag_pattern, [])
-                if not story_files:
-                    print(
-                        f"Warning: No story files found containing pattern '{tag_pattern}'"
-                    )
-                    continue
-                print(
-                    f"Found pattern '{tag_pattern}' in {len(story_files)} story file(s)"
-                )
-                # Update each story file containing this tag
-                for story_path in story_files:
-                    # Read the XML content from the extracted copy, so edits
-                    # made for earlier patterns are kept
-                    with open(
-                        os.path.join(extract_dir, story_path), "r", encoding="utf-8"
-                    ) as f:
-                        xml_content = f.read()
-                    # Update the content
-                    updated_content = replace_content(
-                        xml_content, tag_pattern, replacement_values
-                    )
-                    # Write back the updated content
-                    with open(
-                        os.path.join(extract_dir, story_path), "w", encoding="utf-8"
-                    ) as f:
-                        f.write(updated_content)
-            # Create the output path in the same directory as this module
-            base_name = os.path.splitext(os.path.basename(idml_path))[0]
-            output_filename = (
-                f"{base_name}_filled_{datetime.now().strftime('%Y%m%d%H%M%S')}.idml"
-            )
-            output_path = os.path.join(app_dir, output_filename)
-            # Create a new IDML with the updated content. make_archive appends
-            # ".zip" to the base name it is given, hence the rename back to the
-            # .idml path afterwards.
-            # NOTE(review): whether consumers of the file require a particular
-            # member order or compression inside the archive is not checked
-            # here - confirm.
-            shutil.make_archive(output_path, "zip", extract_dir)
-            os.rename(output_path + ".zip", output_path)
-            print(f"Updated IDML saved to: {output_path}")
-            return output_path
-def create_replacements_from_metrics(metrics_data):
-    """
-    Build the tag-pattern -> replacement-values mapping for an IDML template.
-    Args:
-        metrics_data: Dictionary containing project metrics
-    Returns:
-        dict: Regex pattern of each template placeholder mapped to the list of
-        values that should replace its matches, in template order
-    """
-    lookup = metrics_data.get
-    replacements = {}
-    # Single-value placeholders; a missing metric becomes an empty string.
-    replacements[r"<Content>&lt;Description&gt;</Content>"] = [lookup("description", "")]
-    replacements[r"<Content>&lt;Project Name&gt;</Content>"] = [
-        lookup("project_name", "")
-    ]
-    replacements[r"<Content>&lt;Location&gt;</Content>"] = [lookup("location", "")]
-    replacements[r"<Content>&lt;Area&gt; SF</Content>"] = [lookup("size", "")]
-    replacements[r"<Content>&lt;NumFloors&gt;</Content>"] = [
-        lookup("number_of_floors", "")
-    ]
-    # The completion date is the only scalar that is forced to text.
-    completion_text = str(lookup("completion_date", ""))
-    replacements[r"<Content>&lt;DateComplete&gt; \(&lt;Phase&gt;\)</Content>"] = [
-        completion_text
-    ]
-    replacements[r"<Content>&lt;Client&gt;</Content>"] = [lookup("client_name", "")]
-    # Numbered placeholders: one value per team member / consultant.
-    team_values = []
-    for member in lookup("project_team_members", []):
-        # Trailing space stands where the role placeholder was in the template.
-        team_values.append(f"{member} ")
-    replacements[r"<Content>&lt;TEAM\d+&gt; \(&lt;Role\d+&gt;\)</Content>"] = team_values
-    replacements[r"<Content>&lt;Consultant\d+&gt;</Content>"] = list(
-        lookup("external_consultants", [])
-    )
-    return replacements
-def _path_with_suffix(upload, suffix):
-    """Return (path, is_temp_copy) for an upload, copying it to a temp file with ``suffix`` when its name has no extension."""
-    # Accept both plain path strings and objects exposing the path as ``.name``.
-    path = os.fspath(getattr(upload, "name", upload))
-    if os.path.splitext(path)[1]:
-        return path, False
-    # mkstemp returns an open descriptor; close it right away so the copy below
-    # is not blocked and no handle is leaked.
-    fd, temp_path = tempfile.mkstemp(suffix=suffix)
-    os.close(fd)
-    shutil.copy2(path, temp_path)
-    return temp_path, True
-def process_and_update_idml(file, marketing_prompt, metrics_prompt, idml_path):
-    """
-    Run the full pipeline: transcription -> metrics -> filled IDML template.
-    Args:
-        file: Uploaded transcription (path string or object with ``.name``)
-        marketing_prompt: Guidelines for the marketing copy
-        metrics_prompt: Extra instructions for metrics extraction
-        idml_path: Uploaded IDML template (path string or object with ``.name``)
-    Returns:
-        tuple: (status message, results as a JSON string, output file path);
-        on failure the path is None and the JSON string describes the error
-    """
-    temp_copies = []
-    try:
-        # Ensure file has an extension
-        docx_path, is_temp = _path_with_suffix(file, ".docx")
-        if is_temp:
-            temp_copies.append(docx_path)
-        # Process the transcription
-        results = process_transcription(docx_path, marketing_prompt, metrics_prompt)
-        # Ensure IDML file has extension
-        template_path, is_temp = _path_with_suffix(idml_path, ".idml")
-        if is_temp:
-            temp_copies.append(template_path)
-        # Prepare the replacements dictionary
-        replacements = create_replacements_from_metrics(results)
-        # Update the IDML file
-        output_path = update_idml_content(template_path, replacements)
-        return (
-            f"Successfully processed and updated IDML. Output saved to: {output_path}",
-            json.dumps(results, indent=2),
-            output_path,
-        )
-    except Exception as e:
-        # UI boundary: report any failure to the interface instead of raising.
-        error_json = {
-            "error": str(e),
-            "description": "An error occurred during processing",
-        }
-        return f"Error: {str(e)}", json.dumps(error_json, indent=2), None
-    finally:
-        # Remove the temporary copies created above; the generated output file
-        # is written elsewhere and is not affected.
-        for temp_path in temp_copies:
-            try:
-                os.remove(temp_path)
-            except OSError:
-                # Cleanup is best effort and must not mask the real result.
-                pass
-# Create Gradio interface. The four inputs are passed positionally to
-# process_and_update_idml (file, marketing_prompt, metrics_prompt, idml_path)
-# and its three return values fill the three outputs in order.
-iface = gr.Interface(
-    fn=process_and_update_idml,
-    inputs=[
-        gr.File(label="Upload Transcription File (DOCX)", file_types=[".docx"]),
-        gr.Textbox(
-            label="Marketing Prompt",
-            value="create short paragraph with friendly tone focusing on the sustainability aspects of the project",
-            lines=3,
-        ),
-        gr.Textbox(
-            label="Metrics Prompt",
-            value="extract project name, location, Size in square feet, number of floors, total height, completion date, client name, project team members name and any external consultants",
-            lines=3,
-        ),
-        gr.File(label="Upload indesign template (idml)", file_types=[".idml"]),
-    ],
-    outputs=[
-        gr.Textbox(label="IDML Update Status", lines=2),
-        gr.JSON(label="Transcription Results"),
-        gr.File(label="Download Updated IDML"),
-    ],
-    title="Marketing Transcription Processor",
-    description="Upload a transcription file and IDML template to generate marketing content and update the IDML file.",
-)
-# NOTE(review): launch() runs at import time because there is no
-# `if __name__ == "__main__":` guard - presumably intended, since the file is
-# executed as the app entry script; confirm before importing it elsewhere.
-iface.launch()