MightyOctopus committed · Commit 615ce65 · verified · 1 Parent(s): 91c414a

Create app.py

Files changed (1):
  1. app.py +255 -0
app.py ADDED
@@ -0,0 +1,255 @@
##########====================================================================################
##########====================PRODUCTION VERSION -- vLLM, GRADIO=====================###########
##########====================================================================================
import os
import requests
from typing import List, Dict, Tuple
from datetime import datetime
from anthropic import Anthropic
from openai import OpenAI
import time
import gradio as gr
from tqdm import tqdm

ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
assert ANTHROPIC_API_KEY, "Set ANTHROPIC_API_KEY in Space settings"

VLLM_API = "http://localhost:8000/v1"

QWEN_MODEL = "Qwen/Qwen3-4B-Instruct-2507"
CLAUDE_MODEL = "claude-3-5-haiku-latest"

open_source_client = OpenAI(api_key="EMPTY", base_url=VLLM_API)
claude_client = Anthropic(api_key=ANTHROPIC_API_KEY)

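
### NOTE (added): `enable_model()` is referenced further down (in `generate_output`
### and on `demo.load`) but is never defined in the original file. The helper below
### is a hypothetical pre-warm sketch for the vLLM backend, assuming the server at
### VLLM_API is already running: it sends a tiny request so the first real user
### call does not pay any cold-start latency.
def enable_model():
    try:
        open_source_client.chat.completions.create(
            model=QWEN_MODEL,
            messages=[{"role": "user", "content": "ping"}],
            max_tokens=1,
        )
    except Exception:
        # The vLLM server may still be starting up; real calls will surface errors.
        pass

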
def invoke_messages(
    rows_num: int,
    business_category: str,
    columns: str,
    instruction: str,
) -> List[Dict[str, str]]:
    system_message = """
    You are a helpful assistant that generates synthetic mockup datasets,
    on request, for all kinds of businesses. The user specifies the data
    niche, the column types, and any other details; your job is to create
    high-quality mockup data they can use in demo apps or in a testing
    environment.
    """.strip()

    user_prompt = f"""
    Generate synthetic mockup data that fits the following instructions:
    - Number of rows: {rows_num}
    - Business area: {business_category}
    - Columns: {columns}
    - Other instructions: {instruction}
    - Make sure to deliver only the markdown content, without any additional comments.
    """.strip()

    system_message += "\n" + """
    If an SQL file is chosen as the output, make sure to produce the full
    SQL file format, including the CREATE TABLE statement.
    """.strip()

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt}
    ]

    return messages

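### Quick illustration (added, not in the original commit) of what `invoke_messages`
### returns -- a standard OpenAI-style chat message list:
###
###     invoke_messages(10, "HR", "name, role, salary", "salaries in USD")
###     # -> [{"role": "system", "content": "You are a helpful assistant ..."},
###     #     {"role": "user",   "content": "Generate synthetic mockup data ..."}]
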
def pass_claude_msg(file_format: str, content: str) -> Tuple[str, str]:
    claude_sys_msg = """
    You are a helpful assistant that converts output generated by another model
    into the chosen file format: csv, sql, or json.
    NOTE: produce a result that contains only the markdown content,
    without any additional comments!
    """.strip()
    claude_user_msg = f"""
    Convert the following content into the {file_format} format:
    ----------------------------------------------------------------------
    {content}
    """.strip()

    return claude_sys_msg, claude_user_msg


def generate_output(messages):
    ### NOTE: this function is not called anywhere in this file. It expects a
    ### locally loaded transformers `model` and `tokenizer`, which the
    ### vLLM-served production path above does not create in-process.
    enable_model()

    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,  ### IMPORTANT: to get a mapping
        tokenize=True,
        add_generation_prompt=True,
        padding=True,
        return_attention_mask=True
    ).to(model.device)

    # print(inputs)

    outputs = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.2
    )

    ### Get the length (number of tokens) of the input prompt
    prompt_len = inputs["input_ids"].shape[1]

    ### Slice the generated sequence to skip the prompt tokens
    gen_tokens = outputs[0][prompt_len:]

    # print(tokenizer.decode(gen_tokens, skip_special_tokens=True))

    return gen_tokens


def launch_claude_api(sys_msg, user_msg):
    response = claude_client.messages.create(
        model=CLAUDE_MODEL,
        system=sys_msg,
        max_tokens=400,
        temperature=0.1,
        messages=[
            {"role": "user", "content": user_msg}
        ]
    )
    return response.content[0].text


###============= Gradio Function =============###

def generate_mockup_data(category, num_data_rows, columns, a_instruction,
                         progress=gr.Progress()):
    progress(0.2, desc="Generating...")
    msg = invoke_messages(
        rows_num=int(num_data_rows or 10),
        business_category=category,
        columns=columns,
        instruction=a_instruction
    )

    resp = open_source_client.chat.completions.create(
        model=QWEN_MODEL,
        messages=msg,
        max_tokens=400,
        temperature=0.2,
        stream=False
    )
    progress(1.0, desc="Done")

    return resp.choices[0].message.content

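
### Optional sketch (added, not in the original commit): the same vLLM call could
### stream partial text into the result textbox by turning the handler into a
### generator; Gradio re-renders the output on every `yield`. The function name is
### hypothetical and nothing below is wired into the UI.
def generate_mockup_data_streaming(category, num_data_rows, columns, a_instruction):
    msg = invoke_messages(
        rows_num=int(num_data_rows or 10),
        business_category=category,
        columns=columns,
        instruction=a_instruction
    )
    stream = open_source_client.chat.completions.create(
        model=QWEN_MODEL,
        messages=msg,
        max_tokens=400,
        temperature=0.2,
        stream=True
    )
    partial = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content or ""
        partial += delta
        yield partial

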
def show_hidden_row():
    return gr.update(visible=True)


def make_file(btn_sort: str, category: str, content: str):
    '''
    btn_sort: one of the 3 download file types from the buttons -- csv, sql, or json
    category: business category or area that the data is associated with
    content: LLM-generated text output to write to a file
    '''

    if not content or not content.strip():
        raise gr.Error("The result content is empty. Cannot create a file.")

    try:
        sys_msg, user_msg = pass_claude_msg(btn_sort, content)
        claude_output = launch_claude_api(sys_msg, user_msg)

        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        filepath = f"/tmp/{category}_mockup_{ts}.{btn_sort}"

        with open(filepath, "w") as f:
            f.write(claude_output)

        return filepath
    except Exception as e:
        raise gr.Error(f"Failed to format or create the file: {e}")


###============= Gradio UI =============###

def render_interface():

    with gr.Blocks(title="Mockup Data Generator", css="footer {visibility:hidden}") as demo:
        category = gr.Textbox(
            label="Business Area/Category",
            placeholder="e.g. HR, Sales, Hospitality, Senior Care, E-commerce, Finance",
        )
        num_data_rows = gr.Number(
            label="Number of Rows",
            placeholder="Type number...",
            minimum=10,
            maximum=50,
            step=10,
            precision=0
        )
        columns = gr.Textbox(
            label="Insert Columns",
            placeholder="Comma-separated..."
        )
        a_instruction = gr.Textbox(
            label="Additional Instruction",
            placeholder="Any additional instruction. Leave blank if none.",
            lines=5
        )
        btn = gr.Button(
            value="Generate"
        )
        out = gr.Textbox(label="Result shown here.")

        buttons_row = gr.Row(visible=False)

        with buttons_row:
            btn_csv = gr.DownloadButton(label="Download csv", size="md", elem_classes=["download-btn"])
            btn_sql = gr.DownloadButton(label="Download sql", size="md", elem_classes=["download-btn"])
            btn_json = gr.DownloadButton(label="Download json", size="md", elem_classes=["download-btn"])

        chain = btn.click(
            fn=generate_mockup_data,
            inputs=[category, num_data_rows, columns, a_instruction],
            outputs=out,
            queue=True
        )

        chain = chain.then(
            fn=show_hidden_row,
            inputs=None,
            outputs=buttons_row,
        )

        btn_csv.click(
            lambda category, data: make_file("csv", category, data),
            inputs=[category, out],
            outputs=btn_csv
        )

        btn_sql.click(
            lambda category, data: make_file("sql", category, data),
            inputs=[category, out],
            outputs=btn_sql
        )

        btn_json.click(
            lambda category, data: make_file("json", category, data),
            inputs=[category, out],
            outputs=btn_json
        )

        ### Pre-warm the model right when the page loads, so the user does not
        ### pay the model-load latency when submitting the form.
        demo.load(lambda: enable_model(), queue=False)

    return demo


if __name__ == "__main__":
    app = render_interface()
    app.queue(default_concurrency_limit=1)
    app.launch(server_name="0.0.0.0", server_port=7860)
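
### ---------------------------------------------------------------------------
### Assumed setup (added note, not part of the original commit): the app expects
### an OpenAI-compatible vLLM server already listening on localhost:8000 before
### it starts, e.g. something along the lines of
###
###     vllm serve Qwen/Qwen3-4B-Instruct-2507 --port 8000
###     python app.py
###
### (exact vLLM flags depend on the installed version), plus ANTHROPIC_API_KEY
### set in the environment / Space secrets for the Claude-based file conversion.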