ValakiJay1706 committed on
Commit
e27fba1
·
verified ·
1 Parent(s): 748cf67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -46
app.py CHANGED
@@ -1,65 +1,61 @@
1
  import gradio as gr
2
- import requests
3
- import os
4
 
5
# --- Configuration ---
# MODEL CHANGE: Using Microsoft's Phi-3-mini, which is confirmed to be on the free Inference API.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
# Serverless Inference API endpoint for the model above.
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

# Get the Hugging Face token from the Space's secrets.
# NOTE(review): if the secret is missing this is None; query_api raises a ValueError later.
HF_TOKEN = os.getenv("HF_TOKEN")
12
 
13
# The "personality" of your bot.
# Prepended to every request so the model always answers as a hook generator.
SYSTEM_PROMPT = """You are an expert viral video scriptwriter. Your sole function is to generate compelling video hooks. When a user gives you a topic, generate a list of 10 unique and powerful video hooks. Format the output as a numbered list and do not add any extra commentary."""
15
 
16
# --- API Call Logic ---
def query_api(payload):
    """POST *payload* to the HF Inference API and return the parsed JSON reply.

    Raises ValueError when no token is configured, gr.Error while the model
    is still loading (HTTP 503), and requests.HTTPError on other failures.
    """
    if not HF_TOKEN:
        raise ValueError("HF_TOKEN secret not found. Please add it to your Space's settings.")

    auth_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    resp = requests.post(API_URL, headers=auth_headers, json=payload)

    # A 503 means the serverless backend is still spinning the model up.
    if resp.status_code == 503:
        wait = resp.json().get("estimated_time", 0)
        raise gr.Error(f"The model is currently loading. Please try again in {int(wait) + 5} seconds.")

    resp.raise_for_status()
    return resp.json()
30
 
31
# --- The Core Chat Logic ---
def predict(message, history):
    """Generate 10 video hooks for *message* via the remote Phi-3 endpoint.

    *history* is accepted for ChatInterface compatibility; each call is a
    fresh single-turn request driven by SYSTEM_PROMPT.
    """
    # PROMPT FORMAT: Phi-3 uses <|user|> ... <|end|> <|assistant|> markup.
    # The system prompt is folded into the user turn since the API takes raw text.
    full_prompt = f"<|user|>\n{SYSTEM_PROMPT}\n\nMy Topic: {message}<|end|>\n<|assistant|>"

    payload = {
        "inputs": full_prompt,
        "parameters": {
            "max_new_tokens": 1024,
            "return_full_text": False,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.95,
        },
    }

    try:
        result = query_api(payload)
        return result[0]['generated_text']
    except requests.exceptions.RequestException as e:
        return f"An API error occurred. This could be a temporary issue. Please try again. Details: {e}"
    except (KeyError, IndexError) as e:
        return f"Error parsing the API response: {e}. The model may have returned an unexpected format."
 
55
 
56
# --- Gradio User Interface ---
# ChatInterface wires predict() into a ready-made chat UI.
chatbot = gr.ChatInterface(
    predict,
    theme="soft",
    title="Viral Video Hook Generator",
    description="Give me a topic, and I'll generate 10 compelling video hooks for TikToks and Shorts.",
    examples=["Productivity hacks", "The history of coffee", "How to learn a new skill"],
)

chatbot.launch()
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
 
5
# --- Configuration ---
# MODEL: Using TinyLlama, a model small enough to run on a free CPU Space.
# This approach is self-contained and does not use any external API.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 
 
9
 
10
# The "personality" of your bot.
# Injected as the system role of every chat-template request in predict().
SYSTEM_PROMPT = """You are an expert viral video scriptwriter. Your sole function is to generate compelling video hooks. When a user gives you a topic, generate a list of 10 unique and powerful video hooks. Format the output as a numbered list and do not add any extra commentary."""
12
 
13
# --- Model Loading ---
# The transformers 'pipeline' gives a simple, robust way to run the model on CPU.
# Loading happens once at import time and may take a few minutes on first start.
try:
    pipe = pipeline(
        "text-generation",
        model=MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
except Exception as e:
    # Chain the original exception (`from e`) so the real load failure
    # (OOM, download error, bad revision, ...) stays in the traceback
    # instead of being flattened into the message string only.
    raise gr.Error(f"Failed to load the model. Error: {e}") from e
 
 
 
 
23
 
24
# --- The Core Chat Logic ---
def predict(message, history):
    """Generate 10 video hooks for *message* with the local TinyLlama pipeline.

    *history* is accepted for ChatInterface compatibility but intentionally
    ignored: every request is a fresh single-turn prompt driven by
    SYSTEM_PROMPT, so earlier turns never leak into the generation.
    """
    # PROMPT FORMAT: let the tokenizer's chat template emit the exact markup
    # TinyLlama was trained on instead of hand-building special tokens.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": message},
    ]
    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # CPU inference is slow; tell the user up front.
    gr.Info("Generating response... this may take up to 60 seconds.")

    # return_full_text=False makes the pipeline return only the newly
    # generated tokens, which is more robust than splitting the echoed
    # prompt back out of the output on the "<|assistant|>" marker.
    outputs = pipe(
        prompt,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()
49
 
50
# --- Gradio User Interface ---
# ChatInterface wires predict() into a ready-made chat UI.
chatbot = gr.ChatInterface(
    predict,
    theme="soft",
    title="Viral Video Hook Generator",
    description="Give me a topic, and I'll generate 10 compelling video hooks. This app runs on free hardware, so please be patient with response times.",
    examples=["Productivity hacks", "The history of coffee", "How to learn a new skill"],
)

# Launch only when executed directly (not when imported by another module).
if __name__ == "__main__":
    chatbot.launch()