ValakiJay1706 committed on
Commit
e27fba1
·
verified ·
1 Parent(s): 748cf67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -46
app.py CHANGED
@@ -1,65 +1,61 @@
1
  import gradio as gr
2
- import requests
3
- import os
4
 
5
# --- Configuration ---
# MODEL CHANGE: Using Microsoft's Phi-3-mini, which is confirmed to be on the free Inference API.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
# Serverless Inference API endpoint for the model above.
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

# Get the Hugging Face token from the Space's secrets.
# NOTE(review): if the secret is missing this is None; query_api raises a ValueError later.
HF_TOKEN = os.getenv("HF_TOKEN")
12
 
13
# The "personality" of your bot.
# Prepended to every request so the model always answers as a hook generator.
SYSTEM_PROMPT = """You are an expert viral video scriptwriter. Your sole function is to generate compelling video hooks. When a user gives you a topic, generate a list of 10 unique and powerful video hooks. Format the output as a numbered list and do not add any extra commentary."""
15
 
16
# --- API Call Logic ---
def query_api(payload):
    """POST *payload* to the HF Inference API and return the parsed JSON reply.

    Raises ValueError when no token is configured, gr.Error while the model
    is still loading (HTTP 503), and requests.HTTPError on other failures.
    """
    if not HF_TOKEN:
        raise ValueError("HF_TOKEN secret not found. Please add it to your Space's settings.")

    auth_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    resp = requests.post(API_URL, headers=auth_headers, json=payload)

    # A 503 means the serverless backend is still spinning the model up.
    if resp.status_code == 503:
        wait = resp.json().get("estimated_time", 0)
        raise gr.Error(f"The model is currently loading. Please try again in {int(wait) + 5} seconds.")

    resp.raise_for_status()
    return resp.json()
30
 
31
# --- The Core Chat Logic ---
def predict(message, history):
    """Generate 10 video hooks for *message* via the remote Phi-3 endpoint.

    *history* is accepted for ChatInterface compatibility; each call is a
    fresh single-turn request driven by SYSTEM_PROMPT.
    """
    # PROMPT FORMAT: Phi-3 uses <|user|> ... <|end|> <|assistant|> markup.
    # The system prompt is folded into the user turn since the API takes raw text.
    full_prompt = f"<|user|>\n{SYSTEM_PROMPT}\n\nMy Topic: {message}<|end|>\n<|assistant|>"

    payload = {
        "inputs": full_prompt,
        "parameters": {
            "max_new_tokens": 1024,
            "return_full_text": False,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.95,
        },
    }

    try:
        result = query_api(payload)
        return result[0]['generated_text']
    except requests.exceptions.RequestException as e:
        return f"An API error occurred. This could be a temporary issue. Please try again. Details: {e}"
    except (KeyError, IndexError) as e:
        return f"Error parsing the API response: {e}. The model may have returned an unexpected format."
 
55
 
56
# --- Gradio User Interface ---
# ChatInterface wires predict() into a ready-made chat UI.
chatbot = gr.ChatInterface(
    predict,
    theme="soft",
    title="Viral Video Hook Generator",
    description="Give me a topic, and I'll generate 10 compelling video hooks for TikToks and Shorts.",
    examples=["Productivity hacks", "The history of coffee", "How to learn a new skill"],
)

chatbot.launch()
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
 
5
# --- Configuration ---
# MODEL: Using TinyLlama, a model small enough to run on a free CPU Space.
# This approach is self-contained and does not use any external API.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 
 
9
 
10
# The "personality" of your bot.
# Injected as the system role of every chat-template request in predict().
SYSTEM_PROMPT = """You are an expert viral video scriptwriter. Your sole function is to generate compelling video hooks. When a user gives you a topic, generate a list of 10 unique and powerful video hooks. Format the output as a numbered list and do not add any extra commentary."""
12
 
13
# --- Model Loading ---
# The transformers 'pipeline' gives a simple, robust way to run the model on CPU.
# Loading happens once at import time and may take a few minutes on first start.
try:
    pipe = pipeline(
        "text-generation",
        model=MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
except Exception as e:
    # Chain the original exception (`from e`) so the real load failure
    # (OOM, download error, bad revision, ...) stays in the traceback
    # instead of being flattened into the message string only.
    raise gr.Error(f"Failed to load the model. Error: {e}") from e
 
 
 
 
23
 
24
# --- The Core Chat Logic ---
def predict(message, history):
    """Generate 10 video hooks for *message* with the local TinyLlama pipeline.

    *history* is accepted for ChatInterface compatibility but intentionally
    ignored: every request is a fresh single-turn prompt driven by
    SYSTEM_PROMPT, so earlier turns never leak into the generation.
    """
    # PROMPT FORMAT: let the tokenizer's chat template emit the exact markup
    # TinyLlama was trained on instead of hand-building special tokens.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": message},
    ]
    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # CPU inference is slow; tell the user up front.
    gr.Info("Generating response... this may take up to 60 seconds.")

    # return_full_text=False makes the pipeline return only the newly
    # generated tokens, which is more robust than splitting the echoed
    # prompt back out of the output on the "<|assistant|>" marker.
    outputs = pipe(
        prompt,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()
49
 
50
# --- Gradio User Interface ---
# ChatInterface wires predict() into a ready-made chat UI.
chatbot = gr.ChatInterface(
    predict,
    theme="soft",
    title="Viral Video Hook Generator",
    description="Give me a topic, and I'll generate 10 compelling video hooks. This app runs on free hardware, so please be patient with response times.",
    examples=["Productivity hacks", "The history of coffee", "How to learn a new skill"],
)

# Launch only when executed directly (not when imported by another module).
if __name__ == "__main__":
    chatbot.launch()