Spaces:

SujalChhajed925
/

yt-metadata-api

Sleeping

App Files Files Community

SujalChhajed925 commited on Dec 22, 2025

Commit

299265a

verified ·

1 Parent(s): d29b20b

Update main.py

Browse files

Files changed (1) hide show

main.py +67 -71

main.py CHANGED Viewed

@@ -1,72 +1,68 @@
-import nltk
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from transformers import AutoTokenizer, pipeline
-from rake_nltk import Rake
-# --- Setup & Model Loading ---
-app = FastAPI()
-# CRITICAL: Allow your Frontend to access this API
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # Allows all origins (Safe for public free APIs)
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-print("Loading Models...")
-# Download NLTK data
-try:
-    nltk.data.find('tokenizers/punkt_tab')
-except LookupError:
-    nltk.download('stopwords')
-    nltk.download('punkt_tab')
-# Load AI Models (Cached in memory)
-t5_tokenizer = AutoTokenizer.from_pretrained("Michau/t5-base-en-generate-headline", use_fast=False)
-title_pipe = pipeline("text2text-generation", model="Michau/t5-base-en-generate-headline", tokenizer=t5_tokenizer)
-bart_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn", use_fast=False)
-desc_pipe = pipeline("summarization", model="facebook/bart-large-cnn", tokenizer=bart_tokenizer)
-print("Models Ready!")
-# --- Logic ---
-class VideoInput(BaseModel):
-    text: str
-def get_tags(text):
-    rake = Rake()
-    rake.extract_keywords_from_text(text)
-    phrases = rake.get_ranked_phrases()[:5]
-    hashtags = ["#" + p.replace(" ", "") for p in phrases]
-    tags = [p.replace(" ", "") for p in phrases]
-    return hashtags, tags
-@app.get("/")
-def home():
-    return {"status": "API is running. POST to /generate"}
-@app.post("/generate")
-async def generate(payload: VideoInput):
-    text = payload.text
-    if not text.strip():
-        raise HTTPException(status_code=400, detail="Empty text")
-    # 1. Generate Title
-    title_out = title_pipe("headline: " + text, max_new_tokens=70, do_sample=False)
-    # 2. Generate Description
-    desc_out = desc_pipe(text, max_new_tokens=150, do_sample=False)
-    # 3. Get Tags
-    hashtags, tags = get_tags(text)
-    return {
-        "title": title_out[0]["generated_text"],
-        "description": desc_out[0]["summary_text"],
-        "hashtags": hashtags,
-        "tags": tags
     }

+import nltk
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from transformers import AutoTokenizer, pipeline
+from rake_nltk import Rake
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+print("Loading Models...")
+try:
+    nltk.data.find('tokenizers/punkt_tab')
+except LookupError:
+    nltk.download('stopwords')
+    nltk.download('punkt_tab')
+# 1. TITLE MODEL (Keep this, or swap to 't5-small' if still desperate)
+t5_tokenizer = AutoTokenizer.from_pretrained("Michau/t5-base-en-generate-headline", use_fast=False)
+title_pipe = pipeline("text2text-generation", model="Michau/t5-base-en-generate-headline", tokenizer=t5_tokenizer)
+# 2. DESCRIPTION MODEL (>>> CHANGED TO DISTILBART <<<)
+# This model is 3x faster and smaller than bart-large-cnn
+bart_tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6", use_fast=False)
+desc_pipe = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", tokenizer=bart_tokenizer)
+print("Models Ready!")
+class VideoInput(BaseModel):
+    text: str
+def get_tags(text):
+    rake = Rake()
+    rake.extract_keywords_from_text(text)
+    phrases = rake.get_ranked_phrases()[:5]
+    hashtags = ["#" + p.replace(" ", "") for p in phrases]
+    tags = [p.replace(" ", "") for p in phrases]
+    return hashtags, tags
+@app.get("/")
+def home():
+    return {"status": "API is running."}
+@app.post("/generate")
+async def generate(payload: VideoInput):
+    text = payload.text
+    if not text.strip():
+        raise HTTPException(status_code=400, detail="Empty text")
+    # Lower max_new_tokens slightly to speed up generation
+    title_out = title_pipe("headline: " + text, max_new_tokens=50, do_sample=False)
+    desc_out = desc_pipe(text, max_new_tokens=100, do_sample=False)
+    hashtags, tags = get_tags(text)
+    return {
+        "title": title_out[0]["generated_text"],
+        "description": desc_out[0]["summary_text"],
+        "hashtags": hashtags,
+        "tags": tags
     }