# NOTE(review): the three lines below are Hugging Face upload-page residue,
# not Python — commented out so the module parses.
# duaajaved321's picture
# Upload 2 files
# 4e052d6 verified
import os
import requests
import yt_dlp
import whisper
import torch
import gradio as gr
from bs4 import BeautifulSoup
from groq import Groq
from youtube_transcript_api import YouTubeTranscriptApi
# Setup using Hugging Face Secrets
# Credentials are injected as environment variables (Space secrets); a
# missing key yields None here and fails later at request time.
ZENROWS_KEY = os.environ.get("ZENROWS_KEY")
GROQ_KEY = os.environ.get("GROQ_KEY")
client = Groq(api_key=GROQ_KEY)
# Detect Hardware
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"πŸš€ Running on: {device.upper()}")
# Load Whisper (This will happen when the Hugging Face Space starts)
# NOTE: download/load happens at import time, so Space startup blocks here.
audio_model = whisper.load_model("base.en").to(device)
# Single-entry memo of the last fetched website (keyed by raw URL) and the
# last YouTube transcript (keyed by video id), to skip repeat fetches.
cache = {"web_url": "", "web_content": "", "yt_url": "", "yt_content": ""}
def get_website_content(url):
    """Scrape *url* through the ZenRows proxy and return its paragraph text.

    The extracted text is truncated to 12,000 characters and memoized in the
    module-level ``cache`` so repeat questions about the same page do not
    re-fetch. On any failure an ``"Error: ..."`` string is returned instead
    of raising, so the UI always has something displayable.
    """
    url = url.strip()
    if url == cache["web_url"]:
        return cache["web_content"]
    params = {
        "apikey": ZENROWS_KEY,
        "url": url,
        "js_render": "true",        # render client-side JavaScript
        "premium_proxy": "true",
    }
    try:
        res = requests.get("https://api.zenrows.com/v1/", params=params, timeout=30)
        # Fix: surface HTTP failures (quota exhausted, bad key, blocked page)
        # instead of silently parsing an error body as if it were the page.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        # Drop boilerplate elements that would pollute the LLM context.
        for junk in soup(["script", "style", "nav", "footer", "header"]):
            junk.decompose()
        text = " ".join(p.get_text() for p in soup.find_all("p"))
        # Truncate so the prompt stays within the model's context budget.
        cache["web_url"], cache["web_content"] = url, text[:12000]
        return cache["web_content"]
    except Exception as e:
        return f"Error: {e}"
def get_youtube_content(url):
    """Return the spoken text of a YouTube video.

    Tries the official caption track first (fast, free); if that fails, the
    audio is downloaded with yt-dlp and transcribed locally with Whisper.
    The transcript is memoized in the module-level ``cache`` per video id.
    """
    # Handles both watch?v=ID (possibly &-terminated) and youtu.be/ID URLs.
    video_id = url.split("v=")[-1].split("&")[0].split("/")[-1].strip()
    if video_id == cache["yt_url"]:
        return cache["yt_content"]
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        content = " ".join(seg["text"] for seg in transcript)
    # Fix: was a bare ``except:`` that also swallowed SystemExit and
    # KeyboardInterrupt; fall back to Whisper only on ordinary errors.
    except Exception:
        ydl_opts = {
            'format': 'm4a/bestaudio/best',
            'outtmpl': 'temp_audio.%(ext)s',
            'quiet': True,
            'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'm4a'}],
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            result = audio_model.transcribe("temp_audio.m4a", fp16=(device == "cuda"))
            content = result["text"]
        finally:
            # Fix: remove the temp file even when download/transcription
            # raises, so a failed run does not leave stale audio behind.
            if os.path.exists("temp_audio.m4a"):
                os.remove("temp_audio.m4a")
    cache["yt_url"], cache["yt_content"] = video_id, content
    return content
def ask_ai(context, question):
    """Answer *question* with the Groq LLM, grounded strictly in *context*.

    Returns an error string (rather than raising) when the scraped context
    is empty or too short to be meaningful, so the UI can display it.
    """
    # Guard: don't waste an API call on empty/near-empty source material.
    if not context or len(context) < 20:
        # Fix: repaired mojibake in the user-facing error message.
        return "❌ Error: No content found."
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            # System prompt pins the model to the retrieved text only.
            {"role": "system", "content": f"Answer using ONLY this text:\n\n{context}"},
            {"role": "user", "content": question},
        ],
    )
    return completion.choices[0].message.content
# --- Gradio UI: two tabs sharing the same "retrieve content, then ask" flow ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Fix: repaired mojibake emojis in all visible UI strings.
    gr.Markdown("# 🚀 Ultimate AI Research Assistant (V2)")
    with gr.Tabs():
        with gr.TabItem("🌐 Website Analysis"):
            url_w = gr.Textbox(label="Website URL")
            q_w = gr.Textbox(label="Question")
            out_w = gr.Textbox(label="AI Response")
            # Scrape the page, then answer the question from its text.
            gr.Button("Analyze Site").click(
                lambda u, q: ask_ai(get_website_content(u), q), [url_w, q_w], out_w
            )
        with gr.TabItem("🎥 YouTube Analysis"):
            url_y = gr.Textbox(label="YouTube URL")
            q_y = gr.Textbox(label="Question")
            out_y = gr.Textbox(label="AI Response")
            # Transcribe the video, then answer the question from its text.
            gr.Button("Analyze Video").click(
                lambda u, q: ask_ai(get_youtube_content(u), q), [url_y, q_y], out_y
            )
demo.launch()