# Hugging Face Space: AI research assistant (website + YouTube Q&A via Groq LLM)
import os

import requests
import gradio as gr
import torch
import whisper
import yt_dlp
from bs4 import BeautifulSoup
from groq import Groq
from youtube_transcript_api import YouTubeTranscriptApi
| # Setup using Hugging Face Secrets | |
| ZENROWS_KEY = os.environ.get("ZENROWS_KEY") | |
| GROQ_KEY = os.environ.get("GROQ_KEY") | |
| client = Groq(api_key=GROQ_KEY) | |
| # Detect Hardware | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
| print(f"π Running on: {device.upper()}") | |
| # Load Whisper (This will happen when the Hugging Face Space starts) | |
| audio_model = whisper.load_model("base.en").to(device) | |
| cache = {"web_url": "", "web_content": "", "yt_url": "", "yt_content": ""} | |
| def get_website_content(url): | |
| global cache | |
| url = url.strip() | |
| if url == cache["web_url"]: return cache["web_content"] | |
| params = {"apikey": ZENROWS_KEY, "url": url, "js_render": "true", "premium_proxy": "true"} | |
| try: | |
| res = requests.get("https://api.zenrows.com/v1/", params=params, timeout=30) | |
| soup = BeautifulSoup(res.text, "html.parser") | |
| for junk in soup(["script", "style", "nav", "footer", "header"]): junk.decompose() | |
| text = " ".join([p.text for p in soup.find_all('p')]) | |
| cache["web_url"], cache["web_content"] = url, text[:12000] | |
| return cache["web_content"] | |
| except Exception as e: return f"Error: {str(e)}" | |
| def get_youtube_content(url): | |
| global cache | |
| video_id = url.split("v=")[-1].split("&")[0].split("/")[-1].strip() | |
| if video_id == cache["yt_url"]: return cache["yt_content"] | |
| try: | |
| transcript = YouTubeTranscriptApi.get_transcript(video_id) | |
| content = " ".join([i['text'] for i in transcript]) | |
| except: | |
| ydl_opts = { | |
| 'format': 'm4a/bestaudio/best', | |
| 'outtmpl': 'temp_audio.%(ext)s', | |
| 'quiet': True, | |
| 'postprocessors': [{'key': 'FFmpegExtractAudio','preferredcodec': 'm4a'}], | |
| } | |
| with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.download([url]) | |
| result = audio_model.transcribe("temp_audio.m4a", fp16=(device=="cuda")) | |
| content = result["text"] | |
| if os.path.exists("temp_audio.m4a"): os.remove("temp_audio.m4a") | |
| cache["yt_url"], cache["yt_content"] = video_id, content | |
| return content | |
| def ask_ai(context, question): | |
| if not context or len(context) < 20: return "β Error: No content found." | |
| completion = client.chat.completions.create( | |
| model="llama-3.3-70b-versatile", | |
| messages=[ | |
| {"role": "system", "content": f"Answer using ONLY this text:\n\n{context}"}, | |
| {"role": "user", "content": question} | |
| ] | |
| ) | |
| return completion.choices[0].message.content | |
| with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| gr.Markdown("# π Ultimate AI Research Assistant (V2)") | |
| with gr.Tabs(): | |
| with gr.TabItem("π Website Analysis"): | |
| url_w = gr.Textbox(label="Website URL") | |
| q_w = gr.Textbox(label="Question") | |
| out_w = gr.Textbox(label="AI Response") | |
| gr.Button("Analyze Site").click(lambda u,q: ask_ai(get_website_content(u), q), [url_w, q_w], out_w) | |
| with gr.TabItem("π₯ YouTube Analysis"): | |
| url_y = gr.Textbox(label="YouTube URL") | |
| q_y = gr.Textbox(label="Question") | |
| out_y = gr.Textbox(label="AI Response") | |
| gr.Button("Analyze Video").click(lambda u,q: ask_ai(get_youtube_content(u), q), [url_y, q_y], out_y) | |
| demo.launch() | |