Spaces:

duaajaved321
/

MSDSF24M005_ver2

Sleeping

App Files Files Community

duaajaved321 committed on Mar 9

Commit

4e052d6

verified ·

1 Parent(s): e5af218

Upload 2 files

Browse files

Files changed (2) hide show

app.py +90 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import os
+import requests
+import yt_dlp
+import whisper
+import torch
+import gradio as gr
+from bs4 import BeautifulSoup
+from groq import Groq
+from youtube_transcript_api import YouTubeTranscriptApi
+# Setup using Hugging Face Secrets
+ZENROWS_KEY = os.environ.get("ZENROWS_KEY")
+GROQ_KEY = os.environ.get("GROQ_KEY")
+client = Groq(api_key=GROQ_KEY)
+# Detect Hardware
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"🚀 Running on: {device.upper()}")
+# Load Whisper (This will happen when the Hugging Face Space starts)
+audio_model = whisper.load_model("base.en").to(device)
+cache = {"web_url": "", "web_content": "", "yt_url": "", "yt_content": ""}
+def get_website_content(url):
+    global cache
+    url = url.strip()
+    if url == cache["web_url"]: return cache["web_content"]
+    params = {"apikey": ZENROWS_KEY, "url": url, "js_render": "true", "premium_proxy": "true"}
+    try:
+        res = requests.get("https://api.zenrows.com/v1/", params=params, timeout=30)
+        soup = BeautifulSoup(res.text, "html.parser")
+        for junk in soup(["script", "style", "nav", "footer", "header"]): junk.decompose()
+        text = " ".join([p.text for p in soup.find_all('p')])
+        cache["web_url"], cache["web_content"] = url, text[:12000]
+        return cache["web_content"]
+    except Exception as e: return f"Error: {str(e)}"
+def get_youtube_content(url):
+    global cache
+    video_id = url.split("v=")[-1].split("&")[0].split("/")[-1].strip()
+    if video_id == cache["yt_url"]: return cache["yt_content"]
+    try:
+        transcript = YouTubeTranscriptApi.get_transcript(video_id)
+        content = " ".join([i['text'] for i in transcript])
+    except:
+        ydl_opts = {
+            'format': 'm4a/bestaudio/best',
+            'outtmpl': 'temp_audio.%(ext)s',
+            'quiet': True,
+            'postprocessors': [{'key': 'FFmpegExtractAudio','preferredcodec': 'm4a'}],
+        }
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.download([url])
+        result = audio_model.transcribe("temp_audio.m4a", fp16=(device=="cuda"))
+        content = result["text"]
+        if os.path.exists("temp_audio.m4a"): os.remove("temp_audio.m4a")
+    cache["yt_url"], cache["yt_content"] = video_id, content
+    return content
+def ask_ai(context, question):
+    if not context or len(context) < 20: return "❌ Error: No content found."
+    completion = client.chat.completions.create(
+        model="llama-3.3-70b-versatile",
+        messages=[
+            {"role": "system", "content": f"Answer using ONLY this text:\n\n{context}"},
+            {"role": "user", "content": question}
+        ]
+    )
+    return completion.choices[0].message.content
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Ultimate AI Research Assistant (V2)")
+    with gr.Tabs():
+        with gr.TabItem("🌐 Website Analysis"):
+            url_w = gr.Textbox(label="Website URL")
+            q_w = gr.Textbox(label="Question")
+            out_w = gr.Textbox(label="AI Response")
+            gr.Button("Analyze Site").click(lambda u,q: ask_ai(get_website_content(u), q), [url_w, q_w], out_w)
+        with gr.TabItem("🎥 YouTube Analysis"):
+            url_y = gr.Textbox(label="YouTube URL")
+            q_y = gr.Textbox(label="Question")
+            out_y = gr.Textbox(label="AI Response")
+            gr.Button("Analyze Video").click(lambda u,q: ask_ai(get_youtube_content(u), q), [url_y, q_y], out_y)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+requests
+beautifulsoup4
+groq
+gradio
+yt-dlp
+openai-whisper
+youtube-transcript-api
+torch