# Hugging Face Space: AI research assistant (website + YouTube Q&A via Groq LLM)
import os

import requests
import gradio as gr
import torch
import whisper
import yt_dlp
from bs4 import BeautifulSoup
from groq import Groq
from youtube_transcript_api import YouTubeTranscriptApi
| # Setup using Hugging Face Secrets | |
| ZENROWS_KEY = os.environ.get("ZENROWS_KEY") | |
| GROQ_KEY = os.environ.get("GROQ_KEY") | |
| client = Groq(api_key=GROQ_KEY) | |
| # Detect Hardware | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
| print(f"π Running on: {device.upper()}") | |
| # Load Whisper (This will happen when the Hugging Face Space starts) | |
| audio_model = whisper.load_model("base.en").to(device) | |
| cache = {"web_url": "", "web_content": "", "yt_url": "", "yt_content": ""} | |
| def get_website_content(url): | |
| global cache | |
| url = url.strip() | |
| if url == cache["web_url"]: return cache["web_content"] | |
| params = {"apikey": ZENROWS_KEY, "url": url, "js_render": "true", "premium_proxy": "true"} | |
| try: | |
| res = requests.get("https://api.zenrows.com/v1/", params=params, timeout=30) | |
| soup = BeautifulSoup(res.text, "html.parser") | |
| for junk in soup(["script", "style", "nav", "footer", "header"]): junk.decompose() | |
| text = " ".join([p.text for p in soup.find_all('p')]) | |
| cache["web_url"], cache["web_content"] = url, text[:12000] | |
| return cache["web_content"] | |
| except Exception as e: return f"Error: {str(e)}" | |
| def get_youtube_content(url): | |
| global cache | |
| video_id = url.split("v=")[-1].split("&")[0].split("/")[-1].strip() | |
| if video_id == cache["yt_url"]: return cache["yt_content"] | |
| try: | |
| transcript = YouTubeTranscriptApi.get_transcript(video_id) | |
| content = " ".join([i['text'] for i in transcript]) | |
| except: | |
| ydl_opts = { | |
| 'format': 'm4a/bestaudio/best', | |
| 'outtmpl': 'temp_audio.%(ext)s', | |
| 'quiet': True, | |
| 'postprocessors': [{'key': 'FFmpegExtractAudio','preferredcodec': 'm4a'}], | |
| } | |
| with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.download([url]) | |
| result = audio_model.transcribe("temp_audio.m4a", fp16=(device=="cuda")) | |
| content = result["text"] | |
| if os.path.exists("temp_audio.m4a"): os.remove("temp_audio.m4a") | |
| cache["yt_url"], cache["yt_content"] = video_id, content | |
| return content | |
| def ask_ai(context, question): | |
| if not context or len(context) < 20: return "β Error: No content found." | |
| completion = client.chat.completions.create( | |
| model="llama-3.3-70b-versatile", | |
| messages=[ | |
| {"role": "system", "content": f"Answer using ONLY this text:\n\n{context}"}, | |
| {"role": "user", "content": question} | |
| ] | |
| ) | |
| return completion.choices[0].message.content | |
| with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| gr.Markdown("# π Ultimate AI Research Assistant (V2)") | |
| with gr.Tabs(): | |
| with gr.TabItem("π Website Analysis"): | |
| url_w = gr.Textbox(label="Website URL") | |
| q_w = gr.Textbox(label="Question") | |
| out_w = gr.Textbox(label="AI Response") | |
| gr.Button("Analyze Site").click(lambda u,q: ask_ai(get_website_content(u), q), [url_w, q_w], out_w) | |
| with gr.TabItem("π₯ YouTube Analysis"): | |
| url_y = gr.Textbox(label="YouTube URL") | |
| q_y = gr.Textbox(label="Question") | |
| out_y = gr.Textbox(label="AI Response") | |
| gr.Button("Analyze Video").click(lambda u,q: ask_ai(get_youtube_content(u), q), [url_y, q_y], out_y) | |
| demo.launch() | |