duaajaved321 committed on
Commit
4e052d6
Β·
verified Β·
1 Parent(s): e5af218

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +90 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
import os
import requests
import yt_dlp
import whisper
import torch
import gradio as gr
from bs4 import BeautifulSoup
from groq import Groq
from youtube_transcript_api import YouTubeTranscriptApi

# API keys are injected as Hugging Face Space secrets; os.environ.get() returns
# None when a secret is missing (downstream API calls will then fail at request time).
ZENROWS_KEY = os.environ.get("ZENROWS_KEY")
GROQ_KEY = os.environ.get("GROQ_KEY")

client = Groq(api_key=GROQ_KEY)

# Prefer GPU when available; `device` also drives Whisper's fp16 flag below.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"πŸš€ Running on: {device.upper()}")

# Load Whisper once at Space startup ("base.en" = small English-only model).
audio_model = whisper.load_model("base.en").to(device)

# One-entry memo per source type: last website URL / video id and its extracted text.
cache = {"web_url": "", "web_content": "", "yt_url": "", "yt_content": ""}
26
+
27
def get_website_content(url):
    """Scrape readable paragraph text from a web page via the ZenRows proxy API.

    The last successful URL/text pair is memoized in the module-level ``cache``
    so repeated questions about the same page skip the network round trip.

    Returns the extracted text (capped at 12,000 chars), or an ``"Error: ..."``
    string on failure — callers feed that straight into the short-context guard.
    """
    global cache
    url = url.strip()
    if url == cache["web_url"]:
        return cache["web_content"]

    params = {
        "apikey": ZENROWS_KEY,
        "url": url,
        "js_render": "true",
        "premium_proxy": "true",
    }
    try:
        res = requests.get("https://api.zenrows.com/v1/", params=params, timeout=30)
        # Bug fix: without this check, 4xx/5xx proxy error pages were parsed
        # and cached as if they were real page content.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        # Strip non-content elements before extracting text.
        for junk in soup(["script", "style", "nav", "footer", "header"]):
            junk.decompose()
        text = " ".join(p.text for p in soup.find_all("p"))
        cache["web_url"], cache["web_content"] = url, text[:12000]
        return cache["web_content"]
    except Exception as e:
        # Errors are deliberately NOT cached, so a retry hits the network again.
        return f"Error: {str(e)}"
41
+
42
def _extract_video_id(url):
    """Best-effort extraction of the video id from a watch/share/short URL."""
    return url.split("v=")[-1].split("&")[0].split("/")[-1].strip()


def get_youtube_content(url):
    """Return transcript text for a YouTube video.

    Tries the official transcript API first; if that fails for any reason
    (no captions, region block, ...), falls back to downloading the audio
    with yt-dlp and transcribing it locally with Whisper.

    The last video id/text pair is memoized in the module-level ``cache``.
    """
    global cache
    video_id = _extract_video_id(url)
    if video_id == cache["yt_url"]:
        return cache["yt_content"]

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        content = " ".join(item["text"] for item in transcript)
    # Bug fix: was a bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit. Any transcript-API failure triggers the audio fallback.
    except Exception:
        ydl_opts = {
            'format': 'm4a/bestaudio/best',
            'outtmpl': 'temp_audio.%(ext)s',
            'quiet': True,
            'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'm4a'}],
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            # fp16 only helps on GPU; CPU Whisper requires fp32.
            result = audio_model.transcribe("temp_audio.m4a", fp16=(device == "cuda"))
            content = result["text"]
        finally:
            # Bug fix: cleanup now runs even if download/transcription raises,
            # so a failed run no longer leaves a stale temp_audio.m4a behind.
            if os.path.exists("temp_audio.m4a"):
                os.remove("temp_audio.m4a")

    cache["yt_url"], cache["yt_content"] = video_id, content
    return content
64
+
65
def ask_ai(context, question):
    """Answer *question* using only *context*, via Groq's Llama 3.3 70B model.

    Contexts that are empty/None or shorter than 20 characters are treated as
    "nothing was extracted" and short-circuit with an error string.
    """
    # Guard clause: refuse to query the model with no usable source text.
    if not context or len(context) < 20:
        return "❌ Error: No content found."

    system_prompt = f"Answer using ONLY this text:\n\n{context}"
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question},
        ],
    )
    return response.choices[0].message.content
75
+
76
# --- UI: two tabs, one per content source, both funneling into ask_ai() ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸš€ Ultimate AI Research Assistant (V2)")

    def _answer_from_website(url, question):
        # Fetch (or reuse cached) page text, then ask the model about it.
        return ask_ai(get_website_content(url), question)

    def _answer_from_video(url, question):
        # Fetch (or reuse cached) transcript, then ask the model about it.
        return ask_ai(get_youtube_content(url), question)

    with gr.Tabs():
        with gr.TabItem("🌐 Website Analysis"):
            site_url = gr.Textbox(label="Website URL")
            site_question = gr.Textbox(label="Question")
            site_answer = gr.Textbox(label="AI Response")
            site_button = gr.Button("Analyze Site")
            site_button.click(_answer_from_website, [site_url, site_question], site_answer)
        with gr.TabItem("πŸŽ₯ YouTube Analysis"):
            video_url = gr.Textbox(label="YouTube URL")
            video_question = gr.Textbox(label="Question")
            video_answer = gr.Textbox(label="AI Response")
            video_button = gr.Button("Analyze Video")
            video_button.click(_answer_from_video, [video_url, video_question], video_answer)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ requests
2
+ beautifulsoup4
3
+ groq
4
+ gradio
5
+ yt-dlp
6
+ openai-whisper
7
+ youtube-transcript-api
8
+ torch