Spaces:

GabrielSalem
/

PublicAlpha

Sleeping

App Files Files Community

GabrielSalem committed on Nov 28, 2025

Commit

32ea916

verified ·

1 Parent(s): 9da0170

Create app.py

Browse files

Files changed (1) hide show

app.py +194 -0

app.py ADDED Viewed

	@@ -0,0 +1,194 @@

+# app.py
+import os
+import time
+import requests
+from datetime import datetime
+from typing import List
+import gradio as gr
+from openai import OpenAI
+# -----------------------
+# Configuration (env)
+# -----------------------
+# Set these in the HF Space secrets / environment (DO NOT hardcode keys)
+SCRAPER_API_URL = os.getenv("SCRAPER_API_URL", "https://deep-scraper-96.created.app/api/deep-scrape")
+SCRAPER_HEADERS = {
+    "User-Agent": "Mozilla/5.0",
+    "Content-Type": "application/json",
+}
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # required
+OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://openrouter.ai/api/v1")  # optional override
+LLM_MODEL = os.getenv("LLM_MODEL", "openai/gpt-oss-20b:free")  # default from your snippet
+if not OPENAI_API_KEY:
+    # Don't crash UI import — we'll show a clear message when trying to run
+    client = None
+else:
+    client = OpenAI(base_url=OPENAI_BASE_URL, api_key=OPENAI_API_KEY)
+# System prompt sent ahead of the scraped text on every LLM call. It asks for a
+# plain-text (non-JSON) report with a fixed list of per-company sections.
+# The wording is runtime behavior: changing it changes the model's output.
+PROMPT_TEMPLATE = """You are AURA, an advanced hedge fund analysis engine.
+Analyze ALL the following data deeply and output clearly in text (no JSON).
+extract the historical stock price data of each company your analysing to heighten the investment and to augment the arguments to see if its good to invest or not
+For each company, include:
+1. Company Name, Sector, Country
+2. Hedge Fund Investors (names + amounts if found)
+3. Insider Transactions (who bought/sold, when, how much)
+4. Reasons Hedge Funds Invest (3–6 tangible points)
+5. Risk Notes (1–3 key concerns)
+6. Boom Potential: High / Medium / Low
+7. Investment Strategy:
+   - Entry timing (now, on dip, post-earnings, etc.)
+   - Strategy type (growth, momentum, value, defensive, options)
+   - Holding period (short/medium/long)
+   - Exit signals (2–3 concrete ones)
+   - for each stock provide an investment strategy and investment model how to invest and when how much to wait and approximation of what will be earned
+8. Correlations (hedge fund behavior vs fundamentals)
+9. Global Trend Conclusion (3–5 hedge fund behavior patterns)
+10. Add a 1–2 min video narration script summarizing everything engagingly and professionally.
+Be detailed, analytical, and use professional formatting.
+extract the historical stock price data of each company your analysing to heighten the investment and to augment the arguments to see if its good to invest or not
+"""
+# -----------------------
+# Scraping helpers
+# -----------------------
+def deep_scrape(query: str, retries: int = 3, timeout: int = 60) -> str:
+    """Query SCRAPER_API_URL and return aggregated readable text."""
+    payload = {"query": query}
+    last_err = None
+    for attempt in range(1, retries + 1):
+        try:
+            resp = requests.post(SCRAPER_API_URL, headers=SCRAPER_HEADERS, json=payload, timeout=timeout)
+            resp.raise_for_status()
+            result = resp.json()
+            # Format result into text
+            if isinstance(result, dict):
+                parts = []
+                for k, v in result.items():
+                    parts.append(f"{k.upper()}:\n{v}\n")
+                return "\n".join(parts)
+            else:
+                return str(result)
+        except Exception as e:
+            last_err = e
+            if attempt < retries:
+                time.sleep(2)
+            else:
+                return f"ERROR: {e}"
+    return f"ERROR: {last_err}"
+def multi_scrape(queries: List[str], delay: float = 1.0) -> str:
+    """Scrape multiple queries and join results."""
+    results = []
+    for q in queries:
+        q = q.strip()
+        if not q:
+            continue
+        results.append(f"\n=== DATA FROM QUERY: {q.upper()} ===\n")
+        data = deep_scrape(q)
+        results.append(data)
+        time.sleep(delay)
+    return "\n".join(results)
+# -----------------------
+# LLM analysis
+# -----------------------
+def analyze_hedgefund_investments(raw_text: str, model: str = None, max_tokens: int = 8000):
+    """Call the configured OpenAI client chat completion endpoint."""
+    if client is None:
+        return "ERROR: OPENAI_API_KEY not set in environment."
+    try:
+        model = model or LLM_MODEL
+        # Keep messages concise: system prompt then user content.
+        completion = client.chat.completions.create(
+            extra_headers={"X-Title": "MyQuantApp"},
+            model=model,
+            messages=[
+                {"role": "system", "content": PROMPT_TEMPLATE},
+                {"role": "user", "content": raw_text},
+            ],
+            max_tokens=max_tokens,
+        )
+        # Safety: check structure
+        if hasattr(completion, "choices") and len(completion.choices) > 0:
+            # Newer SDK returns choices[].message.content
+            try:
+                return completion.choices[0].message.content
+            except Exception:
+                return str(completion.choices[0])
+        return str(completion)
+    except Exception as e:
+        return f"ERROR during LLM analysis: {e}"
+# -----------------------
+# Pipeline used by Gradio
+# -----------------------
+def run_pipeline(topics_text: str, delay: float, model_name: str, max_tokens: int):
+    """
+    topics_text: newline separated list of queries
+    delay: seconds between scrapes
+    model_name: model to pass to LLM (optional)
+    max_tokens: max tokens for LLM response
+    """
+    if not topics_text.strip():
+        return "No topics provided.", ""
+    queries = [line.strip() for line in topics_text.splitlines() if line.strip()]
+    start_ts = datetime.utcnow().isoformat() + "Z"
+    header = f"PIPELINE START: {start_ts}\nScraper URL: {SCRAPER_API_URL}\n\n"
+    scraped = multi_scrape(queries, delay=delay)
+    if scraped.startswith("ERROR"):
+        return header + scraped, ""
+    analysis = analyze_hedgefund_investments(scraped, model=model_name or LLM_MODEL, max_tokens=max_tokens)
+    footer_ts = datetime.utcnow().isoformat() + "Z"
+    header += f"\n=== SCRAPED DATA (preview) ===\n"
+    # Keep scraped preview limited to avoid UI overload
+    preview = scraped[:20000] + ("\n\n...[TRUNCATED]" if len(scraped) > 20000 else "")
+    result_scraped = header + preview + f"\n\n=== END SCRAPED PREVIEW ===\nGenerated: {footer_ts}\n"
+    return result_scraped, analysis
+# -----------------------
+# Gradio UI
+# -----------------------
+# Build the page: an intro, a row with an input column and an output column,
+# the click wiring, and a closing block of deployment notes.
+with gr.Blocks(title="AURA — Hedge Fund Analysis (Scraper + LLM)") as demo:
+    gr.Markdown(
+        """
+        # AURA — Hedge Fund Analysis (Gradio)
+        Enter newline-separated queries (e.g. "SEC insider transactions october 2025", "13F filings Q3 2025") and press **Run**.
+        **Important:** Set environment variables `OPENAI_API_KEY` (and optionally `OPENAI_BASE_URL`, `SCRAPER_API_URL`) in your Space secrets.
+        """
+    )
+    with gr.Row():
+        # Input column: queries, scrape delay, model name, token limit, run button.
+        with gr.Column(scale=2):
+            topics = gr.Textbox(lines=8, label="Queries (one per line)", placeholder="e.g.\nSEC insider transactions october 2025\ninstitutional 13F filings Q3 2025")
+            delay = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, step=0.5, label="Delay between scrapes (sec)")
+            model_name = gr.Textbox(label="LLM model name (optional)", value=LLM_MODEL)
+            max_tokens = gr.Number(value=40000, label="Max tokens for LLM (may be limited by provider)")
+            run_btn = gr.Button("Run Pipeline")
+            run_note = gr.Markdown("**Note:** If OPENAI_API_KEY is not set in environment, the analysis step will fail.")
+        # Output column: read-only boxes for the scrape preview and the LLM analysis.
+        with gr.Column(scale=3):
+            scraped_out = gr.Textbox(lines=18, label="Scraped data (preview)", interactive=False)
+            analysis_out = gr.Textbox(lines=18, label="LLM Analysis Output", interactive=False)
+    def on_run(topics_text, delay_val, model_val, max_toks):
+        """Click handler: forward the widget values to ``run_pipeline``.
+
+        The token limit is converted to ``int``; an empty or zero value falls
+        back to 40000.
+        """
+        scraped_preview, analysis = run_pipeline(topics_text, delay_val, model_val, int(max_toks or 40000))
+        return scraped_preview, analysis
+    # The handler's two return values fill the two output boxes, in order.
+    run_btn.click(on_run, inputs=[topics, delay, model_name, max_tokens], outputs=[scraped_out, analysis_out])
+    gr.Markdown(
+        """
+        ## Deployment notes
+        - Set `OPENAI_API_KEY` in your Space Secrets.
+        - If you use OpenRouter or another OpenAI-compatible host, set `OPENAI_BASE_URL` too.
+        - Set `SCRAPER_API_URL` if you have a custom scraper service.
+        """
+    )
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))