Spaces:
Sleeping
Sleeping
| # app.py | |
| import os | |
| import time | |
| import requests | |
| from datetime import datetime | |
| from typing import List | |
| import gradio as gr | |
| from openai import OpenAI | |
| # ----------------------- | |
| # Configuration (env) | |
| # ----------------------- | |
| # Set these in the HF Space secrets / environment (DO NOT hardcode keys) | |
# NOTE: `os.getenv(name) or default` is used instead of `os.getenv(name, default)`
# so that a variable that is defined but empty (easy to do in a secrets UI)
# falls back to the default instead of producing an unusable "" value.
SCRAPER_API_URL = os.getenv("SCRAPER_API_URL") or "https://deep-scraper-96.created.app/api/deep-scrape"

# Headers sent with every scraper request.
SCRAPER_HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Content-Type": "application/json",
}

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # required
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL") or "https://openrouter.ai/api/v1"  # optional override
LLM_MODEL = os.getenv("LLM_MODEL") or "openai/gpt-oss-20b:free"  # default model

if OPENAI_API_KEY:
    client = OpenAI(base_url=OPENAI_BASE_URL, api_key=OPENAI_API_KEY)
else:
    # Don't crash at import time — the UI still loads, and the analysis step
    # reports a clear error message when it finds `client is None`.
    client = None
# System prompt sent to the LLM ahead of the scraped data.
# The numeric ranges use en dashes; the mis-encoded placeholder characters that
# had replaced them were repaired. The wording itself is intentionally unchanged.
PROMPT_TEMPLATE = """You are AURA, an advanced hedge fund analysis engine.
Analyze ALL the following data deeply and output clearly in text (no JSON).
extract the historical stock price data of each company your analysing to heighten the investment and to augment the arguments to see if its good to invest or not
For each company, include:
1. Company Name, Sector, Country
2. Hedge Fund Investors (names + amounts if found)
3. Insider Transactions (who bought/sold, when, how much)
4. Reasons Hedge Funds Invest (3–6 tangible points)
5. Risk Notes (1–3 key concerns)
6. Boom Potential: High / Medium / Low
7. Investment Strategy:
- Entry timing (now, on dip, post-earnings, etc.)
- Strategy type (growth, momentum, value, defensive, options)
- Holding period (short/medium/long)
- Exit signals (2–3 concrete ones)
- for each stock provide an investment strategy and investment model how to invest and when how much to wait and approximation of what will be earned
8. Correlations (hedge fund behavior vs fundamentals)
9. Global Trend Conclusion (3–5 hedge fund behavior patterns)
10. Add a 1–2 min video narration script summarizing everything engagingly and professionally.
Be detailed, analytical, and use professional formatting.
extract the historical stock price data of each company your analysing to heighten the investment and to augment the arguments to see if its good to invest or not
"""
| # ----------------------- | |
| # Scraping helpers | |
| # ----------------------- | |
def deep_scrape(query: str, retries: int = 3, timeout: int = 60) -> str:
    """Query the scraper service and return its response as readable text.

    POSTs ``{"query": query}`` as JSON to ``SCRAPER_API_URL`` using
    ``SCRAPER_HEADERS``. A JSON object response is rendered as one section
    per top-level key (upper-cased key, colon, then the value on the next
    line); any other JSON value is returned through ``str()``.

    Args:
        query: Search query forwarded to the scraper.
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so the request is always tried at least once (previously such
            values skipped the request and returned ``"ERROR: None"``).
        timeout: Per-request timeout in seconds, passed to ``requests.post``.

    Returns:
        The formatted scraper output, or ``"ERROR: <exception>"`` when every
        attempt failed. Request, HTTP-status and JSON-decoding failures are
        reported through the return value rather than raised.
    """
    attempts = max(1, retries)
    payload = {"query": query}
    last_err = None

    for attempt in range(1, attempts + 1):
        try:
            resp = requests.post(
                SCRAPER_API_URL,
                headers=SCRAPER_HEADERS,
                json=payload,
                timeout=timeout,
            )
            resp.raise_for_status()
            result = resp.json()
        except Exception as e:
            # Deliberately broad: this is a best-effort helper whose callers
            # expect an "ERROR: ..." string instead of an exception.
            last_err = e
            if attempt < attempts:
                time.sleep(2)  # brief pause before the next attempt
            continue

        # Success: turn the JSON payload into plain text.
        if isinstance(result, dict):
            return "\n".join(f"{k.upper()}:\n{v}\n" for k, v in result.items())
        return str(result)

    return f"ERROR: {last_err}"
def multi_scrape(queries: List[str], delay: float = 1.0) -> str:
    """Scrape several queries in order and join the results into one text.

    Blank (empty or whitespace-only) queries are skipped. Each remaining
    query contributes a ``=== DATA FROM QUERY: <QUERY> ===`` header (query
    upper-cased) followed by whatever ``deep_scrape`` returned for it.

    Args:
        queries: Queries to scrape; ``None`` is treated as an empty list.
        delay: Seconds to wait *between* consecutive scrapes. No wait is
            added after the last query, and non-positive values disable the
            wait entirely.

    Returns:
        The concatenated sections, or ``""`` when there is nothing to scrape.
    """
    cleaned = [q.strip() for q in (queries or []) if q and q.strip()]
    last_index = len(cleaned) - 1

    results = []
    for index, q in enumerate(cleaned):
        results.append(f"\n=== DATA FROM QUERY: {q.upper()} ===\n")
        results.append(deep_scrape(q))
        # Pause only between requests: sleeping after the final one would
        # just delay the caller without sparing the scraper anything.
        if index < last_index and delay and delay > 0:
            time.sleep(delay)
    return "\n".join(results)
| # ----------------------- | |
| # LLM analysis | |
| # ----------------------- | |
def analyze_hedgefund_investments(raw_text: str, model: str = None, max_tokens: int = 8000):
    """Send the scraped text to the configured chat-completion endpoint.

    ``PROMPT_TEMPLATE`` is used as the system message and ``raw_text`` as
    the user message.

    Args:
        raw_text: Scraped data to analyse.
        model: Model identifier; falls back to ``LLM_MODEL`` when empty or
            ``None``.
        max_tokens: Upper bound on the number of tokens in the response.

    Returns:
        The text of the first choice. If the response carries no choices, or
        the first choice has no text content, a string rendering of the
        response / choice is returned so the caller can see what came back.
        On any failure an ``"ERROR..."`` string is returned instead of
        raising, because the result is displayed directly in the UI.
    """
    if client is None:
        return "ERROR: OPENAI_API_KEY not set in environment."

    try:
        completion = client.chat.completions.create(
            extra_headers={"X-Title": "MyQuantApp"},
            model=model or LLM_MODEL,
            messages=[
                {"role": "system", "content": PROMPT_TEMPLATE},
                {"role": "user", "content": raw_text},
            ],
            max_tokens=max_tokens,
        )
    except Exception as e:
        # Deliberately broad: any provider/network failure becomes a message
        # shown to the user rather than a crash of the UI callback.
        return f"ERROR during LLM analysis: {e}"

    # `choices` can be missing, None or empty (e.g. when the provider reports
    # an error inside an otherwise successful response); show the raw
    # response in that case instead of failing on len(None).
    choices = getattr(completion, "choices", None) or []
    if not choices:
        return str(completion)

    first = choices[0]
    content = getattr(getattr(first, "message", None), "content", None)
    if content is None:
        # No text content: return the choice itself so the function always
        # yields a string (previously this path could return None).
        return str(first)
    return content
| # ----------------------- | |
| # Pipeline used by Gradio | |
| # ----------------------- | |
def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO-8601 string with a 'Z' suffix."""
    from datetime import timezone

    # Same output format as the deprecated `datetime.utcnow().isoformat() + "Z"`.
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"


def _all_scrapes_failed(scraped: str) -> bool:
    """Return True when every query section of *scraped* starts with "ERROR"."""
    # Sections are introduced by the header that `multi_scrape` emits.
    sections = scraped.split("=== DATA FROM QUERY: ")[1:]
    if not sections:
        return False
    for section in sections:
        # Drop the rest of the header line, then inspect the payload.
        _, _, body = section.partition("\n")
        if not body.lstrip().startswith("ERROR"):
            return False
    return True


def run_pipeline(topics_text: str, delay: float, model_name: str, max_tokens: int):
    """Scrape every query, then run the LLM analysis on the combined text.

    Args:
        topics_text: Newline-separated list of queries; blank lines are
            ignored. ``None`` is treated like an empty string.
        delay: Seconds between scrapes.
        model_name: Model to pass to the LLM; ``LLM_MODEL`` is used when it
            is empty or blank.
        max_tokens: Max tokens for the LLM response.

    Returns:
        A ``(scraped_preview, analysis)`` tuple of strings. When no topics
        are given the result is ``("No topics provided.", "")``. When
        scraping failed for every query, the LLM is not called and
        ``analysis`` is ``""``.
    """
    if not topics_text or not topics_text.strip():
        return "No topics provided.", ""

    queries = [line.strip() for line in topics_text.splitlines() if line.strip()]
    header = f"PIPELINE START: {_utc_timestamp()}\nScraper URL: {SCRAPER_API_URL}\n\n"

    scraped = multi_scrape(queries, delay=delay)
    # The combined text begins with a section header, so a plain
    # startswith("ERROR") test alone would never detect failed scrapes;
    # the per-section check covers that case.
    if scraped.startswith("ERROR") or _all_scrapes_failed(scraped):
        return header + scraped, ""

    chosen_model = (model_name or "").strip() or LLM_MODEL
    analysis = analyze_hedgefund_investments(scraped, model=chosen_model, max_tokens=max_tokens)
    footer_ts = _utc_timestamp()

    header += "\n=== SCRAPED DATA (preview) ===\n"
    # Keep the scraped preview limited to avoid overloading the UI.
    preview_limit = 20000
    preview = scraped[:preview_limit]
    if len(scraped) > preview_limit:
        preview += "\n\n...[TRUNCATED]"
    result_scraped = header + preview + f"\n\n=== END SCRAPED PREVIEW ===\nGenerated: {footer_ts}\n"
    return result_scraped, analysis
| # ----------------------- | |
| # Gradio UI | |
| # ----------------------- | |
# Page layout: inputs on the left, the two read-only result boxes on the right.
with gr.Blocks(title="AURA — Hedge Fund Analysis (Scraper + LLM)") as demo:
    # Introductory text shown at the top of the page.
    gr.Markdown(
        """
# AURA — Hedge Fund Analysis (Gradio)
Enter newline-separated queries (e.g. "SEC insider transactions october 2025", "13F filings Q3 2025") and press **Run**.
**Important:** Set environment variables `OPENAI_API_KEY` (and optionally `OPENAI_BASE_URL`, `SCRAPER_API_URL`) in your Space secrets.
"""
    )
    with gr.Row():
        with gr.Column(scale=2):
            topics = gr.Textbox(lines=8, label="Queries (one per line)", placeholder="e.g.\nSEC insider transactions october 2025\ninstitutional 13F filings Q3 2025")
            delay = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, step=0.5, label="Delay between scrapes (sec)")
            model_name = gr.Textbox(label="LLM model name (optional)", value=LLM_MODEL)
            max_tokens = gr.Number(value=40000, label="Max tokens for LLM (may be limited by provider)")
            run_btn = gr.Button("Run Pipeline")
            gr.Markdown("**Note:** If OPENAI_API_KEY is not set in environment, the analysis step will fail.")
        with gr.Column(scale=3):
            scraped_out = gr.Textbox(lines=18, label="Scraped data (preview)", interactive=False)
            analysis_out = gr.Textbox(lines=18, label="LLM Analysis Output", interactive=False)

    def on_run(topics_text, delay_val, model_val, max_toks):
        """Button callback: run the pipeline and return (scraped preview, analysis)."""
        # An empty/zero token field falls back to 40000; the value is cast to
        # int before it is handed to the pipeline.
        return run_pipeline(topics_text, delay_val, model_val, int(max_toks or 40000))

    run_btn.click(on_run, inputs=[topics, delay, model_name, max_tokens], outputs=[scraped_out, analysis_out])

    # Deployment hints shown below the controls.
    gr.Markdown(
        """
## Deployment notes
- Set `OPENAI_API_KEY` in your Space Secrets.
- If you use OpenRouter or another OpenAI-compatible host, set `OPENAI_BASE_URL` too.
- Set `SCRAPER_API_URL` if you have a custom scraper service.
"""
    )
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT environment
    # variable, defaulting to 7860 when it is not set.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)