GabrielSalem committed on
Commit
32ea916
·
verified ·
1 Parent(s): 9da0170

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -0
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import time
4
+ import requests
5
+ from datetime import datetime
6
+ from typing import List
7
+
8
+ import gradio as gr
9
+ from openai import OpenAI
10
+
11
+ # -----------------------
12
+ # Configuration (env)
13
+ # -----------------------
14
+ # Set these in the HF Space secrets / environment (DO NOT hardcode keys)
15
# Scraper service: fixed request headers and an endpoint that can be
# overridden through the SCRAPER_API_URL environment variable.
SCRAPER_HEADERS = {"User-Agent": "Mozilla/5.0", "Content-Type": "application/json"}
SCRAPER_API_URL = os.getenv(
    "SCRAPER_API_URL",
    "https://deep-scraper-96.created.app/api/deep-scrape",
)

# LLM settings, all taken from the environment.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # required
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://openrouter.ai/api/v1")  # optional override
LLM_MODEL = os.getenv("LLM_MODEL", "openai/gpt-oss-20b:free")  # default model id

# A missing API key must not break importing the UI module: `client` is left
# as None and the analysis step reports the problem when it is invoked.
client = OpenAI(base_url=OPENAI_BASE_URL, api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
30
+
31
# System prompt sent ahead of the scraped text on every analysis request.
# It asks for a plain-text (non-JSON) report with ten numbered sections per
# company. The wording is runtime data for the model and is kept verbatim.
PROMPT_TEMPLATE = """You are AURA, an advanced hedge fund analysis engine.
Analyze ALL the following data deeply and output clearly in text (no JSON).
extract the historical stock price data of each company your analysing to heighten the investment and to augment the arguments to see if its good to invest or not

For each company, include:
1. Company Name, Sector, Country
2. Hedge Fund Investors (names + amounts if found)
3. Insider Transactions (who bought/sold, when, how much)
4. Reasons Hedge Funds Invest (3–6 tangible points)
5. Risk Notes (1–3 key concerns)
6. Boom Potential: High / Medium / Low
7. Investment Strategy:
- Entry timing (now, on dip, post-earnings, etc.)
- Strategy type (growth, momentum, value, defensive, options)
- Holding period (short/medium/long)
- Exit signals (2–3 concrete ones)
- for each stock provide an investment strategy and investment model how to invest and when how much to wait and approximation of what will be earned
8. Correlations (hedge fund behavior vs fundamentals)
9. Global Trend Conclusion (3–5 hedge fund behavior patterns)
10. Add a 1–2 min video narration script summarizing everything engagingly and professionally.

Be detailed, analytical, and use professional formatting.
extract the historical stock price data of each company your analysing to heighten the investment and to augment the arguments to see if its good to invest or not
"""
56
+
57
+ # -----------------------
58
+ # Scraping helpers
59
+ # -----------------------
60
def deep_scrape(query: str, retries: int = 3, timeout: int = 60) -> str:
    """Query the scraper service and return its response as readable text.

    Sends ``{"query": query}`` as a JSON POST to ``SCRAPER_API_URL`` using
    ``SCRAPER_HEADERS``. Failed attempts are retried after a 2 second pause.

    Args:
        query: Search phrase forwarded to the scraper.
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so that at least one request is always made.
        timeout: Per-request timeout in seconds, passed to ``requests.post``.

    Returns:
        For a JSON object response, one section per key (upper-cased key,
        colon, newline, value, newline) joined by newlines. For any other
        JSON value, its ``str()``. If every attempt fails, the string
        ``"ERROR: <last exception>"``; this function reports request and
        decoding failures as text instead of raising.
    """
    payload = {"query": query}
    attempts = max(1, retries)
    last_err = None
    for attempt in range(1, attempts + 1):
        try:
            resp = requests.post(
                SCRAPER_API_URL,
                headers=SCRAPER_HEADERS,
                json=payload,
                timeout=timeout,
            )
            resp.raise_for_status()
            result = resp.json()
        except (requests.RequestException, ValueError) as err:
            # RequestException covers connection, timeout and HTTP status
            # errors; ValueError covers a body that is not valid JSON.
            last_err = err
            if attempt < attempts:
                time.sleep(2)
            continue

        # Success: flatten the payload into text and stop retrying.
        if isinstance(result, dict):
            return "\n".join(f"{k.upper()}:\n{v}\n" for k, v in result.items())
        return str(result)

    return f"ERROR: {last_err}"
84
+
85
def multi_scrape(queries: List[str], delay: float = 1.0) -> str:
    """Scrape several queries in order and join the results into one text.

    Blank or whitespace-only queries are skipped. Each remaining query adds
    a ``=== DATA FROM QUERY: <QUERY UPPER-CASED> ===`` header followed by
    whatever ``deep_scrape`` returns for it.

    Args:
        queries: Query strings; ``None`` is treated as an empty list.
        delay: Seconds to wait between consecutive scrapes. The pause is
            applied only between requests (not after the last one), and
            negative or ``None`` values are treated as no pause.

    Returns:
        All headers and scrape results joined with newlines, or an empty
        string when there is nothing to scrape.
    """
    cleaned = [q.strip() for q in (queries or ()) if q and q.strip()]
    pause = max(0.0, delay or 0.0)

    results = []
    for index, q in enumerate(cleaned):
        # Throttle between requests only; waiting after the final query
        # would just add latency.
        if index and pause:
            time.sleep(pause)
        results.append(f"\n=== DATA FROM QUERY: {q.upper()} ===\n")
        results.append(deep_scrape(q))
    return "\n".join(results)
97
+
98
+ # -----------------------
99
+ # LLM analysis
100
+ # -----------------------
101
def analyze_hedgefund_investments(raw_text: str, model: str = None, max_tokens: int = 8000) -> str:
    """Send scraped text to the configured chat-completion endpoint.

    ``PROMPT_TEMPLATE`` is sent as the system message and ``raw_text`` as
    the user message.

    Args:
        raw_text: Scraped data to analyse.
        model: Model identifier; falls back to ``LLM_MODEL`` when empty.
        max_tokens: Upper bound on the length of the generated reply.

    Returns:
        The text of the first choice. If the response has no choices, the
        ``str()`` of the whole response; if the first choice carries no text
        content, the ``str()`` of that choice. Failures are returned as a
        string starting with ``"ERROR"`` rather than raised, so the caller
        can display them directly.
    """
    if client is None:
        return "ERROR: OPENAI_API_KEY not set in environment."

    try:
        completion = client.chat.completions.create(
            extra_headers={"X-Title": "MyQuantApp"},
            model=model or LLM_MODEL,
            messages=[
                {"role": "system", "content": PROMPT_TEMPLATE},
                {"role": "user", "content": raw_text},
            ],
            max_tokens=max_tokens,
        )
    except Exception as e:
        # UI boundary: any SDK or network failure is shown as text.
        return f"ERROR during LLM analysis: {e}"

    # `choices` may be missing, None or empty; treat all three the same
    # instead of letting len(None) raise.
    choices = getattr(completion, "choices", None)
    if not choices:
        return str(completion)

    first = choices[0]
    content = getattr(getattr(first, "message", None), "content", None)
    # Always hand a string back, even when the choice has no text content.
    return content if content is not None else str(first)
127
+
128
+ # -----------------------
129
+ # Pipeline used by Gradio
130
+ # -----------------------
131
def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO-8601 string ending in 'Z'."""
    from datetime import timezone

    # Same text format as the deprecated datetime.utcnow().isoformat() + "Z".
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"


def _all_scrapes_failed(scraped: str) -> bool:
    """Return True when every query section of *scraped* is an error string."""
    import re

    # multi_scrape starts each section with a "=== DATA FROM QUERY: ... ==="
    # line and deep_scrape reports failure as "ERROR: <exception>".
    sections = re.split(
        r"^=== DATA FROM QUERY: .* ===$", scraped, flags=re.MULTILINE
    )[1:]
    return bool(sections) and all(
        section.lstrip().startswith("ERROR: ") for section in sections
    )


def run_pipeline(topics_text: str, delay: float, model_name: str, max_tokens: int):
    """Scrape every query, then ask the LLM to analyse the combined text.

    Args:
        topics_text: Newline-separated list of queries; blank lines are
            ignored and ``None`` is treated as empty.
        delay: Seconds between scrapes, forwarded to ``multi_scrape``.
        model_name: Model passed to the LLM; ``LLM_MODEL`` is used when empty.
        max_tokens: Maximum tokens for the LLM response.

    Returns:
        A ``(scraped_preview, analysis)`` tuple of strings. With no queries
        the result is ``("No topics provided.", "")``. When every scrape
        failed, the LLM is not called and the analysis is an empty string.
    """
    queries = [line.strip() for line in (topics_text or "").splitlines() if line.strip()]
    if not queries:
        return "No topics provided.", ""

    header = f"PIPELINE START: {_utc_timestamp()}\nScraper URL: {SCRAPER_API_URL}\n\n"
    scraped = multi_scrape(queries, delay=delay)

    # The scraped text always begins with a query header, so a plain
    # startswith("ERROR") test can never fire; inspect each section instead.
    if _all_scrapes_failed(scraped):
        return header + scraped, ""

    analysis = analyze_hedgefund_investments(
        scraped, model=model_name or LLM_MODEL, max_tokens=max_tokens
    )
    footer_ts = _utc_timestamp()

    header += "\n=== SCRAPED DATA (preview) ===\n"
    # Keep scraped preview limited to avoid UI overload
    preview = scraped[:20000] + ("\n\n...[TRUNCATED]" if len(scraped) > 20000 else "")
    result_scraped = header + preview + f"\n\n=== END SCRAPED PREVIEW ===\nGenerated: {footer_ts}\n"
    return result_scraped, analysis
153
+
154
+ # -----------------------
155
+ # Gradio UI
156
+ # -----------------------
157
# Gradio front end: query/settings inputs on the left, scraped preview and
# LLM analysis on the right. The Run button feeds the inputs to run_pipeline.
with gr.Blocks(title="AURA — Hedge Fund Analysis (Scraper + LLM)") as demo:
    gr.Markdown(
        """
# AURA — Hedge Fund Analysis (Gradio)
Enter newline-separated queries (e.g. "SEC insider transactions october 2025", "13F filings Q3 2025") and press **Run**.
**Important:** Set environment variables `OPENAI_API_KEY` (and optionally `OPENAI_BASE_URL`, `SCRAPER_API_URL`) in your Space secrets.
"""
    )

    with gr.Row():
        with gr.Column(scale=2):
            topics = gr.Textbox(
                lines=8,
                label="Queries (one per line)",
                placeholder="e.g.\nSEC insider transactions october 2025\ninstitutional 13F filings Q3 2025",
            )
            delay = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, step=0.5, label="Delay between scrapes (sec)")
            model_name = gr.Textbox(label="LLM model name (optional)", value=LLM_MODEL)
            max_tokens = gr.Number(value=40000, label="Max tokens for LLM (may be limited by provider)")
            run_btn = gr.Button("Run Pipeline")
            run_note = gr.Markdown("**Note:** If OPENAI_API_KEY is not set in environment, the analysis step will fail.")
        with gr.Column(scale=3):
            scraped_out = gr.Textbox(lines=18, label="Scraped data (preview)", interactive=False)
            analysis_out = gr.Textbox(lines=18, label="LLM Analysis Output", interactive=False)

    def on_run(topics_text, delay_val, model_val, max_toks):
        """Click handler: run the pipeline and return (scraped preview, analysis)."""
        # An empty Number field falls back to 40000, the field's initial value.
        return run_pipeline(topics_text, delay_val, model_val, int(max_toks or 40000))

    run_btn.click(on_run, inputs=[topics, delay, model_name, max_tokens], outputs=[scraped_out, analysis_out])

    gr.Markdown(
        """
## Deployment notes
- Set `OPENAI_API_KEY` in your Space Secrets.
- If you use OpenRouter or another OpenAI-compatible host, set `OPENAI_BASE_URL` too.
- Set `SCRAPER_API_URL` if you have a custom scraper service.
"""
    )
192
+
193
if __name__ == "__main__":
    # Listen on all interfaces; use the PORT environment variable when set,
    # otherwise port 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)