Guiyom committed on
Commit
10fd028
·
verified ·
1 Parent(s): 93ae956

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -41
app.py CHANGED
@@ -37,25 +37,11 @@ def call_visual_llm(prompt: str) -> str:
37
  return response
38
 
39
  def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
40
- """
41
- Given the placeholder instructions, create a prompt and call the LLM to generate
42
- a complete HTML/CSS/JS snippet for the visualization.
43
- """
44
  prompt = (f"""
45
- Generate a complete, self-contained HTML code snippet that includes inline CSS and JavaScript(Flexbox/Grid, animations, transitions).
46
- The code should display an interactive, appealing visualization based on the following requirements:
47
  {placeholder_text}
48
 
49
- // Requirements
50
- - the dimensions should be less than 400 high and 400 width
51
- - use a font no larger than 10, with bold and italic if needed
52
- - if for a specific shape the background is dark, the text should be white (and vice versa if the background is clear)
53
- - Use semantic HTML5 elements
54
- - Add subtle animations and transitions
55
- - Display either:
56
- o chart (histogram, curve) with the proper call to a js library
57
- o a diagram (in the style of a mindmap, or five forces, or a flow chart)
58
-
59
  // Reference
60
  The visual is expected to be integrated within a report generated for the user, it should make use of any relevant information from:
61
  - the initial user query:
@@ -65,12 +51,21 @@ The visual is expected to be integrated within a report generated for the user,
65
  - some knowledge material gathered from search engines
66
  {crumbs}
67
 
 
 
 
 
 
 
 
 
 
 
68
  // IMPORTTANT
69
- - The visualization should be responsive, include any necessary interactive features (such as tooltips, clickable items, animations),
70
- and output only the code
71
  - no extra explanation
72
- - no code fences.
73
-
74
  """
75
  )
76
  result = call_visual_llm(prompt)
@@ -171,10 +166,6 @@ def openai_call(prompt: str, messages: list = None, model: str = "o3-mini",
171
  return err_msg
172
 
173
  def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 4000) -> dict:
174
- """
175
- Use gpt-4o-mini to process a snippet from a query result.
176
- Returns a dictionary with keys: 'relevant', 'summary', and 'followups'.
177
- """
178
  client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
179
  prompt = (f"""Analyze the following content from a query result:
180
 
@@ -184,24 +175,26 @@ Research topic:
184
  {query}
185
 
186
  Instructions:
187
- 1. **Relevance:** Determine if the content is relevant to the research topic. Answer with a single word: 'yes' or 'no'.
188
-
189
- 2. **Structured Summary (if relevant):** If the content is relevant, provide a comprehensive summary structured into the following sections. **Prioritize extreme conciseness and token efficiency while preserving all key information.** Aim for the shortest possible summary that retains all essential facts, figures, arguments, and quotes. The total summary should not exceed 1000 words, but shorter is strongly preferred.
190
- * **Key Facts:** List the core factual claims. Use short, declarative sentences or bullet points. **Apply lemmatization, common abbreviations (e.g., vs., e.g., i.e., AI, LLM), and remove unnecessary words.**
191
- * **Key Figures:** Extract numerical data, statistics, dates, percentages. Use numerical representation. **Present concisely (list or table format).**
192
- * **Key Arguments:** Identify main arguments/claims. Summarize supporting evidence and counter-arguments. **Use lemmatization, abbreviations, and concise phrasing. Remove redundant phrases.**
193
- * **Key Quotes:** Include significant quotes. Attribute quotes correctly. **Choose quotes that are concise and impactful. If a quote can be paraphrased concisely without losing essential meaning, paraphrase it and note that it's a paraphrase.** Use symbols instead of words (&, +, ->, =, ...).
194
- **General Optimization Guidelines:**
195
- * **Lemmatize:** Use the root form of words (e.g., "running" -> "run").
196
- * **Abbreviate:** Use common abbreviations (see list above).
197
- * **Remove Redundancy:** Eliminate unnecessary words and phrases. Be concise.
198
- * **Shorten Words (Carefully):** If a shorter word conveys the same meaning (e.g., "information" -> "info"), use it, but avoid ambiguity.
199
- * **Implicit Representation:** Remove redundant terms.
200
- * **Use Symbols:** Use symbols instead of words (&, +, ->, =, ...).
201
-
202
- 3. **Follow-up Search Queries:** Generate at least {breadth} follow-up search queries. These should be relevant to the research topic but also developments from the content summarized, aim for deeper understanding, use search operators (AND, OR, quotation marks), and be represented as a Python list of strings.
 
 
203
  For example: "Artificial intelligence" AND (mathematics OR geometry) -algebra,science AND history AND mathematics,...
204
- Return the result as a JSON object with the keys 'relevant', 'summary', and 'followups'. The 'summary' value should itself be a JSON object with keys 'Key Facts', 'Key Figures', 'Key Arguments', and 'Key Quotes'.
205
 
206
  Proceed."""
207
  )
 
37
  return response
38
 
39
  def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
 
 
 
 
40
  prompt = (f"""
41
+ Generate a complete, self-contained HTML code snippet that includes inline CSS and JavaScript (only to call relevant libraries).
42
+ The code should display a simple but effective and elegant visualization based on the following requirements:
43
  {placeholder_text}
44
 
 
 
 
 
 
 
 
 
 
 
45
  // Reference
46
  The visual is expected to be integrated within a report generated for the user, it should make use of any relevant information from:
47
  - the initial user query:
 
51
  - some knowledge material gathered from search engines
52
  {crumbs}
53
 
54
+ // Requirements
55
+ - the dimensions should be less than 500px height and 500px width (it should be printable once the report is converted to pdf)
56
+ - use a font no larger than 10, with bold and italic if needed
57
+ - if for a specific shape the background is dark, the text should be white (and vice versa if the background is clear)
58
+ - Use HTML5 elements if necessary
59
+ - Display either:
60
+ o chart (histogram, curve) with the proper call to a js library (ex: d3.js or plotly)
61
+ o a diagram (in the style of a mindmap, or five forces, or a flow chart)
62
+ - keep it simple but effective to convey the message
63
+
64
  // IMPORTTANT
65
+ - output only the code
 
66
  - no extra explanation
67
+ - no code fences
68
+ - do not add <html> </html> or <!DOCTYPE html>, the snippet will be integrated in a html code body part at a pre-defined location
69
  """
70
  )
71
  result = call_visual_llm(prompt)
 
166
  return err_msg
167
 
168
  def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 4000) -> dict:
 
 
 
 
169
  client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
170
  prompt = (f"""Analyze the following content from a query result:
171
 
 
175
  {query}
176
 
177
  Instructions:
178
+ 1. Relevance: Determine if the content is relevant to the research topic. Answer with a single word: 'yes' or 'no'.
179
+
180
+ 2. Structure: If the content is relevant, provide a comprehensive summary structured into the following sections. Prioritize extreme conciseness and token efficiency while preserving all key information. Aim for the shortest possible summary that retains all essential facts, figures, arguments, and quotes. The total summary should not exceed 1000 words, but shorter is strongly preferred.
181
+ - Key Facts (at least 10): List the core factual claims. Use short, declarative sentences or bullet points. Apply lemmatization, common abbreviations (e.g., vs., e.g., i.e., AI, LLM), and remove unnecessary words.
182
+ - Key Figures (at least 5): Extract numerical data, statistics, dates, percentages. Use numerical representation. Present concisely (list or table format).
183
+ - Key Arguments (at least 10): Identify main arguments/claims. Summarize supporting evidence and counter-arguments. Use lemmatization, abbreviations, and concise phrasing. Remove redundant phrases.
184
+ - Key Quotes (at least 1 f any): Include significant quotes (with the name of the author between parenthesis). Attribute quotes correctly. Choose quotes that are concise and impactful. If a quote can be paraphrased concisely without losing essential meaning, paraphrase it and note that it's a paraphrase. Use symbols instead of words (&, +, ->, =, ...).
185
+ - Structured summary (10 to 50 sentences depending on the length): mention anecdotes, people, locations, anything that make will make the end report relatable and grounded
186
+
187
+ Note: General Optimization Guidelines:
188
+ - Lemmatize: Use the root form of words (e.g., "running" -> "run").
189
+ - Abbreviate: Use common abbreviations
190
+ - Remove Redundancy: Eliminate unnecessary words and phrases. Be concise.
191
+ - Shorten Words (Carefully): If a shorter word conveys the same meaning (e.g., "information" -> "info"), use it, but avoid ambiguity.
192
+ - Implicit Representation: Remove redundant terms.
193
+ - Use Symbols: Use symbols instead of words (&, +, ->, =, ...).
194
+
195
+ 3. Follow-up Search Queries: Generate at least {breadth} follow-up search queries. These should be relevant to the research topic but also developments from the content summarized, aim for deeper understanding, use search operators (AND, OR, quotation marks), and be represented as a Python list of strings.
196
  For example: "Artificial intelligence" AND (mathematics OR geometry) -algebra,science AND history AND mathematics,...
197
+ Return the result as a JSON object with the keys 'relevant', 'structure', and 'followups'. The 'structure' value should itself be a JSON object with keys 'Key Facts', 'Key Figures', 'Key Arguments', 'Key Quotes' and 'Summary'.
198
 
199
  Proceed."""
200
  )