File size: 14,991 Bytes
a7be358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
# © 2025 Elena Marziali — Code released under Apache 2.0 license.
# See LICENSE in the repository for details.
# Removal of this copyright is prohibited.

# === Asynchronous Functions ===
# Size of the semaphore below, i.e. how many holders it admits at once.
MAX_REQUESTS = 5
# Module-wide asyncio semaphore initialised with MAX_REQUESTS permits.
# NOTE(review): `asyncio` is used here although the only `import asyncio`
# visible in this part of the file comes later — presumably it is also imported
# near the top of the file; confirm.
# NOTE(review): the semaphore is constructed at import time; on Python < 3.10
# asyncio primitives bind to the event loop that is current at construction,
# so confirm this runs under the same loop that later awaits it.
API_SEMAPHORE = asyncio.Semaphore(MAX_REQUESTS)

async def safe_api_request(url):
    """Fetch *url* with a GET request and return its decoded JSON body.

    This function used to be defined twice in a row; the second definition
    (without the semaphore) replaced the first at import time, so the
    ``API_SEMAPHORE`` concurrency limit was never applied. The two are merged
    here into a single, throttled definition.

    Args:
        url: Absolute URL to request.

    Returns:
        The parsed JSON payload, or ``None`` when the request fails, the
        server answers with an error status, the 10-second timeout expires or
        the body is not valid JSON. Failures are logged, never raised.

    Note:
        A fresh ``aiohttp.ClientSession`` is opened for every call, so
        connections are not reused between calls.
    """
    # Hold one semaphore permit for the whole request so that at most
    # MAX_REQUESTS calls are talking to remote APIs at the same time.
    async with API_SEMAPHORE:
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(url, timeout=10) as response:
                    response.raise_for_status()
                    return await response.json()
            except Exception as e:
                # Best-effort helper: callers receive None instead of an exception.
                logging.error(f"API request error: {e}")
                return None

# Smart timeout
import asyncio

async def timeout_handler(task, timeout=20):
    """Await *task*, giving up after *timeout* seconds.

    Args:
        task: Awaitable to run.
        timeout: Maximum time to wait, in seconds.

    Returns:
        The awaitable's result, or ``None`` (after logging an error) when the
        time limit is exceeded.
    """
    try:
        outcome = await asyncio.wait_for(task, timeout)
    except asyncio.TimeoutError:
        logging.error("API request timed out")
        outcome = None
    return outcome

import requests

# Connectivity smoke test: issue one minimal arXiv query when the module loads
# and report whether the service is reachable.
url = "http://export.arxiv.org/api/query?search_query=all:physics&start=0&max_results=1"
try:
    response = requests.get(url, timeout=50)
except requests.RequestException as e:
    # Being offline (DNS failure, refused connection, timeout) must not abort
    # loading of the whole module; report it like any other connection error.
    response = None
    print(f"Connection error: {e}")
else:
    if response.status_code == 200:
        print("Connection to arXiv OK")
    else:
        print(f"Connection error: {response.status_code}")

# Advanced parallelization
async def fetch_multiple_data(urls):
    """Request every URL in *urls* concurrently via ``safe_api_request``.

    Returns:
        A list with one entry per URL, in input order. Because the gather
        call uses ``return_exceptions=True``, an exception raised by a request
        is returned in its slot instead of being propagated.
    """
    pending = (safe_api_request(target) for target in urls)
    return await asyncio.gather(*pending, return_exceptions=True)

# Retrieve scientific sources from Zenodo
async def search_zenodo_async(query, max_results=5):
    """
    Searches for open access articles and resources from Zenodo using their public API.

    Args:
        query: Free-text search string. It is sent as a request parameter so
            that spaces and reserved characters (``&``, ``#``, ``+`` ...) are
            encoded instead of corrupting the URL.
        max_results: Value sent as the ``size`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``authors``, ``abstract`` and
        ``url``. When the request succeeds but yields no hits, the list holds
        a single ``{"error": ...}`` dict. On any failure the error is logged
        and an empty list is returned.
    """
    url = "https://zenodo.org/api/records/"
    params = {"q": str(query), "size": str(max_results)}

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, params=params, timeout=10) as response:
                response.raise_for_status()
                data = await response.json()

            articles = []
            for hit in data.get("hits", {}).get("hits", []):
                # Look the metadata up once; tolerate a missing or null entry.
                metadata = hit.get("metadata") or {}
                creators = metadata.get("creators") or []

                articles.append({
                    "title": metadata.get("title", "Title not available"),
                    "authors": ", ".join(c.get("name", "") for c in creators),
                    "abstract": metadata.get("description", "Abstract not available"),
                    "url": hit.get("links", {}).get("html", "No link"),
                })

            # NOTE(review): the "no results" marker is itself a dict, so
            # callers that treat every dict as an article will see it as one.
            return articles if articles else [{"error": "No results found on Zenodo."}]

        except Exception as e:
            # Keep the best-effort contract (empty list) but leave a trace of
            # what went wrong instead of discarding the exception silently.
            logging.error(f"Zenodo error: {e}")
            return []

# Retrieve scientific sources from PubMed
async def search_pubmed_async(query, max_results=5):
    """Asynchronously retrieves links to scientific articles from PubMed.

    Args:
        query: Search term. It is sent as a request parameter so that spaces
            and reserved characters are encoded rather than breaking the URL.
        max_results: Value sent as the ``retmax`` parameter.

    Returns:
        A list of ``https://pubmed.ncbi.nlm.nih.gov/<id>/`` URLs, one per
        ``<Id>`` element found in the XML response. On any failure a string of
        the form ``"PubMed error: ..."`` is returned instead of a list.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": str(query),
        "retmax": str(max_results),
        "retmode": "xml",
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, params=params, timeout=10) as response:
                response.raise_for_status()
                content = await response.text()

            root = ET.fromstring(content)
            return [
                f"https://pubmed.ncbi.nlm.nih.gov/{id_element.text}/"
                for id_element in root.findall(".//Id")
            ]
        except Exception as e:
            return f"PubMed error: {e}"


# Function to handle asynchronous responses from arXiv
def parse_arxiv_response(content):
    """Extract title, abstract, authors and link from an arXiv API response.

    The arXiv API answers with an Atom feed whose elements live in the
    ``http://www.w3.org/2005/Atom`` namespace, so an un-namespaced search such
    as ``.//entry`` matches nothing and every real response parsed to an empty
    list. The ``{*}`` wildcard (Python 3.8+) matches a tag in any namespace or
    in none, so both namespaced and plain XML are handled.

    Args:
        content: XML text returned by the arXiv API.

    Returns:
        A list of dicts with the keys ``title``, ``abstract``, ``authors``
        (comma-separated names) and ``url`` (the entry's ``id`` element).
        Returns an empty list when the XML cannot be parsed.
    """
    try:
        root = ET.fromstring(content)
    except ET.ParseError:
        logging.error("Error parsing arXiv XML.")
        return []

    def child_text(parent, tag, default):
        # Text of the first child called `tag`; `default` if absent or empty.
        node = parent.find(f"{{*}}{tag}")
        if node is None or node.text is None:
            return default
        return node.text

    articles = []
    for entry in root.findall(".//{*}entry"):
        author_names = [
            child_text(author, "name", "")
            for author in entry.findall("{*}author")
        ]
        articles.append({
            "title": child_text(entry, "title", "Title not available"),
            "abstract": child_text(entry, "summary", "Abstract not available"),
            "authors": ", ".join(name for name in author_names if name),
            "url": child_text(entry, "id", "No link"),
        })

    return articles

# === Asynchronous search on arXiv ===
# Queries the arXiv API to retrieve scientific articles.
async def search_arxiv_async(query, max_results=3, retry_attempts=3, timeout=20):
    """Retrieve scientific articles from arXiv, retrying transient failures.

    Args:
        query: Search text, sent as ``all:<query>``. It travels as a request
            parameter so that spaces and reserved characters are encoded.
        max_results: Value sent as the ``max_results`` parameter.
        retry_attempts: Maximum number of requests to make.
        timeout: Per-request timeout passed to the HTTP client.

    Returns:
        Whatever ``parse_arxiv_response`` returns for the first successful
        response, or an empty list once every attempt has failed.
    """
    url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": f"all:{query}",
        "start": "0",
        "max_results": str(max_results),
    }

    async with aiohttp.ClientSession() as session:
        for attempt in range(retry_attempts):
            try:
                async with session.get(url, params=params, timeout=timeout) as response:
                    response.raise_for_status()
                    content = await response.text()

                    if not content.strip():
                        raise ValueError("Error: Empty response from arXiv.")

                    return parse_arxiv_response(content)

            except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as e:
                if attempt + 1 >= retry_attempts:
                    # Last attempt: nothing left to retry, so do not waste
                    # time sleeping before reporting the failure.
                    logging.error(f"Attempt {attempt+1}: Error - {e}.")
                    break

                # Exponential backoff with random jitter, capped at 10 seconds.
                wait_time = min(2 ** attempt + np.random.uniform(0, 1), 10)
                logging.error(f"Attempt {attempt+1}: Error - {e}. Retrying in {wait_time:.1f} seconds...")
                await asyncio.sleep(wait_time)

    logging.error("Error: Unable to retrieve data from arXiv after multiple attempts.")
    return []

# === Asynchronous search on OpenAlex ===
# Retrieves scientific articles with complete metadata (title, authors, abstract, DOI)
def _openalex_author_name(authorship):
    """Return the display name for one OpenAlex authorship entry."""
    # OpenAlex nests the person under authorship["author"]; the flat lookups
    # are kept as fallbacks for payloads shaped differently.
    author = authorship.get("author") or {}
    return (
        author.get("display_name")
        or authorship.get("display_name")
        or authorship.get("raw_author_name")
        or "Unknown author"
    )


def _openalex_abstract(record):
    """Return the abstract of an OpenAlex work as plain text."""
    plain = record.get("abstract")
    if plain:
        return plain

    # OpenAlex ships abstracts as an inverted index: {word: [positions, ...]}.
    inverted = record.get("abstract_inverted_index")
    if not isinstance(inverted, dict) or not inverted:
        return "Abstract not available"

    positioned = sorted(
        (position, word)
        for word, positions in inverted.items()
        for position in positions
    )
    return " ".join(word for _, word in positioned)


async def search_openalex_async(query, max_results=5):
    """Safely retrieve scientific articles from OpenAlex.

    Args:
        query: Text matched against work titles (``title.search`` filter). It
            is sent as a request parameter so that it is properly encoded.
        max_results: Value sent as the ``per-page`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``authors``, ``abstract`` and
        ``url`` (the DOI when present, otherwise the OpenAlex id). On any
        failure a string of the form ``"OpenAlex error: ..."`` is returned
        instead of a list.
    """
    url = "https://api.openalex.org/works"
    params = {
        "filter": f"title.search:{query}",
        "per-page": str(max_results),
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, params=params, timeout=10) as response:
                response.raise_for_status()
                data = await response.json()

            articles = []
            for record in data.get("results", []):
                authorships = record.get("authorships") or []
                articles.append({
                    # The title can be present but null, hence `or`.
                    "title": record.get("title") or "Title not available",
                    "authors": ", ".join(_openalex_author_name(a) for a in authorships),
                    "abstract": _openalex_abstract(record),
                    "url": record.get("doi") or record.get("id", "No link"),
                })

            return articles

        except Exception as e:
            return f"OpenAlex error: {e}"


# === Synchronous search on BASE ===
# Queries the BASE engine for open-access articles.
def search_base(query, max_results=5):
    """Query the BASE search engine and format the hits as Markdown.

    Args:
        query: Search text; sent as a request parameter so it is URL-encoded.
        max_results: Value sent as the ``num`` parameter.

    Returns:
        A Markdown string with one ``**title**`` / link pair per record,
        ``"No results found."`` when there are none, or
        ``"Error during BASE search: ..."`` when the request or parsing fails.
    """
    # NOTE(review): endpoint path, parameter names and the top-level "docs" key
    # are kept exactly as they were — verify them against the current BASE
    # HTTP interface documentation.
    url = "https://api.base-search.net/cgi-bin/BaseHttpSearchInterface"
    params = {"q": query, "num": max_results, "format": "json"}

    def first_value(value, default):
        # Fields are expected to be lists; accept a bare string too and fall
        # back to `default` for missing or empty values instead of raising.
        if isinstance(value, str):
            return value or default
        if isinstance(value, (list, tuple)) and value:
            return value[0]
        return default

    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        results = []
        for record in data.get("docs", []):
            title = first_value(record.get("dcTitle"), "Title not available")
            link = first_value(record.get("link"), "No link available")
            results.append(f"**{title}**\n[Link to article]({link})\n")

        return "\n\n".join(results) if results else "No results found."

    except Exception as e:
        return f"Error during BASE search: {e}"

# === Distributed search across multiple databases ===
# Executes parallel queries on arXiv, OpenAlex, PubMed, Zenodo.
async def search_multi_database(query):
    """Run the arXiv, OpenAlex, PubMed and Zenodo searches concurrently.

    Only sources that return a list contribute articles; anything else (an
    error string or a captured exception) is logged as a warning and skipped.
    The merged list is passed through ``normalize_articles`` and then rendered
    with ``format_articles``.

    Returns:
        A ``(articles, formatted_search)`` tuple. On an unexpected failure the
        tuple is ``([], "Internal error")``.
    """
    try:
        per_source = await asyncio.gather(
            search_arxiv_async(query),
            search_openalex_async(query),
            search_pubmed_async(query),
            search_zenodo_async(query),
            return_exceptions=True,
        )

        collected = []
        for outcome in per_source:
            if not isinstance(outcome, list):
                logging.warning(f"Invalid source: {type(outcome)}{outcome}")
                continue
            collected.extend(outcome)

        # Normalize immediately after merging
        articles = normalize_articles(collected)

        well_formed = isinstance(articles, list) and all(
            isinstance(item, dict) for item in articles
        )
        if not well_formed:
            logging.error(f"Error: 'articles' is not a valid list. Type received: {type(articles)} - Value: {repr(articles)}")
            formatted_search = "Unable to format search: response not properly structured."
        else:
            formatted_search = format_articles(articles)

        return articles, formatted_search

    except Exception as e:
        logging.error(f"Error during multi-database search: {e}")
        return [], "Internal error"


# === Scientific Source Integration ===
# Selects the first N valid articles and formats them as Markdown references.
async def integrate_sources_from_database(concept, max_sources=5):
    """Build a Markdown bullet list of references for *concept*.

    Runs ``search_multi_database`` and turns the first *max_sources* articles
    into ``- [title](url)`` lines, skipping entries whose URL is empty or not
    a string.

    Returns:
        The joined reference lines, ``"No relevant sources found."`` when no
        line was produced, or ``"No valid sources available."`` when the
        search result is not a list of dicts.
    """
    articles, _formatted = await search_multi_database(concept)

    is_article_list = isinstance(articles, list) and all(
        isinstance(item, dict) for item in articles
    )
    if not is_article_list:
        logging.warning("Invalid 'articles' structure. No sources will be displayed.")
        return "No valid sources available."

    lines = []
    for entry in articles[:max_sources]:
        link = entry.get("url", "#")
        if not (link and isinstance(link, str)):
            continue
        heading = entry.get("title", "Title not available")
        lines.append(f"- [{heading}]({link})")

    if not lines:
        return "No relevant sources found."
    return "\n".join(lines)


# === Data Normalization ===
# Converts heterogeneous input (dicts, strings, links) into a consistent list of articles.
def normalize_source(source):
    """Coerce one search result into a list of article dicts.

    A single dict is wrapped in a list and a list made only of dicts is
    returned as is. Strings and every other input are logged as a warning and
    yield an empty list.
    """
    if isinstance(source, dict):
        # Single article as dictionary
        return [source]

    if isinstance(source, str):
        # Unstructured string
        logging.warning(f"Ignored textual source: {source[:50]}...")
        return []

    if isinstance(source, list) and all(isinstance(item, dict) for item in source):
        return source

    logging.warning(f"Invalid source type: {type(source)}")
    return []

def normalize_articles(article_list):
    """Keep only usable articles from a mixed list of search results.

    Dicts are kept unchanged. Strings containing the PubMed host name are
    wrapped in a placeholder article dict. Every other element is logged as a
    warning and dropped.
    """
    normalized = []
    for candidate in article_list:
        if isinstance(candidate, dict):
            normalized.append(candidate)
            continue

        is_pubmed_link = isinstance(candidate, str) and "pubmed.ncbi.nlm.nih.gov" in candidate
        if not is_pubmed_link:
            logging.warning(f"Ignored: {repr(candidate)}")
            continue

        normalized.append({
            "title": "PubMed Link",
            "abstract": "Not available",
            "url": candidate,
            "authors": "Unknown",
        })
    return normalized

# Demo: run one multi-database search and print the formatted result.
# A bare top-level ``await`` is only accepted by IPython/Jupyter; in a regular
# Python module it is a SyntaxError that prevents the whole file from loading.
# The coroutine is therefore driven with asyncio.run(), and only when the file
# is executed as a script, so importing the module triggers no search.
# NOTE(review): inside a notebook an event loop is already running and
# asyncio.run() raises RuntimeError — there, use
# ``await search_multi_database(...)`` directly in a cell instead.
if __name__ == "__main__":
    articles, formatted_search = asyncio.run(search_multi_database("quantum physics"))
    print(formatted_search)


# === Async Task Protection Wrapper ===
# Handles timeouts and errors during asynchronous function execution.
def protect_async_task(func):
    """Decorate a coroutine function so it never raises and never runs > 20 s.

    The wrapped call is limited to 20 seconds with ``asyncio.wait_for``.
    Cancellation, a timeout or any other exception is logged and turned into
    a ``None`` result.

    Args:
        func: Coroutine function to protect.

    Returns:
        An async wrapper with the same call signature; ``functools.wraps``
        preserves the name and docstring of *func* on it.

    Note:
        Cancellation is swallowed (logged, ``None`` returned) rather than
        re-raised; this existing behaviour is kept for callers that rely on
        it.
    """
    import functools  # local import: keeps this block self-contained

    # Objects such as functools.partial have no __name__; never let the error
    # handler itself fail while building its message.
    func_name = getattr(func, "__name__", repr(func))

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        try:
            return await asyncio.wait_for(func(*args, **kwargs), timeout=20)
        except asyncio.CancelledError:
            logging.warning("Task cancelled.")
            return None
        except Exception as e:
            # Some exceptions (e.g. a timeout) have an empty message; fall
            # back to the exception type so the log line stays informative.
            detail = str(e) or type(e).__name__
            logging.error(f"Error during execution of {func_name}: {detail}")
            return None
    return wrapper

# === Asynchronous Scientific Explanation Generation ===
# Builds the prompt and invokes the LLM model.
async def generate_explanation_async(problem, level, concept, topic):
    """Fill the prompt template and query the LLM without blocking the loop.

    The blocking ``llm.invoke`` call is run in a worker thread through
    ``asyncio.to_thread``.

    Returns:
        The LLM response, or the string ``"Error generating the response."``
        when the call raises (the error is logged).
    """
    fields = {
        "problem": problem,
        "concept": concept,
        "topic": topic,
        "level": level,
    }
    prompt = prompt_template.format(**fields)

    try:
        return await asyncio.to_thread(llm.invoke, prompt.strip())
    except Exception as e:
        logging.error(f"LLM API error: {e}")
        return "Error generating the response."

# === Conditional Interactive Chart Generation ===
# Generates a chart based on the analyzed problem if requested.
def generate_conditional_chart(problem, chart_choice):
    """Build an interactive chart for *problem* when the user asked for one.

    Args:
        problem: Passed through to ``generate_interactive_chart``.
        chart_choice: User answer; ``"yes"`` or ``"y"`` (any letter case)
            requests the chart.

    Returns:
        The generated figure, or ``None`` when no chart was requested or when
        generation failed (the failure is logged).
    """
    if chart_choice.lower() not in ("yes", "y"):
        return None

    figure = None
    try:
        figure = generate_interactive_chart(problem)
        if figure is None:
            raise ValueError("Chart not generated correctly.")
        print("Chart generated successfully!")
    except Exception as e:
        logging.error(f"Chart error: {e}")
    return figure

# === Structured Output: Text + Chart ===
# Combines the generated explanation with the graphical visualization.
async def generate_complete_result(problem, level, concept, topic, chart_choice):
    """Produce the explanation and, if requested, the chart for a problem.

    Returns:
        A dict with the keys ``"response"`` (result of
        ``generate_explanation_async``) and ``"chart"`` (result of
        ``generate_conditional_chart``).
    """
    explanation = await generate_explanation_async(problem, level, concept, topic)
    return {
        "response": explanation,
        "chart": generate_conditional_chart(problem, chart_choice),
    }


# === Scientific Article Validation ===
# Checks that each article has a title, abstract, and URL.
def validate_articles(raw_articles, max_articles=5):
    """
    Validates and filters the list of articles received from an AI or API source.

    Args:
        raw_articles: Expected to be a list of dicts; any other type yields an
            empty result, and non-dict elements are skipped with a warning.
        max_articles: Maximum number of valid articles to return.

    Returns:
        A list of at most *max_articles* dicts, each holding exactly the keys
        ``title``, ``abstract`` and ``url`` as stripped, non-empty strings.
        Fields are stripped before the completeness check, so an article whose
        title, abstract or URL is only whitespace is discarded instead of
        being returned with an empty value.
    """
    if not isinstance(raw_articles, list):
        logging.warning(f"[validate_articles] Invalid input: expected list, received {type(raw_articles)}")
        return []

    required_fields = ("title", "abstract", "url")
    valid_articles = []
    for i, art in enumerate(raw_articles):
        if not isinstance(art, dict):
            logging.warning(f"[validate_articles] Invalid element at position {i}: {type(art)}")
            continue

        cleaned = {}
        for field in required_fields:
            value = art.get(field)
            # Falsy values (None, "", 0, ...) count as missing, as before.
            cleaned[field] = str(value).strip() if value else ""

        if all(cleaned.values()):
            valid_articles.append(cleaned)
        else:
            logging.info(f"[validate_articles] Article discarded due to incomplete data (i={i}).")

    if not valid_articles:
        logging.warning("[validate_articles] No valid articles after filtering.")

    return valid_articles[:max_articles]