elly99 committed on
Commit
e8deaa1
·
verified ·
1 Parent(s): a7be358

Create scientific_verification.py

Browse files
src/science/scientific_verification.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # © 2025 Elena Marziali — Code released under Apache 2.0 license.
2
+ # See LICENSE in the repository for details.
3
+ # Removal of this copyright is prohibited.
4
+
5
+ # Verify citations and update them
6
def verify_citations(paper_text):
    """Ask the LLM to review the citations found in ``paper_text``.

    The paper text is appended to a fixed instruction and the stripped
    prompt is passed to ``llm.invoke``; whatever that call returns is
    returned unchanged.

    NOTE: an ``async def verify_citations`` is defined later in this module
    and rebinds the same name, so this synchronous version is replaced at
    import time.
    """
    request = (
        "Analyze the citations and check whether they are relevant and up-to-date:\n"
        f"{paper_text}"
    )
    cleaned_request = request.strip()
    return llm.invoke(cleaned_request)
9
+
10
+ # Source validation and citation quality
11
+
12
+ # Verify citations extracted from the text
13
async def verify_citations(paper_text):
    """Check each citation in ``paper_text`` against several literature indexes.

    Citations are obtained from ``extract_citations(paper_text)``. For every
    citation the PubMed, arXiv, OpenAlex and Zenodo search helpers are awaited
    concurrently, and a source counts as valid when its helper returns a
    truthy result.

    Returns:
        A list with one dict per citation containing the keys ``citation``,
        ``valid_pubmed``, ``valid_arxiv``, ``valid_openalex``,
        ``valid_zenodo`` and ``is_obsolete`` (the value returned by
        ``check_obsolescence(citation)``).
    """
    # Function-scope import so this block does not rely on a module-level import.
    import asyncio

    verified_sources = []

    for citation in extract_citations(paper_text):
        # The four lookups are independent of each other, so run them together
        # instead of awaiting them one after another.
        pubmed_res, arxiv_res, openalex_res, zenodo_res = await asyncio.gather(
            search_pubmed_async(citation),
            search_arxiv_async(citation),
            search_openalex_async(citation),
            search_zenodo_async(citation),
        )

        verified_sources.append({
            "citation": citation,
            "valid_pubmed": bool(pubmed_res),
            "valid_arxiv": bool(arxiv_res),
            "valid_openalex": bool(openalex_res),
            # Previously the Zenodo result was fetched but never reported.
            "valid_zenodo": bool(zenodo_res),
            "is_obsolete": check_obsolescence(citation),
        })

    return verified_sources
33
+
34
+ # Generate asynchronous LLM explanations
35
async def generate_explanation_async(problem, level, concept, topic):
    """Build the explanation prompt and run the LLM call off the event loop.

    The prompt comes from ``prompt_template.format(...)`` with the four
    arguments. The blocking ``llm.invoke`` call is executed through
    ``asyncio.to_thread``. Any exception raised while invoking the model is
    logged and replaced by a fixed fallback string.
    """
    filled_prompt = prompt_template.format(
        problem=problem,
        concept=concept,
        topic=topic,
        level=level,
    )
    try:
        # Run the synchronous client in a worker thread so the loop stays free.
        answer = await asyncio.to_thread(llm.invoke, filled_prompt.strip())
    except Exception as err:
        logging.error(f"LLM API error: {err}")
        return "Error generating the response."
    return answer
43
+
44
+ # Format retrieved articles
45
def format_articles(articles):
    """Render a list of article dicts as Markdown-style text.

    Each article becomes ``**title**: abstract`` (with ``Untitled`` and
    ``No abstract`` as fallbacks for missing keys); entries are separated by
    a blank line. An empty list yields a fixed "no articles" message. Any
    input that is not a list made only of dicts is logged as an error and
    yields a fixed "unable to format" message.
    """
    is_list_of_dicts = isinstance(articles, list) and all(
        isinstance(entry, dict) for entry in articles
    )
    if not is_list_of_dicts:
        logging.error(f"Error: 'articles' is not a valid list. Type received: {type(articles)} - Value: {repr(articles)}")
        return "Unable to format search results: unrecognized structure."

    if not articles:
        return "No articles available."

    rendered = []
    for entry in articles:
        heading = entry.get('title', 'Untitled')
        summary = entry.get('abstract', 'No abstract')
        rendered.append(f"**{heading}**: {summary}")
    return "\n\n".join(rendered)
54
+
55
+ # Generate BibTeX citations for scientific articles
56
def generate_bibtex_citation(title, authors, year, url):
    """Generate a BibTeX ``@article`` entry for a scientific article.

    Args:
        title: Article title; also used to derive the citation key.
        authors: Iterable of author names, or a single string that is used
            as the author field unchanged.
        year: Publication year.
        url: Link to the article.

    Returns:
        The BibTeX entry as a string, with a leading and a trailing newline.
    """
    # Function-scope import so this block does not rely on a module-level import.
    import re

    # BibTeX separates multiple authors with " and ", not commas. A bare
    # string is kept whole; joining it would split it into characters.
    if isinstance(authors, str):
        author_field = authors
    else:
        author_field = " and ".join(str(name) for name in authors)

    # Citation keys must not contain spaces, commas or braces, so collapse
    # every run of non-alphanumeric characters into a single underscore.
    key_stem = re.sub(r"[^a-z0-9]+", "_", str(title).lower()).strip("_") or "untitled"

    # Field values are delimited by braces only; the extra double quotes the
    # old template emitted ended up inside the field value.
    return (
        f"\n@article{{{key_stem}_{year},\n"
        f"  title={{{title}}},\n"
        f"  author={{{author_field}}},\n"
        f"  year={{{year}}},\n"
        f"  url={{{url}}}\n"
        f"}}\n"
    )
66
+
67
+ # Validate scientific articles
68
def validate_articles(raw_articles, max_articles=5):
    """
    Filter a raw article list down to well-formed entries.

    Non-list input is logged and produces an empty list. Elements that are
    not dicts are logged and skipped. A dict is kept only when 'title',
    'abstract' and 'url' are all truthy; kept values are converted to
    stripped strings and any other keys are dropped. At most
    ``max_articles`` entries are returned, in their original order.
    """
    if not isinstance(raw_articles, list):
        logging.warning(f"[validate_articles] Invalid input: expected list, received {type(raw_articles)}")
        return []

    required_keys = ("title", "abstract", "url")
    cleaned = []

    for position, entry in enumerate(raw_articles):
        if not isinstance(entry, dict):
            logging.warning(f"[validate_articles] Invalid element at position {position}: {type(entry)}")
            continue

        field_values = [entry.get(key) for key in required_keys]
        if not all(field_values):
            logging.info(f"[validate_articles] Article discarded due to incomplete data (i={position}).")
            continue

        cleaned.append(
            {key: str(value).strip() for key, value in zip(required_keys, field_values)}
        )

    if not cleaned:
        logging.warning("[validate_articles] No valid articles after filtering.")

    return cleaned[:max_articles]