Transformers
Italian
English
semantic-search
explainable-ai
faiss
ai-ethics
responsible-ai
llm
prompt-engineering
multimodal-ai
ai-transparency
ethical-intelligence
explainable-llm
cognitive-ai
ethical-ai
scientific-retrieval
modular-ai
memory-augmented-llm
trustworthy-ai
reasoning-engine
ai-alignment
next-gen-llm
thinking-machines
open-source-ai
explainability
ai-research
semantic audit
cognitive agent
human-centered-ai
| # © 2025 Elena Marziali — Code released under Apache 2.0 license. | |
| # See LICENSE in the repository for details. | |
| # Removal of this copyright is prohibited. | |
| # Sample data for ranking | |
| data = np.array([ | |
| [120, 45, 1, 2023], # Citations, h-index, peer review, year | |
| [50, 30, 1, 2020], | |
| [10, 15, 0, 2018] | |
| ]) | |
| labels = [95, 70, 30] # Academic impact score | |
| # Model training | |
| ranking_model = RandomForestRegressor(n_estimators=100) | |
| ranking_model.fit(data, labels) | |
| # **Ranking prediction** | |
| def calculate_impact_score(citations, h_index, peer_review, publication_year): | |
| paper_data = np.array([[citations, h_index, peer_review, publication_year]]) | |
| score = ranking_model.predict(paper_data) | |
| return max(0, score[0]) # Ensure non-negative | |
| # Usage example | |
| impact_score = calculate_impact_score(80, 40, 1, 2024) | |
| print(f"Estimated score: {impact_score}") | |
| # Ranking model | |
| from sklearn.ensemble import RandomForestRegressor | |
| # Sample data for ranking | |
| data = np.array([ | |
| [120, 45, 1, 2023], # Citations, h-index, peer review, year | |
| [50, 30, 1, 2020], | |
| [10, 15, 0, 2018] | |
| ]) | |
| labels = [95, 70, 30] # Academic impact score | |
| # Model training | |
| ranking_model = RandomForestRegressor(n_estimators=100) | |
| ranking_model.fit(data, labels) | |
| # Ranking prediction | |
| new_paper = np.array([[80, 40, 1, 2024]]) | |
| score = ranking_model.predict(new_paper) | |
| print(f"Estimated score: {score[0]}") | |
| # === Scientific originality evaluation === | |
| def evaluate_hypothesis_novelty(hypothesis, existing_articles, threshold=0.7): | |
| """ | |
| Compares the hypothesis with existing articles using semantic embeddings. | |
| Returns: | |
| - average similarity score | |
| - similar articles | |
| - qualitative assessment of originality | |
| """ | |
| try: | |
| emb_hypothesis = model_embedding.encode([hypothesis]) | |
| emb_articles = model_embedding.encode([a["abstract"] for a in existing_articles if "abstract" in a]) | |
| similarity = np.dot(emb_hypothesis, emb_articles.T) / ( | |
| np.linalg.norm(emb_hypothesis) * np.linalg.norm(emb_articles, axis=1) | |
| ) | |
| average = round(float(np.mean(similarity)), 3) | |
| similar_articles = [ | |
| existing_articles[i]["title"] | |
| for i, score in enumerate(similarity[0]) if score > threshold | |
| ] | |
| if average < 0.4: | |
| assessment = "High originality: hypothesis is rarely present in the literature." | |
| elif average < 0.7: | |
| assessment = "Moderate originality: related concepts exist." | |
| else: | |
| assessment = "Low originality: hypothesis is already widely discussed." | |
| return { | |
| "novelty_score": average, | |
| "similar_articles": similar_articles, | |
| "assessment": assessment | |
| } | |
| except Exception as e: | |
| logging.error(f"[evaluate_novelty] Error during originality evaluation: {e}") | |
| return { | |
| "novelty_score": 0.0, | |
| "similar_articles": [], | |
| "assessment": "Error during originality evaluation." | |
| } | |
| # Automated paper review with AI | |
| async def review_paper(paper_text): | |
| """ Checks the methodology and citation quality of a paper. """ | |
| methodology = await verify_methodology(paper_text) | |
| citations = await verify_citations(paper_text) | |
| return {"methodology": methodology, "citations": citations} | |
| async def validate_hypothesis(hypothesis): | |
| sources = await search_multi_database(hypothesis) | |
| score = calculate_impact_score(sources) # Based on citations, year, h-index, etc. | |
| summary = summarize_evidence(sources) | |
| return score, summary | |
| def summarize_evidence(sources): | |
| return "\n".join([ | |
| f"- {a['title'][:80]}…" for a in sources if isinstance(a, dict) and 'title' in a | |
| ]) if sources else "No evidence found." | |