Transformers
Italian
English
semantic-search
explainable-ai
faiss
ai-ethics
responsible-ai
llm
prompt-engineering
multimodal-ai
ai-transparency
ethical-intelligence
explainable-llm
cognitive-ai
ethical-ai
scientific-retrieval
modular-ai
memory-augmented-llm
trustworthy-ai
reasoning-engine
ai-alignment
next-gen-llm
thinking-machines
open-source-ai
explainability
ai-research
semantic audit
cognitive agent
human-centered-ai
File size: 4,971 Bytes
ffcb401 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# © 2025 Elena Marziali — Code released under Apache 2.0 license.
# See LICENSE in the repository for details.
# Removal of this copyright is prohibited.
# Evaluate the structure of the AI response from the LLM
def validate_ai_structure(response, expected_fields=("title", "abstract", "url")):
    """Keep only the well-formed entries of an LLM response.

    Args:
        response: Parsed LLM output. Anything other than a ``list`` is
            treated as malformed.
        expected_fields: Keys every entry must contain to be kept.

    Returns:
        A new list holding, in their original order, the ``dict`` entries
        that contain all ``expected_fields``; an empty list when
        ``response`` is not a list.
    """
    if not isinstance(response, list):
        return []
    return [
        entry
        for entry in response
        if isinstance(entry, dict)
        and all(field in entry for field in expected_fields)
    ]
import math
# Compute semantic score of the response
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of ``x``.

    The computation is split by sign so that the exponential is only ever
    evaluated on a non-positive argument. This avoids the ``OverflowError``
    that ``math.exp(-x)`` raises for strongly negative ``x``.

    Args:
        x: A real number.

    Returns:
        A float in the closed interval [0.0, 1.0].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x, exp(-x) can overflow; exp(x) lies in (0, 1) and is safe.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)
def evaluate_score(model_output):
    """Convert the first element of a model output into a 0-1 score.

    The first element is cast to ``float``, passed through ``sigmoid`` and
    rounded to three decimals.

    Args:
        model_output: An indexable object whose element ``0`` can be
            converted to ``float``.

    Returns:
        The rounded sigmoid score, or ``0.0`` when the score cannot be
        computed (empty or non-indexable output, non-numeric value, ...).
    """
    try:
        raw_score = float(model_output[0])
        return round(sigmoid(raw_score), 3)
    except Exception:
        # Deliberate best-effort fallback. Catching Exception rather than
        # using a bare except lets KeyboardInterrupt / SystemExit propagate.
        return 0.0
# Extract text from selected file
def extract_text(file_name, max_chars=5000):
    """Read the textual content of a .pdf, .docx, .csv or .tsv file.

    The format is chosen from the lower-cased text after the last dot of
    ``file_name``. Failures are reported in the returned string rather than
    raised.

    Args:
        file_name: Path of the file to read.
        max_chars: Maximum number of characters to return.

    Returns:
        The first ``max_chars`` characters of the extracted text,
        ``"No text extracted."`` when the file yields no text, or a message
        starting with ``"Error during text extraction:"`` on failure
        (including unsupported formats).
    """
    suffix = file_name.lower().rsplit(".", 1)[-1]
    try:
        if suffix == "pdf":
            with pdfplumber.open(file_name) as pdf:
                # extract_text() may return None for a page; treat it as empty.
                page_texts = [page.extract_text() or "" for page in pdf.pages]
                content = "\n".join(page_texts).strip()
        elif suffix == "docx":
            paragraphs = Document(file_name).paragraphs
            content = "\n".join(par.text for par in paragraphs).strip()
        elif suffix in ("csv", "tsv"):
            delimiter = "\t" if suffix == "tsv" else ","
            table = pd.read_csv(file_name, sep=delimiter)
            content = table.to_string(index=False)
        else:
            raise ValueError(f"Unsupported format: .{suffix}")

        if not content:
            return "No text extracted."
        return content[:max_chars]
    except Exception as err:
        return f"Error during text extraction: {err}"
# Safely extract textual content from an AIMessage
def extract_text_from_ai(obj):
    """Safely extract stripped textual content from an AIMessage-like object.

    Args:
        obj: Any object. If it has a ``content`` attribute, that value is
            used; otherwise the object itself is converted with ``str``.

    Returns:
        The content as a whitespace-stripped string. A ``content`` of
        ``None`` yields ``""`` and non-string content is converted with
        ``str`` instead of raising ``AttributeError`` on ``.strip()``.
    """
    try:
        content = obj.content
    except AttributeError:
        # No content attribute: fall back to the object's string form.
        return str(obj).strip()
    if content is None:
        return ""
    return str(content).strip()
# Extract figure captions from text
def extract_captions_from_text(text):
    """Find figure captions such as ``Figure 3: ...`` or ``Fig. 2 - ...``.

    Args:
        text: The text to scan.

    Returns:
        A list with the full matched caption strings, in order of
        appearance. Matching is case-insensitive.
    """
    # The prefix group must be non-capturing: with a capturing group
    # re.findall returns only the group ("Figure" / "Fig."), not the caption.
    pattern = r"(?:Figure|Fig\.?)\s*\d+[:\.\-–]?\s*[^\n]+"
    return re.findall(pattern, text, re.IGNORECASE)
# Extract images and captions from a file
def _extract_pdf_images(file_path, output_folder):
    """Save every image embedded in a PDF and pair each one with a caption."""
    images = []
    captions = []
    # Open as a context manager so the document handle is released even when
    # extraction fails partway through.
    with fitz.open(file_path) as doc:
        full_text = "\n".join(page.get_text("text") for page in doc)
        extracted_captions = extract_captions_from_text(full_text)
        for page_no, page in enumerate(doc, start=1):
            for img_no, img in enumerate(page.get_images(full=True), start=1):
                base = doc.extract_image(img[0])
                path = f"{output_folder}/page{page_no}_img{img_no}.{base['ext']}"
                with open(path, "wb") as f:
                    f.write(base["image"])
                # Captions are paired with images purely by order of appearance.
                position = len(images)
                if position < len(extracted_captions):
                    captions.append(extracted_captions[position])
                else:
                    captions.append(f"Figure {page_no}.{img_no}")
                images.append(path)
    return images, captions


def _extract_docx_images(file_path, output_folder):
    """Save every embedded image of a DOCX file and pair each with a caption."""
    images = []
    captions = []
    doc = Document(file_path)
    text = "\n".join(par.text for par in doc.paragraphs)
    extracted_captions = extract_captions_from_text(text)
    # The index counts every relationship (not only images), which keeps the
    # numbering in file names and fallback captions unchanged.
    for index, relation in enumerate(doc.part.rels.values(), start=1):
        # External (linked) targets have no embedded part; accessing
        # target_part on them raises, so they are skipped.
        if relation.is_external or "image" not in relation.target_ref:
            continue
        # Keep the image's real extension instead of always writing ".png".
        suffix = os.path.splitext(relation.target_ref)[1] or ".png"
        name = f"{output_folder}/docx_image_{index}{suffix}"
        with open(name, "wb") as f:
            f.write(relation.target_part.blob)
        position = len(images)
        if position < len(extracted_captions):
            captions.append(extracted_captions[position])
        else:
            captions.append(f"Figure {index}")
        images.append(name)
    return images, captions


def extract_images_with_captions(file_path, output_folder="extracted_figures"):
    """Extract embedded images from a PDF or DOCX file along with captions.

    Images are written into ``output_folder`` (created if missing). Captions
    are taken from the document text via ``extract_captions_from_text`` and
    assigned to images in order; images without a matching caption get a
    generated ``"Figure ..."`` label.

    Args:
        file_path: Path of the .pdf or .docx file to process.
        output_folder: Directory that receives the extracted image files.

    Returns:
        A tuple ``(images, captions)`` of equal-length lists: saved image
        paths and their captions. Both lists are empty for unsupported
        extensions or when any error occurs during extraction (the error is
        printed, not raised).
    """
    os.makedirs(output_folder, exist_ok=True)
    extension = file_path.lower().split(".")[-1]
    images = []
    captions = []
    try:
        if extension == "pdf":
            images, captions = _extract_pdf_images(file_path, output_folder)
        elif extension == "docx":
            images, captions = _extract_docx_images(file_path, output_folder)
        else:
            print(f"Unsupported extension: .{extension}")
        print(f"{len(images)} image(s) extracted.")
        return images, captions
    except Exception as e:
        print(f"Error extracting images: {e}")
        return [], []
# Generate semantic coherence note based on score
def generate_note(score):
    """Describe a semantic coherence score in one sentence.

    Args:
        score: Numeric coherence score.

    Returns:
        The "high" note for scores strictly above 0.85, the "moderate" note
        for scores strictly above 0.6, and the "low" note otherwise.
    """
    # Ordered from the highest threshold down; the first one exceeded wins.
    tiers = (
        (0.85, "High semantic coherence. The response is likely solid and relevant."),
        (0.6, "Moderate coherence. The response is understandable but may contain approximations."),
    )
    for threshold, note in tiers:
        if score > threshold:
            return note
    return "Low coherence. It may be helpful to rephrase the question or provide more context."
# Simulate LLM response generation
def generate_response(question, temperature=0.7):
    """Return a canned stand-in for an LLM response.

    Args:
        question: The prompt text.
        temperature: Sampling temperature; only echoed in the placeholder.

    Returns:
        A fixed rephrased question when ``question`` contains the substring
        ``"Rephrase"`` (case-sensitive); otherwise a placeholder string that
        embeds ``temperature`` and ``question``.
    """
    if "Rephrase" in question:
        return "How does enthalpy change during a phase transition?"
    # The stray trailing "|" that followed this return was a syntax error.
    return f"[Simulated response at temperature {temperature} for: {question}]"