|
|
import pandas as pd |
|
|
from datetime import datetime |
|
|
import hashlib |
|
|
import uuid |
|
|
|
|
|
|
|
|
# Locations of the two CSV-backed tables this module reads and rewrites.
memory_path = "SegmentMemory.csv"
conflict_path = "SegmentConflictMap.csv"

# Segment identifiers are zero-padded strings (e.g. "0007") and checksums are
# hex strings. Without an explicit dtype, read_csv infers numbers for
# all-digit columns, which strips the zero padding from IDs on every reload
# and can turn an all-digit checksum into an integer that never compares
# equal to a freshly computed hash. Reading them as text keeps the values
# exactly as they were written.
memory_df = pd.read_csv(
    memory_path,
    dtype={"SegmentID": str, "HashChecksum": str},
)
conflict_df = pd.read_csv(
    conflict_path,
    dtype={"SegmentA": str, "SegmentB": str},
)
|
|
|
|
|
def generate_segment_id(df=None):
    """Return the next free segment ID as a zero-padded string.

    The next ID is one more than the largest numeric value found in the
    ``SegmentID`` column, left-padded with zeros to at least four characters.

    Args:
        df: Optional DataFrame with a ``SegmentID`` column to inspect.
            Defaults to the module-level ``memory_df``.

    Returns:
        str: The next ID (``"0001"`` when no usable ID exists yet).
    """
    source = memory_df if df is None else df

    # Blank or malformed cells would make a plain ``astype(int)`` raise;
    # coerce them to NaN and ignore them instead of aborting the insertion.
    numeric_ids = pd.to_numeric(source["SegmentID"], errors="coerce").dropna()

    if numeric_ids.empty:
        next_id = 1
    else:
        # int() is needed because a column that contained NaN is float64.
        next_id = int(numeric_ids.max()) + 1

    return str(next_id).zfill(4)
|
|
|
|
|
def hash_segment(segment):
    """Return a 12-character fingerprint of *segment*.

    The fingerprint is the first 12 hexadecimal characters of the SHA-256
    digest of the text encoded with the default (UTF-8) encoding.
    """
    hasher = hashlib.sha256()
    hasher.update(segment.encode())
    full_digest = hasher.hexdigest()
    return full_digest[:12]
|
|
|
|
|
def insert_segment(raw_text, concepts, terms, structure, datapoints, comparisons, applications):
    """Add a segment to the memory table, or record it as a conflict.

    The segment is processed in this order:

    1. Text with no words is skipped.
    2. A segment whose checksum already exists in ``memory_df`` is skipped
       as a duplicate.
    3. If any stored ``RawText`` contains the segment's first word
       (case-insensitive), one conflict row per matching segment is appended
       to ``conflict_df``, the conflict CSV is rewritten, and the segment is
       NOT stored.
    4. Otherwise the segment is appended to ``memory_df`` and the memory CSV
       is rewritten.

    Args:
        raw_text: Full text of the segment; also the input to the checksum.
        concepts: Stored verbatim in the ``Concepts`` column.
        terms: Stored verbatim in the ``Terms`` column.
        structure: Stored verbatim in the ``Structure`` column.
        datapoints: Stored verbatim in the ``DataPoints`` column.
        comparisons: Stored verbatim in the ``Comparisons`` column.
        applications: Stored verbatim in the ``Applications`` column.

    Returns:
        None. The outcome is reported on stdout; the module-level DataFrames
        are modified in place and written to ``conflict_path`` /
        ``memory_path``.
    """
    # split() with no argument ignores leading/repeated whitespace, so the
    # first token is never the empty string (which would match every row).
    words = raw_text.split()
    if not words:
        print("Empty segment text. Skipping insertion.")
        return

    new_hash = hash_segment(raw_text)

    # Compare as text so the check still works if the checksum column was
    # type-inferred as numeric when the CSV was loaded.
    if memory_df["HashChecksum"].astype(str).eq(new_hash).any():
        print("Duplicate segment detected. Skipping insertion.")
        return

    # Heuristic scores: text mentioning "error" is distrusted.
    trust_score = -0.4 if "error" in raw_text.lower() else 0.75
    relevance_score = 0.6
    entropy = 0.5

    # regex=False: the first word is user text, not a pattern. Without it,
    # input such as "C++ ..." or "(Note ..." is parsed as a regular
    # expression and either raises or matches the wrong rows.
    overlap_mask = memory_df["RawText"].str.contains(
        words[0], case=False, na=False, regex=False
    )
    conflicting_rows = memory_df[overlap_mask]

    if not conflicting_rows.empty:
        # Both values are the same for every conflict row, so compute once.
        # NOTE(review): this ID is provisional - the segment is not stored on
        # this path, so the next successful insertion can receive the same
        # ID. Confirm whether SegmentB should reference something durable.
        candidate_id = generate_segment_id()
        checked_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        for existing_id in conflicting_rows["SegmentID"]:
            conflict_df.loc[len(conflict_df)] = {
                "SegmentA": existing_id,
                "SegmentB": candidate_id,
                "ConflictType": "Concept Overlap",
                "ConflictScore": 0.8,
                "ResolutionStrategy": "Manual Review",
                "WinningSegment": "",
                "LastChecked": checked_at,
                "ResolutionStatus": "Pending Review",
                "AgentTrigger": "System",
                "Notes": "Auto-detected via insertion engine",
            }

        conflict_df.to_csv(conflict_path, index=False)
        print("Conflict detected. Segment routed to ConflictMap.")
        return

    new_row = {
        "SegmentID": generate_segment_id(),
        "RawText": raw_text,
        "Concepts": concepts,
        "Terms": terms,
        "Structure": structure,
        "DataPoints": datapoints,
        "Comparisons": comparisons,
        "Applications": applications,
        "Links": "",
        "Entropy": entropy,
        "TrustScore": trust_score,
        "RelevanceScore": relevance_score,
        "RecallCount": 0,
        "LastUsed": "",
        "ConflictsWith": "",
        # Only segments above the trust threshold start out verified.
        "Verified": trust_score > 0.5,
        "HashChecksum": new_hash,
    }

    memory_df.loc[len(memory_df)] = new_row
    memory_df.to_csv(memory_path, index=False)
    print(f"Segment added successfully with ID {new_row['SegmentID']}")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: try to insert one sample chemistry segment when the file is
    # executed as a script.
    insert_segment(
        raw_text="Hydrogen is the lightest element in the periodic table.",
        concepts="lightest element, hydrogen",
        terms="hydrogen, element, periodic",
        structure="Chemistry > Elements",
        datapoints="Atomic number: 1, Mass: 1.008",
        comparisons="Compared to helium and lithium",
        applications="Used in hydrogen fuel cells",
    )
|
|
|