| from huggingface_hub import hf_hub_download |
| import json |
| import pandas as pd |
| from pathlib import Path |
| import gradio as gr |
|
|
# Hugging Face dataset repository and the file inside it that backs the app.
REPO_ID = "ashutoshzade/EIRKG"
FILENAME = "EIRKG_v1.1_full.json"


def _parse_dataset_file(path):
    """Parse the file at *path* according to the extension of ``FILENAME``.

    Supported extensions are ``.json``, ``.jsonl`` and ``.csv`` (matched
    case-insensitively).

    Raises:
        ValueError: if ``FILENAME`` has any other extension.
    """
    suffix = Path(FILENAME).suffix.lower()

    if suffix == ".json":
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    if suffix == ".jsonl":
        with open(path, "r", encoding="utf-8") as f:
            # Skip blank lines so a stray empty line does not abort the load.
            return [json.loads(line) for line in f if line.strip()]

    if suffix == ".csv":
        return pd.read_csv(path).to_dict("records")

    # Previously this case fell through and returned None, which crashed
    # later code that calls len() on / iterates over the result.
    raise ValueError(f"Unsupported dataset file type: {FILENAME!r}")


def load_dataset():
    """Download ``FILENAME`` from the ``REPO_ID`` dataset repo and parse it.

    Returns:
        The parsed dataset. For ``.jsonl`` and ``.csv`` this is a list of
        records; for ``.json`` it is whatever the document's top level is
        (the rest of this file presumably expects a list of dicts -- verify
        against the published dataset). On any failure (download, parsing,
        unsupported extension) the error is printed and an empty list is
        returned, so the app can still start with no data.
    """
    try:
        print("Downloading EIRKG dataset...")
        path = hf_hub_download(
            repo_id=REPO_ID,
            repo_type="dataset",
            filename=FILENAME,
        )
        print("Success!")
        return _parse_dataset_file(path)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and degrade to
        # an empty dataset rather than preventing the UI from starting.
        print(f"Failed to load EIRKG dataset: {e!r}")
        return []
|
|
# Load the dataset once at import time and report how many entries it has.
DATA = load_dataset()
print(len(DATA))

# Search index, parallel to DATA: entry i is every value of DATA[i] converted
# to a lower-cased string, each followed by a single space.
SEARCH = [
    "".join(f"{str(field).lower()} " for field in record.values())
    for record in DATA
]
|
|
| |
| |
| |
|
|
# Upper bound on the number of matches rendered for a single query.
_MAX_RESULTS = 5


def query_eirkg(question):
    """Return a Markdown report of the dataset rows that best match *question*.

    The question is lower-cased and split on whitespace. Each row's score is
    the total number of substring occurrences of those words in the row's
    entry in ``SEARCH``. Rows scoring zero are ignored; the rest are ordered
    by descending score, ties going to the higher row index, and at most
    ``_MAX_RESULTS`` of them are rendered.

    Args:
        question: Free-text query. ``None`` or an empty/whitespace-only
            string yields the "no match" message.

    Returns:
        A Markdown string with one ``# Match N`` section per hit, listing
        every field of the row whose stringified value is not blank, or
        ``"No matching EIRKG entries found."`` when nothing matches.
    """
    no_match = "No matching EIRKG entries found."

    # Treat a missing question like an empty one instead of raising.
    words = (question or "").lower().split()
    if not words:
        return no_match

    # Keep only rows that actually match, so we sort the hits rather than
    # the whole corpus and never walk the zero-score tail.
    hits = []
    for idx, text in enumerate(SEARCH):
        score = sum(text.count(word) for word in words)
        if score > 0:
            hits.append((score, idx))

    if not hits:
        return no_match

    # (score, idx) tuples sorted descending: best score first, and for equal
    # scores the higher index first.
    top_hits = sorted(hits, reverse=True)[:_MAX_RESULTS]

    parts = []
    for rank, (_score, idx) in enumerate(top_hits, start=1):
        parts.append(f"# Match {rank}\n\n")
        for k, v in DATA[idx].items():
            # Omit fields whose value renders as empty/whitespace.
            if str(v).strip() != "":
                parts.append(f"**{k}**: {v}\n\n")
        parts.append("---\n\n")

    return "".join(parts)
|
|
| |
| |
| |
|
|
# Markdown shown under the app title: project blurb, license notice and
# example queries.
DESCRIPTION = """

# Engineering and Innovation Reasoning Knowledge Graph (EIRKG)

# Engineering and Innovation Corpus to demonstrate how cross domain solutions can be formed with innovative LLM techniques.

Copyright (C) 2026 Ashutosh Zade

AGPL 3.0

Ask engineering questions to explore the EIRKG dataset.

Examples:

• battery recycling

• electric locomotive

• autonomous mining

• digital twin

• biomimicry

• carbon neutral transportation

"""
|
|
# Single free-text input for the user's question.
_question_box = gr.Textbox(
    label="Engineering Question",
    placeholder="Ask about batteries, locomotives, mining...",
)

# Results are rendered as Markdown, matching what query_eirkg returns.
_results_view = gr.Markdown(label="EIRKG Results")

# Wire the search function to the UI components.
demo = gr.Interface(
    fn=query_eirkg,
    inputs=_question_box,
    outputs=_results_view,
    title="EIRKG Explorer",
    description=DESCRIPTION,
)

# Launched unconditionally at module level (there is no __main__ guard).
demo.launch()