"""Gradio app for keyword search over the EIRKG dataset.

On start-up the dataset file is downloaded from the Hugging Face Hub,
every record is flattened into one lowercase string, and a small Gradio
interface is launched that ranks records by how often the query words
occur in that string.
"""
import heapq
import json
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download

REPO_ID = "ashutoshzade/EIRKG"
FILENAME = "EIRKG_v1.1_full.json"

# Maximum number of matching records rendered for one query.
MAX_RESULTS = 5


def _as_records(payload):
    """Coerce a decoded dataset payload into a list of dict records.

    The rest of the script calls ``.values()`` and ``.items()`` on every
    record, so anything that is not already a list of dicts is normalised
    here instead of crashing later.

    NOTE(review): the schema of the published file is not visible from
    this script; the dict handling below is a best-effort heuristic and
    should be confirmed against the real data.
    """
    if payload is None:
        return []
    if isinstance(payload, list):
        # Wrap scalars / nested lists so each record exposes a dict API.
        return [
            item if isinstance(item, dict) else {"value": item}
            for item in payload
        ]
    if isinstance(payload, dict):
        # A wrapper object such as {"<name>": [ {...}, {...} ]}: use the
        # contained lists of dicts as the records.
        tables = [
            value
            for value in payload.values()
            if isinstance(value, list)
            and value
            and all(isinstance(item, dict) for item in value)
        ]
        if tables:
            return [record for table in tables for record in table]
        # Otherwise treat the object itself as a single record.
        return [payload]
    return [{"value": payload}]


def load_dataset():
    """Download the EIRKG file from the Hub and return it as a list of dicts.

    The parser is chosen from the extension of ``FILENAME`` (``.json``,
    ``.jsonl`` or ``.csv``, case-insensitive).

    Returns:
        A list of dict records. An empty list is returned, after printing
        the reason, when the download or parsing fails or when the file
        extension is not supported, so the UI can still start.
    """
    suffix = Path(FILENAME).suffix.lower()

    # Deliberately broad: this is the start-up boundary, and any failure
    # (network, auth, malformed file) should degrade to an empty dataset
    # rather than prevent the app from launching.
    try:
        print("Downloading EIRKG dataset...")
        path = hf_hub_download(
            repo_id=REPO_ID,
            repo_type="dataset",
            filename=FILENAME,
        )
        print("Success!")

        if suffix == ".json":
            with open(path, "r", encoding="utf-8") as f:
                return _as_records(json.load(f))

        if suffix == ".jsonl":
            with open(path, "r", encoding="utf-8") as f:
                # Skip blank lines; json.loads("") would raise and the
                # whole dataset would be discarded by the handler below.
                return _as_records(
                    [json.loads(line) for line in f if line.strip()]
                )

        if suffix == ".csv":
            return pd.read_csv(path).to_dict("records")

    except Exception as e:
        print(f"Failed to load {FILENAME!r} from {REPO_ID!r}: {e!r}")
        return []

    # Always return a list so callers can rely on len() and iteration.
    print(f"Unsupported dataset file type: {FILENAME!r}")
    return []


DATA = load_dataset()

print(len(DATA))

# ---------------------
# Build searchable text
# ---------------------

# SEARCH[i] is the lowercase, space-separated text of all values in DATA[i].
SEARCH = [
    " ".join(str(value).lower() for value in row.values())
    for row in DATA
]


# ---------------------
# Search function
# ---------------------

def query_eirkg(question):
    """Return Markdown for the records that best match ``question``.

    The query is lowercased and split on whitespace. A record's score is
    the total number of (substring) occurrences of the query words in its
    searchable text. Records with a score of zero are ignored and at most
    ``MAX_RESULTS`` records are rendered, highest score first; ties are
    ordered by descending record index.

    Args:
        question: Free-text query. ``None`` or an empty/blank string
            yields the "no matches" message.

    Returns:
        A Markdown string with one section per match, or
        ``"No matching EIRKG entries found."`` when nothing matches.
    """
    words = (question or "").lower().split()

    hits = []
    for idx, text in enumerate(SEARCH):
        score = sum(text.count(word) for word in words)
        if score:
            hits.append((score, idx))

    sections = []
    # nlargest(n, it) is equivalent to sorted(it, reverse=True)[:n], so
    # the ordering matches a full descending sort of (score, idx) pairs.
    best = heapq.nlargest(MAX_RESULTS, hits)
    for rank, (_, idx) in enumerate(best, start=1):
        parts = [f"# Match {rank}\n\n"]
        parts.extend(
            f"**{k}**: {v}\n\n"
            for k, v in DATA[idx].items()
            if str(v).strip()  # omit fields that render as blank
        )
        parts.append("---\n\n")
        sections.append("".join(parts))

    return "".join(sections) or "No matching EIRKG entries found."


# ---------------------
# Gradio UI
# ---------------------

DESCRIPTION = """
# Engineering and Innovation Reasoning Knowledge Graph (EIRKG)

# Engineering and Innovation Corpus to demonstrate how cross domain solutions can be formed with innovative LLM techniques.

Copyright (C) 2026 Ashutosh Zade AGPL 3.0

Ask engineering questions to explore the EIRKG dataset.

Examples:

• battery recycling
• electric locomotive
• autonomous mining
• digital twin
• biomimicry
• carbon neutral transportation
"""

demo = gr.Interface(
    fn=query_eirkg,
    inputs=gr.Textbox(
        label="Engineering Question",
        placeholder="Ask about batteries, locomotives, mining...",
    ),
    outputs=gr.Markdown(label="EIRKG Results"),
    title="EIRKG Explorer",
    description=DESCRIPTION,
)

demo.launch()