File size: 3,004 Bytes
e351e96
 
5607d95
 
 
 
e351e96
 
 
 
 
 
5607d95
e351e96
5607d95
 
 
 
e351e96
 
5607d95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e351e96
5607d95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e351e96
 
 
 
 
5607d95
 
 
 
 
e351e96
 
 
5607d95
e351e96
 
 
 
 
 
 
5607d95
 
 
 
 
 
 
 
e351e96
5607d95
e351e96
 
5607d95
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import gradio as gr
from agent import ResearchAgent
import pandas as pd
import matplotlib.pyplot as plt
import json
import tempfile

# Module-level agent instance, created once at import time; run_pipeline()
# calls its execute_pipeline() with the path of each uploaded file.
agent = ResearchAgent()

def _save_plot(path, title, draw):
    """Render one chart to a PNG file at *path* and return *path*.

    *draw* is called with a fresh matplotlib Axes and adds the chart to it.
    """
    # Function-scope import. Figure is used directly instead of pyplot so no
    # pyplot-managed figure is left open after each request.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(8, 5))
    ax = fig.subplots()
    draw(ax)
    ax.set_title(title)
    fig.savefig(path, format="png", bbox_inches="tight")
    return path


def _bar_drawer(frame, x_col, y_col):
    """Return a draw callback that plots *y_col* against *x_col* as bars."""

    def draw(ax):
        # Labels are cast to str so the bars keep the frame's (sorted) order
        # instead of being re-positioned along a numeric axis.
        ax.bar(frame[x_col].astype(str), frame[y_col])
        ax.set_xlabel(x_col)
        ax.set_ylabel(y_col)
        ax.tick_params(axis="x", rotation=45)

    return draw


def run_pipeline(file):
    """Run the research pipeline on an uploaded CSV and build the dashboard outputs.

    Args:
        file: Upload from the Gradio file input; either an object exposing
            the path as ``.name`` or a plain path string. ``None`` means
            nothing was uploaded.

    Returns:
        A 9-tuple in the order of the interface outputs: a status message,
        four result file paths and four chart image paths. On any failure
        the first element carries the message and the other eight are
        ``None``.
    """
    no_outputs = (None,) * 8
    try:
        if file is None:
            return ("Upload CSV", *no_outputs)

        # Accept both an object with a .name path and a bare path string.
        path = getattr(file, "name", file)

        result = agent.execute_pipeline(path)

        if "error" in result:
            return (result["error"], *no_outputs)

        # Load outputs (read from the current working directory; presumably
        # written there by execute_pipeline -- TODO confirm).
        comp = pd.read_csv("comparison.csv")
        topic = pd.read_csv("topic_review_table.csv")
        keywords = pd.read_csv("keywords.csv")

        with open("taxonomy_map.json", encoding="utf-8") as f:
            taxonomy = json.load(f)

        # The image outputs are fed file paths, so every chart has to be
        # written to disk under the exact name that is returned below.

        # -------- Graph 1: similarity distribution --------
        def draw_similarity(ax):
            ax.hist(comp["similarity_score"].dropna(), bins=30)
            ax.set_xlabel("Similarity Score")
            ax.set_ylabel("Frequency")

        comp_plot = _save_plot(
            "comp_plot.png",
            "Title vs Abstract Similarity Distribution",
            draw_similarity,
        )

        # -------- Graph 2: topic importance --------
        top_topics = topic.sort_values("document_count", ascending=False).head(15)
        topic_plot = _save_plot(
            "topic_plot.png",
            "Top 15 Topics by Document Coverage",
            _bar_drawer(top_topics, "topic_id", "document_count"),
        )

        # -------- Graph 3: keyword relevance --------
        top_keywords = keywords.sort_values("relevance", ascending=False).head(15)
        keywords_plot = _save_plot(
            "keywords_plot.png",
            "Top Keyword Clusters by Relevance",
            _bar_drawer(top_keywords, "ID", "relevance"),
        )

        # -------- Graph 4: mapping insight --------
        mapped = len(taxonomy["mapped"])
        novel = len(taxonomy["novel"])

        def draw_mapping(ax):
            if mapped + novel == 0:
                # A pie chart cannot be normalised when every slice is zero.
                ax.text(0.5, 0.5, "No themes", ha="center", va="center")
                ax.axis("off")
            else:
                ax.pie(
                    [mapped, novel],
                    labels=["Mapped", "Novel"],
                    autopct="%1.1f%%",
                )

        taxonomy_plot = _save_plot(
            "taxonomy_plot.png",
            "Knowledge Mapping: Known vs Novel Themes",
            draw_mapping,
        )

        return (
            "✅ Pipeline completed",
            "comparison.csv",
            "taxonomy_map.json",
            "topic_review_table.csv",
            "keywords.csv",
            comp_plot,
            topic_plot,
            keywords_plot,
            taxonomy_plot,
        )

    except Exception as e:
        # UI boundary: report any failure in the status box instead of raising.
        return (str(e), *no_outputs)


# Gradio front end: one CSV upload in; a status message, four result files
# and four chart images out. The output order has to match the tuple that
# run_pipeline returns.
demo = gr.Interface(
    title="Topic Modeling Dashboard",
    fn=run_pipeline,
    inputs=gr.File(label="Upload CSV"),
    outputs=[
        gr.Textbox(label="Status"),
        *(
            gr.File(label=filename)
            for filename in (
                "comparison.csv",
                "taxonomy_map.json",
                "topic_review_table.csv",
                "keywords.csv",
            )
        ),
        *(
            gr.Image(label=caption)
            for caption in (
                "Similarity Graph",
                "Topic Distribution",
                "Keyword Relevance",
                "Mapping Graph",
            )
        ),
    ],
)

# Start the app, asking Gradio for a public share link as well.
demo.launch(share=True)