|
|
|
|
|
import pandas as pd |
|
|
from fuzzywuzzy import process |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
# Location of the medicine catalogue; it is downloaded when the module loads.
url = "https://raw.githubusercontent.com/devbernie/Medimatch/refs/heads/main/dataset.csv"
data = pd.read_csv(url)

# Text columns used for matching. Missing cells become empty strings and all
# text is lowercased so that later comparisons are case-insensitive.
columns_to_process = ['Composition', 'Uses', 'Medicine Name']
for column in columns_to_process:
    filled = data[column].fillna('')
    data[column] = filled.str.lower()
|
|
|
|
|
|
|
|
def fuzzy_search(query, data_column, threshold=80, limit=5):
    """Return the entries of ``data_column`` that fuzzily match ``query``.

    Args:
        query: Free-text search string; it is lowercased before matching.
        data_column: Candidate strings passed to ``process.extract`` as the
            choices (the caller in this file passes a DataFrame column).
        threshold: Minimum score a candidate must reach to be kept.
        limit: Maximum number of candidates requested from the matcher.

    Returns:
        A list of the matching candidate strings, in the order
        ``process.extract`` returned them. The list is empty when the query
        is blank/``None`` or when no candidate reaches ``threshold``.
    """
    # A blank query cannot identify anything: answer immediately instead of
    # failing on ``None.lower()`` or scoring every choice against "".
    if not query or not query.strip():
        return []

    matches = process.extract(query.lower(), data_column, limit=limit)
    # Each match starts with (choice, score); only the choice text is returned.
    return [match[0] for match in matches if match[1] >= threshold]
|
|
|
|
|
|
|
|
# One independent TF-IDF vocabulary per text field.
vectorizer_composition = TfidfVectorizer()
vectorizer_uses = TfidfVectorizer()
composition_matrix = vectorizer_composition.fit_transform(data['Composition'])
uses_matrix = vectorizer_uses.fit_transform(data['Uses'])

# Row-against-row cosine similarity, computed separately for each field.
# NOTE(review): each result is a rows x rows matrix, so memory use grows
# quadratically with the size of the dataset.
composition_similarity, uses_similarity = (
    cosine_similarity(matrix) for matrix in (composition_matrix, uses_matrix)
)

# Overall score: unweighted mean of the two per-field similarities.
overall_similarity = (composition_similarity + uses_similarity) / 2
|
|
|
|
|
|
|
|
def recommend_medicine(input_name, top_n=5):
    """Recommend medicines similar to the one best matching ``input_name``.

    The name is resolved with ``fuzzy_search`` against the ``Medicine Name``
    column; the first match is taken as the query medicine and the rows with
    the highest ``overall_similarity`` to it are returned.

    Args:
        input_name: Medicine name typed by the user (approximate spelling is
            accepted).
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of dicts with the keys ``Medicine Name``, ``Composition``,
        ``Uses``, ``Side Effects``, ``Manufacturer`` and ``Image URL``,
        ordered from most to least similar. When no medicine name matches,
        a user-facing message string is returned instead, so callers must
        check for ``str``.
    """
    matches = fuzzy_search(input_name, data['Medicine Name'])
    if not matches:
        return f"Không tìm thấy thuốc gần đúng với '{input_name}'. Vui lòng thử lại."

    selected_medicine = matches[0]

    # Resolve the row *position* of the first row carrying that name. The
    # similarity matrix and ``data.iloc`` are both positional, so an index
    # label would only be correct while the frame keeps its default index.
    name_hits = (data['Medicine Name'] == selected_medicine).to_numpy()
    idx = int(name_hits.argmax())

    ranked = sorted(
        enumerate(overall_similarity[idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Exclude the query medicine explicitly rather than assuming it sorts
    # first: rows with identical text tie with it at the top score, and a row
    # whose text is empty has zero similarity even to itself.
    neighbour_positions = [pos for pos, _ in ranked if pos != idx]
    neighbour_positions = neighbour_positions[:max(top_n, 0)]

    recommendations = []
    for pos in neighbour_positions:
        med_info = data.iloc[pos]
        recommendations.append({
            "Medicine Name": med_info['Medicine Name'],
            "Composition": med_info['Composition'],
            "Uses": med_info['Uses'],
            "Side Effects": med_info['Side_effects'],
            "Manufacturer": med_info['Manufacturer'],
            "Image URL": med_info['Image URL'],
        })
    return recommendations
|
|
|
|
|
|
|
|
def gradio_recommend(input_name):
    """Format the recommendations for ``input_name`` as Markdown for the UI.

    Args:
        input_name: Medicine name entered in the Gradio text box.

    Returns:
        A Markdown string with one section per recommended medicine (name,
        composition, uses, side effects, manufacturer and, when available,
        its image), or the plain message string produced by
        ``recommend_medicine`` when no medicine matched.
    """
    results = recommend_medicine(input_name)
    # recommend_medicine reports "no match" as a message string; pass it on.
    if isinstance(results, str):
        return results

    sections = []
    for med in results:
        lines = [
            f"### Tên thuốc: {med['Medicine Name'].capitalize()}\n",
            f"**Thành phần:** {med['Composition']}\n",
            f"**Công dụng:** {med['Uses']}\n",
            f"**Tác dụng phụ:** {med['Side Effects']}\n",
            f"**Nhà sản xuất:** {med['Manufacturer']}\n",
        ]

        image_url = med['Image URL']
        # Missing CSV cells are NaN, which is truthy, so a plain truth test
        # is not enough: only a non-blank string can serve as an image source.
        if isinstance(image_url, str) and image_url.strip():
            # Emit the image itself; previously the URL was checked but only
            # an empty line was written.
            lines.append(f"\n![{med['Medicine Name']}]({image_url.strip()})\n")

        lines.append("\n")
        sections.append("".join(lines))

    return "".join(sections)
|
|
|
|
|
|
|
|
# Gradio front end: a single text box in, rendered Markdown out.
_interface_options = {
    "fn": gradio_recommend,
    "inputs": "text",
    "outputs": "markdown",
    "title": "Gợi ý Thuốc Tương Tự",
    "description": "Nhập tên thuốc để nhận danh sách các thuốc tương tự dựa trên thành phần và công dụng.",
}
interface = gr.Interface(**_interface_options)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()