# Medimatch / app.py
# Source: devbernie/Medimatch, commit 0f95a2e ("Create app.py").
import pandas as pd
from fuzzywuzzy import process
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
# Fetch the medicine dataset (CSV) from the project's GitHub repository.
url = "https://raw.githubusercontent.com/devbernie/Medimatch/refs/heads/main/dataset.csv"
data = pd.read_csv(url)

# Normalise the text columns used for matching: missing cells become empty
# strings and all text is lower-cased so later comparisons ignore case.
columns_to_process = ['Composition', 'Uses', 'Medicine Name']
data[columns_to_process] = data[columns_to_process].fillna('').apply(
    lambda text_column: text_column.str.lower()
)
# Fuzzy search function
def fuzzy_search(query, data_column, threshold=80, limit=5):
    """Return the entries of ``data_column`` that approximately match ``query``.

    Args:
        query: Search text; it is lower-cased before being compared.
        data_column: Collection of candidate strings to search in.
        threshold: Minimum score, as reported by ``process.extract``, that a
            candidate must reach to be kept.
        limit: Maximum number of candidates requested from
            ``process.extract`` before the threshold filter is applied.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        A list of the matched strings, in the order ``process.extract``
        returned them. Empty when no candidate reaches ``threshold``.
    """
    candidates = process.extract(query.lower(), data_column, limit=limit)
    # Each candidate is a tuple that starts with (matched string, score);
    # any further fields are ignored here.
    return [
        candidate[0] for candidate in candidates if candidate[1] >= threshold
    ]
# Vectorize data
def _fit_tfidf(texts):
    """Fit a new TfidfVectorizer on ``texts``; return (vectorizer, matrix)."""
    vectorizer = TfidfVectorizer()
    return vectorizer, vectorizer.fit_transform(texts)


# One independent vocabulary per text column.
vectorizer_composition, composition_matrix = _fit_tfidf(data['Composition'])
vectorizer_uses, uses_matrix = _fit_tfidf(data['Uses'])

# Calculate similarities: cosine similarity between every pair of rows,
# computed separately for each text column.
composition_similarity = cosine_similarity(composition_matrix)
uses_similarity = cosine_similarity(uses_matrix)

# The overall score is the unweighted mean of the two matrices.
overall_similarity = (composition_similarity + uses_similarity) / 2
# Recommendation function
def recommend_medicine(input_name, top_n=5):
    """Recommend the medicines most similar to the one named ``input_name``.

    The name is resolved with ``fuzzy_search`` against the 'Medicine Name'
    column. The first match is taken as the selected medicine, and its row
    of ``overall_similarity`` is used to rank every other row.

    Args:
        input_name: Medicine name to look up (approximate spelling allowed).
        top_n: Maximum number of recommendations to return. Values below
            zero are treated as zero.

    Returns:
        A list of at most ``top_n`` dicts with the keys "Medicine Name",
        "Composition", "Uses", "Side Effects", "Manufacturer" and
        "Image URL", ordered from most to least similar. When no medicine
        name matches, a ``str`` holding a user-facing message is returned
        instead.
    """
    matches = fuzzy_search(input_name, data['Medicine Name'])
    if not matches:
        return f"Không tìm thấy thuốc gần đúng với '{input_name}'. Vui lòng thử lại."

    selected_medicine = matches[0]

    # Use the *position* of the first row carrying that name. Both the
    # similarity matrix and ``iloc`` are positional, whereas ``.index[0]``
    # would give an index label that only coincides with the position for a
    # default RangeIndex.
    name_hits = (data['Medicine Name'] == selected_medicine).to_numpy()
    idx = int(name_hits.nonzero()[0][0])

    row_scores = overall_similarity[idx]

    # Exclude the queried row explicitly instead of assuming it sorts first.
    # A row with identical text at a lower position ties with it, and a row
    # whose text is empty has self-similarity 0; in both cases skipping
    # "the first result" would return the queried medicine itself.
    ranked_positions = sorted(
        (pos for pos in range(len(row_scores)) if pos != idx),
        key=lambda pos: row_scores[pos],
        reverse=True,
    )

    recommendations = []
    # max(..., 0) keeps a negative top_n from turning into a from-the-end slice.
    for pos in ranked_positions[:max(top_n, 0)]:
        med_info = data.iloc[pos]
        recommendations.append({
            "Medicine Name": med_info['Medicine Name'],
            "Composition": med_info['Composition'],
            "Uses": med_info['Uses'],
            "Side Effects": med_info['Side_effects'],
            "Manufacturer": med_info['Manufacturer'],
            "Image URL": med_info['Image URL'],
        })
    return recommendations
# Gradio interface function
def _display_text(value):
    """Return ``value`` as display text; missing cells (NaN/None) become ''."""
    return "" if pd.isna(value) else str(value)


def gradio_recommend(input_name):
    """Render the recommendations for ``input_name`` as a Markdown string.

    Args:
        input_name: Medicine name entered in the Gradio text box.

    Returns:
        The message from ``recommend_medicine`` unchanged when it returns a
        string. Otherwise one Markdown section per recommended medicine:
        a heading with the name, then composition, uses, side effects,
        manufacturer and, when a URL is present, an image. An empty list of
        recommendations yields an empty string.
    """
    results = recommend_medicine(input_name)
    if isinstance(results, str):  # Error message
        return results

    sections = []
    for med in results:
        heading = (
            f"### Tên thuốc: {_display_text(med['Medicine Name']).capitalize()}"
        )
        fields = [
            f"**Thành phần:** {_display_text(med['Composition'])}",
            f"**Công dụng:** {_display_text(med['Uses'])}",
            f"**Tác dụng phụ:** {_display_text(med['Side Effects'])}",
            f"**Nhà sản xuất:** {_display_text(med['Manufacturer'])}",
        ]
        # A missing URL is read from the CSV as NaN, which is truthy, so a
        # plain truthiness test would emit a broken "(nan)" image link.
        image_url = _display_text(med['Image URL']).strip()
        if image_url:
            fields.append(f"![Hình ảnh thuốc]({image_url})")
        # Two trailing spaces before the newline form a Markdown hard line
        # break; a bare newline would merge all fields into one paragraph.
        sections.append(heading + "\n" + "  \n".join(fields) + "\n\n")
    return "".join(sections)
# Gradio app: one free-text input whose result is rendered as Markdown.
_interface_options = {
    "fn": gradio_recommend,
    "inputs": "text",
    "outputs": "markdown",
    "title": "Gợi ý Thuốc Tương Tự",
    "description": "Nhập tên thuốc để nhận danh sách các thuốc tương tự dựa trên thành phần và công dụng.",
}
interface = gr.Interface(**_interface_options)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()