"""Gradio demo that suggests medicines similar to a given medicine name.

The user's input is fuzzy-matched against the dataset's medicine names, and
the other medicines are ranked by the average of the TF-IDF cosine
similarities of their 'Composition' and 'Uses' texts.
"""
import pandas as pd
from fuzzywuzzy import process
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr

# Load dataset from GitHub
url = "https://raw.githubusercontent.com/devbernie/Medimatch/refs/heads/main/dataset.csv"
data = pd.read_csv(url)

# Preprocess data: blank out missing values and lower-case the text columns
# used for matching and vectorization.
columns_to_process = ['Composition', 'Uses', 'Medicine Name']
for column in columns_to_process:
    data[column] = data[column].fillna('').str.lower()


# Fuzzy search function
def fuzzy_search(query, data_column, threshold=80):
    """Return up to five entries of *data_column* that fuzzily match *query*.

    Args:
        query: Text to look for; it is lower-cased before matching. ``None``
            is treated as an empty query.
        data_column: Collection of candidate strings (here a pandas Series).
        threshold: Minimum fuzzywuzzy score a candidate needs to be kept.

    Returns:
        A list of matching candidate strings, in the order fuzzywuzzy
        returned them.
    """
    # Guard against None so a cleared input does not raise AttributeError.
    query = (query or "").lower()
    matches = process.extract(query, data_column, limit=5)
    # Each match starts with (candidate, score); further fields are ignored.
    return [match[0] for match in matches if match[1] >= threshold]


# Vectorize data
vectorizer_composition = TfidfVectorizer()
composition_matrix = vectorizer_composition.fit_transform(data['Composition'])

vectorizer_uses = TfidfVectorizer()
uses_matrix = vectorizer_uses.fit_transform(data['Uses'])

# Calculate similarities
# NOTE(review): these are pairwise matrices over every row of the dataset, so
# memory use grows quadratically with the number of medicines.
composition_similarity = cosine_similarity(composition_matrix)
uses_similarity = cosine_similarity(uses_matrix)
overall_similarity = (composition_similarity + uses_similarity) / 2


# Recommendation function
def recommend_medicine(input_name, top_n=5):
    """Recommend the medicines most similar to the one named *input_name*.

    Args:
        input_name: Medicine name typed by the user (approximate is fine).
        top_n: Maximum number of recommendations; values below 1 yield an
            empty list.

    Returns:
        A list of dicts (keys: "Medicine Name", "Composition", "Uses",
        "Side Effects", "Manufacturer", "Image URL"), most similar first,
        or an error-message string when no medicine name matches.
    """
    matches = fuzzy_search(input_name, data['Medicine Name'])
    if not matches:
        return f"Không tìm thấy thuốc gần đúng với '{input_name}'. Vui lòng thử lại."

    selected_medicine = matches[0]
    # Use the row *position* (not the index label): the similarity matrix and
    # ``data.iloc`` below are both positional.
    name_hits = (data['Medicine Name'] == selected_medicine).to_numpy()
    idx = int(name_hits.argmax())

    # Drop the queried medicine explicitly instead of assuming it sorts
    # first: rows with identical composition and uses tie with it at the top
    # score and may be ordered ahead of it.
    candidates = (
        pair for pair in enumerate(overall_similarity[idx]) if pair[0] != idx
    )
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)

    recommendations = []
    for position, _score in ranked[:max(top_n, 0)]:
        med_info = data.iloc[position]
        recommendations.append({
            "Medicine Name": med_info['Medicine Name'],
            "Composition": med_info['Composition'],
            "Uses": med_info['Uses'],
            "Side Effects": med_info['Side_effects'],
            "Manufacturer": med_info['Manufacturer'],
            "Image URL": med_info['Image URL'],
        })
    return recommendations


def _display_text(value):
    """Return *value* as text, mapping missing values (NaN/None) to ''."""
    return "" if pd.isna(value) else str(value)


# Gradio interface function
def gradio_recommend(input_name):
    """Format the recommendations for *input_name* as a Markdown string.

    Returns the error message unchanged when no medicine matches.
    """
    results = recommend_medicine(input_name)
    if isinstance(results, str):  # Error message
        return results

    sections = []
    for med in results:
        lines = [
            f"### Tên thuốc: {med['Medicine Name'].capitalize()}",
            f"**Thành phần:** {med['Composition']}",
            f"**Công dụng:** {med['Uses']}",
            f"**Tác dụng phụ:** {_display_text(med['Side Effects'])}",
            f"**Nhà sản xuất:** {_display_text(med['Manufacturer'])}",
        ]
        # Columns that were not pre-processed may hold NaN, which is truthy;
        # normalise first so a missing URL does not render a broken image.
        image_url = _display_text(med['Image URL'])
        if image_url:
            lines.append(f"![Hình ảnh thuốc]({image_url})")
        sections.append("\n".join(lines) + "\n\n")
    return "".join(sections)


# Gradio app
interface = gr.Interface(
    fn=gradio_recommend,
    inputs="text",
    outputs="markdown",
    title="Gợi ý Thuốc Tương Tự",
    description="Nhập tên thuốc để nhận danh sách các thuốc tương tự dựa trên thành phần và công dụng."
)

if __name__ == "__main__":
    interface.launch()