diff --git "a/CODE_DOCUMENTATION.md" "b/CODE_DOCUMENTATION.md"
new file mode 100644
--- /dev/null
+++ "b/CODE_DOCUMENTATION.md"
@@ -0,0 +1,6457 @@
+# derm-ai
+
+Generated on: C:\Work\derm-ai
+
+## Project Structure
+
+```
+derm-ai/
+├── app
+│   ├── config
+│   │   ├── temp
+│   │   ├── __init__.py
+│   │   └── config.py
+│   ├── database
+│   │   ├── __init__.py
+│   │   ├── database_query.py
+│   │   └── db.py
+│   ├── middleware
+│   │   └── auth.py
+│   ├── routers
+│   │   ├── temp
+│   │   ├── admin.py
+│   │   ├── agent_chat.py
+│   │   ├── auth.py
+│   │   ├── chat.py
+│   │   ├── chat_session.py
+│   │   ├── language.py
+│   │   ├── location.py
+│   │   ├── preferences.py
+│   │   ├── profile.py
+│   │   └── questionnaire.py
+│   ├── services
+│   │   ├── MagicConvert.py
+│   │   ├── RAG_evaluation.py
+│   │   ├── __init__.py
+│   │   ├── agent_service.py
+│   │   ├── agentic_prompt.py
+│   │   ├── chat_processor.py
+│   │   ├── chathistory.py
+│   │   ├── environmental_condition.py
+│   │   ├── image_classification_vit.py
+│   │   ├── image_processor.py
+│   │   ├── llm_model.py
+│   │   ├── prompts.py
+│   │   ├── report_process.py
+│   │   ├── skincare_scheduler.py
+│   │   ├── tools.py
+│   │   ├── vector_database_search.py
+│   │   ├── websearch.py
+│   │   └── wheel.py
+│   ├── __init__.py
+│   └── main.py
+├── temp
+├── uploads
+├── Dockerfile
+├── LICENSE
+├── Makefile
+├── README.md
+├── app.py
+├── docker-compose.yml
+├── document_code.py
+└── pyproject.toml
+```
+
+## Source Code
+
+### app\__init__.py
+
+```python
+# app/__init__.py
+from app.main import app
+
+__all__ = [
+    "app",
+]
+
+
+
+```
+
+### app\config\__init__.py
+
+```python
+from app.config.config import Config
+
+config = Config()
+```
+
+### app\config\config.py
+
+```python
+import os
+from dotenv import load_dotenv
+
+
+load_dotenv()
+
+class Config:
+    """Application settings read from the environment when the module is imported."""
+
+    JWT_SECRET_KEY = os.getenv('JWT_SECRET_KEY')
+    # Fall back to 3600 when the variable is unset or empty: int(None) would
+    # otherwise raise TypeError and make the whole package unimportable.
+    JWT_ACCESS_TOKEN_EXPIRES = int(os.getenv('JWT_ACCESS_TOKEN_EXPIRES') or 3600)
+    CORS_ORIGINS = ["http://localhost:3000"]
+    # Upload directory lives next to this file: <package>/config/temp
+    UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'temp')
+```
+
+### app\database\__init__.py
+
+```python
+from app.database.db import get_db, db
+from app.database.database_query import DatabaseQuery
+
+__all__ = ["get_db", "db", "DatabaseQuery"]
+```
+
+### app\database\database_query.py
+
+```python
+from app.database.db import db
+import re
+from bson import ObjectId
+from datetime import datetime, timezone, timedelta
+from pymongo import DESCENDING
+from typing import Optional
+
+
+class DatabaseQuery:
+    """Data-access helpers built on the module-level MongoDB handle ``db``.
+
+    Every method wraps failures in a plain ``Exception`` whose message names
+    the operation; the original error is chained as ``__cause__``.
+    """
+
+    # Boolean feature flags stored per user in the ``preferences`` collection.
+    _PREFERENCE_KEYS = (
+        'keywords',
+        'references',
+        'websearch',
+        'personalized_recommendations',
+        'environmental_recommendations',
+    )
+
+    def __init__(self):
+        pass
+
+    def create_chat_session(self, chat_session):
+        """Insert ``chat_session`` into ``chat_sessions``."""
+        try:
+            db.chat_sessions.insert_one(chat_session)
+        except Exception as e:
+            raise Exception(f"Error creating chat session: {str(e)}") from e
+
+    def get_user_chat_sessions(self, user_id):
+        """Return the user's sessions (without ``_id``), most recently accessed first."""
+        try:
+            sessions = list(db.chat_sessions.find(
+                {"user_id": user_id},
+                {"_id": 0}
+            ).sort("last_accessed", -1))
+            return sessions
+        except Exception as e:
+            raise Exception(f"Error retrieving user chat sessions: {str(e)}") from e
+
+    def create_chat(self, chat_data):
+        """Insert ``chat_data`` into ``chats`` and return ``True``."""
+        try:
+            db.chats.insert_one(chat_data)
+            return True
+        except Exception as e:
+            raise Exception(f"Error creating chat: {str(e)}") from e
+
+    def update_last_accessed_time(self, session_id):
+        """Set ``last_accessed`` of the session to the current UTC time."""
+        try:
+            db.chat_sessions.update_one(
+                {"session_id": session_id},
+                {"$set": {"last_accessed": datetime.now(timezone.utc)}}
+            )
+        except Exception as e:
+            raise Exception(f"Error updating last accessed time: {str(e)}") from e
+
+    def get_session_chats(self, session_id, user_id):
+        """Return the chats of one session owned by ``user_id``, oldest first."""
+        try:
+            chats = list(db.chats.find(
+                {"session_id": session_id, "user_id": user_id},
+                {"_id": 0}
+            ).sort("timestamp", 1))
+            return chats
+        except Exception as e:
+            raise Exception(f"Error retrieving session chats: {str(e)}") from e
+
+    def get_user_by_identifier(self, identifier):
+        """Return the user whose username or email equals ``identifier``, or ``None``."""
+        try:
+            user = db.users.find_one({'$or': [{'username': identifier}, {'email': identifier}]})
+            return user
+        except Exception as e:
+            raise Exception(f"Error retrieving user by identifier: {str(e)}") from e
+
+    def add_token_to_blacklist(self, jti):
+        """Record the token id ``jti`` in the ``blacklist`` collection."""
+        try:
+            db.blacklist.insert_one({'jti': jti})
+        except Exception as e:
+            raise Exception(f"Error adding token to blacklist: {str(e)}") from e
+
+
+    def create_indexes(self):
+        """Create the indexes used by the chat session and chat queries."""
+        try:
+            db.chat_sessions.create_index([("user_id", 1), ("last_accessed", -1)])
+            db.chat_sessions.create_index([("session_id", 1)])
+            db.chats.create_index([("session_id", 1), ("timestamp", 1)])
+            db.chats.create_index([("user_id", 1)])
+        except Exception as e:
+            raise Exception(f"Error creating indexes: {str(e)}") from e
+
+    def check_chat_session(self, session_id):
+        """Return ``True`` when a session with ``session_id`` exists."""
+        try:
+            chat_session = db.chat_sessions.find_one({'session_id': session_id})
+            return chat_session is not None
+        except Exception as e:
+            raise Exception(f"Error checking chat session: {str(e)}") from e
+
+    def get_user_profile(self, username):
+        """Return the user document without its ``password`` field, or ``None``."""
+        try:
+            user = db.users.find_one({'username': username}, {'password': 0})
+            return user
+        except Exception as e:
+            raise Exception(f"Error getting user profile: {str(e)}") from e
+
+    def update_user_profile(self, username, update_fields):
+        """Apply ``update_fields`` with ``$set``; return ``True`` if a document changed."""
+        try:
+            result = db.users.update_one(
+                {'username': username},
+                {'$set': update_fields}
+            )
+            return result.modified_count > 0
+        except Exception as e:
+            raise Exception(f"Error updating user profile: {str(e)}") from e
+
+    def delete_user_account(self, username):
+        """Delete the user document; return ``True`` if one was removed."""
+        try:
+            result = db.users.delete_one({'username': username})
+            return result.deleted_count > 0
+        except Exception as e:
+            raise Exception(f"Error deleting user account: {str(e)}") from e
+
+    def is_username_or_email_exists(self, username, email):
+        """Return ``True`` when any user already has this username or email."""
+        try:
+            user = db.users.find_one({'$or': [{'username': username}, {'email': email}]})
+            return user is not None
+        except Exception as e:
+            raise Exception(f"Error checking if username or email exists: {str(e)}") from e
+
+    def create_or_update_temp_user(self, username, email, temp_user_data):
+        """Upsert a pending registration matched by username or email."""
+        try:
+            db.temp_users.update_one(
+                {'$or': [{'username': username}, {'email': email}]},
+                {'$set': temp_user_data},
+                upsert=True
+            )
+        except Exception as e:
+            raise Exception(f"Error creating/updating temp user: {str(e)}") from e
+
+    def get_temp_user_by_username(self, username):
+        """Return the pending registration for ``username``, or ``None``."""
+        try:
+            temp_user = db.temp_users.find_one({'username': username})
+            return temp_user
+        except Exception as e:
+            raise Exception(f"Error retrieving temp user by username: {str(e)}") from e
+
+    def delete_temp_user(self, username):
+        """Remove the pending registration for ``username``."""
+        try:
+            db.temp_users.delete_one({'username': username})
+        except Exception as e:
+            raise Exception(f"Error deleting temp user: {str(e)}") from e
+
+    def create_user_from_data(self, user_data):
+        """Insert ``user_data`` into ``users`` and return the same dict."""
+        try:
+            db.users.insert_one(user_data)
+            return user_data
+        except Exception as e:
+            raise Exception(f"Error creating user from data: {str(e)}") from e
+
+    def create_user(self, username, email, hashed_password, name, age, created_at,
+                    is_verified=False, verification_code=None, code_expiration=None):
+        """Build and insert a user document; return the inserted dict.
+
+        The verification fields are stored only when both a code and its
+        expiration are supplied.
+        """
+        try:
+            new_user = {
+                'username': username,
+                'email': email,
+                'password': hashed_password,
+                'name': name,
+                'age': age,
+                'created_at': created_at,
+                'is_verified': is_verified
+            }
+            if verification_code and code_expiration:
+                new_user['verification_code'] = verification_code
+                new_user['code_expiration'] = code_expiration
+
+            db.users.insert_one(new_user)
+            return new_user
+        except Exception as e:
+            raise Exception(f"Error creating user: {str(e)}") from e
+
+    def get_user_by_username(self, username):
+        """Return the full user document for ``username``, or ``None``."""
+        try:
+            user = db.users.find_one({'username': username})
+            return user
+        except Exception as e:
+            raise Exception(f"Error retrieving user by username: {str(e)}") from e
+
+    def verify_user_email(self, username):
+        """Mark the user verified and drop the verification fields."""
+        try:
+            result = db.users.update_one(
+                {'username': username},
+                {'$set': {'is_verified': True}, '$unset': {'verification_code': '', 'code_expiration': ''}}
+            )
+            return result.modified_count > 0
+        except Exception as e:
+            raise Exception(f"Error verifying user email: {str(e)}") from e
+
+    def update_verification_code(self, username, verification_code, code_expiration):
+        """Store a new verification code and expiration; return ``True`` if changed."""
+        try:
+            result = db.users.update_one(
+                {'username': username},
+                {'$set': {'verification_code': verification_code, 'code_expiration': code_expiration}}
+            )
+            return result.modified_count > 0
+        except Exception as e:
+            raise Exception(f"Error updating verification code: {str(e)}") from e
+
+    def is_valid_email(self, email):
+        """Return ``True`` when the whole of ``email`` looks like ``local@domain.tld``.
+
+        The previous pattern was applied with ``re.match`` and no end anchor, so
+        trailing garbage after the address was accepted, and it limited the TLD
+        to four letters, rejecting longer valid ones.
+        """
+        try:
+            email_regex = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}'
+            return re.fullmatch(email_regex, email) is not None
+        except Exception as e:
+            raise Exception(f"Error validating email: {str(e)}") from e
+
+    def add_or_update_location(self, username, location):
+        """Upsert the user's location together with an ``updated_at`` timestamp."""
+        try:
+            db.locations.update_one(
+                {'username': username},
+                {'$set': {'location': location, 'updated_at': datetime.now(timezone.utc)}},
+                upsert=True
+            )
+        except Exception as e:
+            raise Exception(f"Error adding/updating location: {str(e)}") from e
+
+    def get_location(self, username):
+        """Return the stored location document for ``username``, or ``None``."""
+        try:
+            location = db.locations.find_one({'username': username})
+            return location
+        except Exception as e:
+            raise Exception(f"Error retrieving location: {str(e)}") from e
+
+    def submit_questionnaire(self, user_id, answers):
+        """Insert a questionnaire and return its id as a string."""
+        try:
+            # One clock reading so a fresh record has created_at == updated_at.
+            now = datetime.now(timezone.utc)
+            questionnaire_data = {
+                'user_id': user_id,
+                'answers': answers,
+                'created_at': now,
+                'updated_at': now
+            }
+            result = db.questionnaires.insert_one(questionnaire_data)
+            return str(result.inserted_id)
+        except Exception as e:
+            raise Exception(f"Error submitting questionnaire: {str(e)}") from e
+
+    def get_latest_questionnaire(self, user_id):
+        """Return the newest questionnaire of the user with ``_id`` as a string, or ``None``."""
+        try:
+            questionnaire = db.questionnaires.find_one(
+                {'user_id': user_id},
+                sort=[('created_at', -1)]
+            )
+            if questionnaire:
+                questionnaire['_id'] = str(questionnaire['_id'])
+            return questionnaire
+        except Exception as e:
+            raise Exception(f"Error getting latest questionnaire: {str(e)}") from e
+
+    def update_questionnaire(self, questionnaire_id, user_id, answers):
+        """Replace the answers of the user's questionnaire; return ``True`` if changed."""
+        try:
+            result = db.questionnaires.update_one(
+                {'_id': ObjectId(questionnaire_id), 'user_id': user_id},
+                {
+                    '$set': {
+                        'answers': answers,
+                        'updated_at': datetime.now(timezone.utc)
+                    }
+                }
+            )
+            return result.modified_count > 0
+        except Exception as e:
+            raise Exception(f"Error updating questionnaire: {str(e)}") from e
+
+    def delete_questionnaire(self, questionnaire_id, user_id):
+        """Delete the user's questionnaire; return ``True`` if one was removed."""
+        try:
+            result = db.questionnaires.delete_one(
+                {'_id': ObjectId(questionnaire_id), 'user_id': user_id}
+            )
+            return result.deleted_count > 0
+        except Exception as e:
+            raise Exception(f"Error deleting questionnaire: {str(e)}") from e
+
+
+    def count_answered_questions(self, username):
+        """Count the user's ``questions`` documents whose ``answer`` is not ``None``."""
+        try:
+            answered_count = db.questions.count_documents({
+                'username': username,
+                'answer': {'$ne': None}
+            })
+            return answered_count
+        except Exception as e:
+            raise Exception(f"Error counting answered questions: {str(e)}") from e
+
+    def get_user_preferences(self, username):
+        """Return the user's feature flags; every missing flag defaults to ``False``."""
+        try:
+            user_preferences = db.preferences.find_one({'username': username}) or {}
+            return {key: user_preferences.get(key, False) for key in self._PREFERENCE_KEYS}
+        except Exception as e:
+            raise Exception(f"Error getting user preferences: {str(e)}") from e
+
+    def set_user_preferences(self, username, preferences):
+        """Upsert the user's feature flags (coerced to ``bool``) and return the stored dict."""
+        try:
+            preferences_data = {
+                'username': username,
+                **{key: bool(preferences.get(key, False)) for key in self._PREFERENCE_KEYS},
+                'updated_at': datetime.now(timezone.utc)
+            }
+            db.preferences.update_one(
+                {'username': username},
+                {'$set': preferences_data},
+                upsert=True
+            )
+            return preferences_data
+        except Exception as e:
+            raise Exception(f"Error setting user preferences: {str(e)}") from e
+
+    def get_user_theme(self, username):
+        """Return the stored theme name, defaulting to ``'light'``."""
+        try:
+            user_theme = db.user_themes.find_one({'username': username})
+            if not user_theme:
+                return 'light'
+            return user_theme.get('theme', 'light')
+        except Exception as e:
+            raise Exception(f"Error getting user theme: {str(e)}") from e
+
+    def set_user_theme(self, username, theme):
+        """Store ``"dark"`` when ``theme`` is truthy, else ``"light"``; return the stored dict."""
+        try:
+            theme_data = {
+                'username': username,
+                'theme': "dark" if theme else "light",
+                'updated_at': datetime.now(timezone.utc)
+            }
+            db.user_themes.update_one(
+                {'username': username},
+                {'$set': theme_data},
+                upsert=True
+            )
+            return theme_data
+        except Exception as e:
+            raise Exception(f"Error setting user theme: {str(e)}") from e
+
+
+    def verify_session(self, session_id, user_id):
+        """Return ``True`` when the session exists and belongs to ``user_id``."""
+        try:
+            session = db.chat_sessions.find_one({
+                "session_id": session_id,
+                "user_id": user_id
+            })
+            return session is not None
+        except Exception as e:
+            raise Exception(f"Error verifying session: {str(e)}") from e
+
+    def update_chat_session_title(self, session_id, new_title):
+        """Rename a session; raise if it does not exist, return ``True`` if changed."""
+        try:
+            # NOTE(review): unlike delete_chat_session, this filter is not scoped
+            # by user_id; confirm that callers check ownership (verify_session) first.
+            result = db.chat_sessions.update_one(
+                {"session_id": session_id},
+                {"$set": {"title": new_title}}
+            )
+            if result.matched_count == 0:
+                raise Exception("Chat session not found")
+            return result.modified_count > 0
+        except Exception as e:
+            raise Exception(f"Error updating chat session title: {str(e)}") from e
+
+    def delete_chat_session(self, session_id, user_id):
+        """Delete one of the user's sessions and its chats; return what was removed."""
+        try:
+            session_result = db.chat_sessions.delete_one({
+                "session_id": session_id,
+                "user_id": user_id
+            })
+            chats_result = db.chats.delete_many({
+                "session_id": session_id,
+                "user_id": user_id
+            })
+
+            return {
+                "session_deleted": session_result.deleted_count > 0,
+                "chats_deleted": chats_result.deleted_count
+            }
+        except Exception as e:
+            raise Exception(f"Error deleting chat session and chats: {str(e)}") from e
+
+    def delete_all_user_sessions_and_chats(self, user_id):
+        """Delete every chat and session of the user; return the deleted counts."""
+        try:
+            chats_result = db.chats.delete_many({"user_id": user_id})
+            sessions_result = db.chat_sessions.delete_many({"user_id": user_id})
+            return {
+                "deleted_chats": chats_result.deleted_count,
+                "deleted_sessions": sessions_result.deleted_count
+            }
+        except Exception as e:
+            raise Exception(f"Error deleting user sessions and chats: {str(e)}") from e
+
+    def get_all_user_chats(self, user_id):
+        """Return every session of the user, newest first, each with its chats oldest first."""
+        try:
+            sessions = list(db.chat_sessions.find(
+                {"user_id": user_id},
+                {"_id": 0}
+            ).sort("last_accessed", -1))
+            all_chats = []
+            for session in sessions:
+                session_chats = list(db.chats.find(
+                    {"session_id": session["session_id"], "user_id": user_id},
+                    {"_id": 0}
+                ).sort("timestamp", 1))
+
+                all_chats.append({
+                    "session_id": session["session_id"],
+                    "title": session.get("title", "New Chat"),
+                    "created_at": session.get("created_at"),
+                    "last_accessed": session.get("last_accessed"),
+                    "chats": session_chats
+                })
+
+            return all_chats
+        except Exception as e:
+            raise Exception(f"Error retrieving all user chats: {str(e)}") from e
+
+    def store_reset_token(self, email, token, expiration):
+        """Upsert the password-reset token and its expiration for ``email``."""
+        try:
+            db.password_resets.update_one(
+                {'email': email},
+                {
+                    '$set': {
+                        'token': token,
+                        'expiration': expiration
+                    }
+                },
+                upsert=True
+            )
+        except Exception as e:
+            raise Exception(f"Error storing reset token: {str(e)}") from e
+
+    def verify_reset_token(self, token):
+        """Return the reset record for ``token`` if it has not expired, else ``None``."""
+        try:
+            reset_info = db.password_resets.find_one({
+                'token': token,
+                'expiration': {'$gt': datetime.now(timezone.utc)}
+            })
+            return reset_info
+        except Exception as e:
+            raise Exception(f"Error verifying reset token: {str(e)}") from e
+
+    def update_password(self, email, new_password):
+        """Overwrite the stored ``password`` of the user with this email.
+
+        The value is stored as given. This class defined the method twice; the
+        parameter keeps the name of the later definition, the one in effect at
+        runtime. The earlier copy called it ``hashed_password``, so callers are
+        presumably expected to pass an already hashed value.
+        """
+        try:
+            db.users.update_one(
+                {'email': email},
+                {'$set': {'password': new_password}}
+            )
+        except Exception as e:
+            raise Exception(f"Error updating password: {str(e)}") from e
+
+    def delete_reset_token(self, token):
+        """Remove the password-reset record holding ``token``."""
+        try:
+            db.password_resets.delete_one({'token': token})
+        except Exception as e:
+            raise Exception(f"Error deleting reset token: {str(e)}") from e
+
+    def delete_account_permanently(self, username):
+        """Delete the user and the related per-user data; return per-collection counts.
+
+        ``username`` is used both as ``username`` and as ``user_id`` in the
+        collections touched here.
+        """
+        try:
+            # NOTE(review): languages, skin_schedules, image_uploads,
+            # rag_interactions and password_resets are also written by this
+            # class but are not purged here; confirm whether that is intended.
+            chat_deletion_result = self.delete_all_user_sessions_and_chats(username)
+            preferences_result = db.preferences.delete_one({'username': username})
+            theme_result = db.user_themes.delete_one({'username': username})
+            location_result = db.locations.delete_one({'username': username})
+            questionnaire_result = db.questionnaires.delete_many({'user_id': username})
+            user_result = db.users.delete_one({'username': username})
+
+            return {
+                'success': True,
+                'deleted_data': {
+                    'chats': chat_deletion_result['deleted_chats'],
+                    'chat_sessions': chat_deletion_result['deleted_sessions'],
+                    'preferences': preferences_result.deleted_count,
+                    'theme': theme_result.deleted_count,
+                    'location': location_result.deleted_count,
+                    'questionnaires': questionnaire_result.deleted_count,
+                    'user_account': user_result.deleted_count
+                }
+            }
+        except Exception as e:
+            raise Exception(f"Error deleting account permanently: {str(e)}") from e
+
+    def get_user_language(self, user_id):
+        """Return the stored language of the user, or ``None`` when unset."""
+        try:
+            language = db.languages.find_one({'user_id': user_id})
+            return language.get('language') if language else None
+        except Exception as e:
+            raise Exception(f"Error retrieving user language: {str(e)}") from e
+
+    def set_user_language(self, user_id, language):
+        """Upsert the user's language and return the stored dict."""
+        try:
+            language_data = {
+                'user_id': user_id,
+                'language': language,
+                'updated_at': datetime.now(timezone.utc)
+            }
+            db.languages.update_one(
+                {'user_id': user_id},
+                {'$set': language_data},
+                upsert=True
+            )
+            return language_data
+        except Exception as e:
+            raise Exception(f"Error setting user language: {str(e)}") from e
+
+    def delete_user_language(self, user_id):
+        """Delete the user's language setting; return ``True`` if one was removed."""
+        try:
+            result = db.languages.delete_one({'user_id': user_id})
+            return result.deleted_count > 0
+        except Exception as e:
+            raise Exception(f"Error deleting user language: {str(e)}") from e
+
+    def get_today_schedule(self, user_id):
+        """Return the schedule created for the user during the current UTC day, or ``None``."""
+        try:
+            # Half-open window [today 00:00, tomorrow 00:00) in UTC. The earlier
+            # upper bound of 23:59:59 missed the final sub-second of the day.
+            today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
+            tomorrow = today + timedelta(days=1)
+
+            schedule = db.skin_schedules.find_one({
+                "user_id": user_id,
+                "created_at": {
+                    "$gte": today,
+                    "$lt": tomorrow
+                }
+            })
+            return schedule
+        except Exception as e:
+            raise Exception(f"Error retrieving today's schedule: {str(e)}") from e
+
+    def save_schedule(self, user_id, schedule_data):
+        """Store today's schedule unless one exists; return the schedule id as a string."""
+        try:
+            existing_schedule = self.get_today_schedule(user_id)
+            if existing_schedule:
+                return str(existing_schedule["_id"])
+
+            schedule = {
+                "user_id": user_id,
+                "schedule_data": schedule_data,
+                "created_at": datetime.now(timezone.utc)
+            }
+            result = db.skin_schedules.insert_one(schedule)
+            return str(result.inserted_id)
+        except Exception as e:
+            raise Exception(f"Error saving schedule: {str(e)}") from e
+
+    def get_last_seven_days_schedules(self, user_id):
+        """Return the user's schedules created in the last 7 days, newest first."""
+        try:
+            seven_days_ago = datetime.now(timezone.utc) - timedelta(days=7)
+            schedules = db.skin_schedules.find({
+                "user_id": user_id,
+                "created_at": {"$gte": seven_days_ago}
+            }).sort("created_at", -1)
+            return list(schedules)
+        except Exception as e:
+            raise Exception(f"Error fetching last 7 days schedules: {str(e)}") from e
+
+    def save_rag_interaction(self, user_id: str, session_id: str, context: str, query: str,
+                             response: str, rag_start_time: datetime, rag_end_time: datetime):
+        """Store one RAG query/response record and return its ``interaction_id``.
+
+        Both timestamps are converted to UTC with ``astimezone``, which treats
+        a naive datetime as local time.
+        """
+        try:
+            interaction = {
+                "interaction_id": str(ObjectId()),
+                "user_id": user_id,
+                "session_id": session_id,
+                "context": context,
+                "query": query,
+                "response": response,
+                "rag_start_time": rag_start_time.astimezone(timezone.utc),
+                "rag_end_time": rag_end_time.astimezone(timezone.utc),
+                "created_at": datetime.now(timezone.utc)
+            }
+
+            db.rag_interactions.insert_one(interaction)
+            return interaction["interaction_id"]
+
+        except Exception as e:
+            raise Exception(f"Error saving RAG interaction: {str(e)}") from e
+
+    def get_rag_interactions(
+        self,
+        user_id: Optional[str] = None,
+        page: int = 1,
+        page_size: int = 5
+    ) -> dict:
+        """Return one page of RAG interactions, newest first, with ISO-formatted times.
+
+        ``page`` and ``page_size`` are clamped to at least 1. Smaller values
+        previously produced a negative skip or a division by zero.
+        """
+        try:
+            page = max(page, 1)
+            page_size = max(page_size, 1)
+
+            query_filter = {}
+            if user_id:
+                query_filter["user_id"] = user_id
+
+            skip = (page - 1) * page_size
+            total = db.rag_interactions.count_documents(query_filter)
+            interactions = db.rag_interactions.find(
+                query_filter,
+                {"_id": 0}
+            ).sort("created_at", DESCENDING).skip(skip).limit(page_size)
+
+            result_list = []
+            for interaction in interactions:
+                for field in ("rag_start_time", "rag_end_time", "created_at"):
+                    interaction[field] = interaction[field].isoformat()
+                result_list.append(interaction)
+
+            return {
+                "total_interactions": total,
+                "page": page,
+                "page_size": page_size,
+                # Ceiling division without floats.
+                "total_pages": (total + page_size - 1) // page_size,
+                "results": result_list
+            }
+        except Exception as e:
+            raise Exception(f"Error retrieving RAG interactions: {str(e)}") from e
+
+    def log_image_upload(self, user_id):
+        """Log an image upload for a user"""
+        try:
+            timestamp = datetime.now(timezone.utc)  # This is timezone-aware
+            db.image_uploads.insert_one({
+                "user_id": user_id,
+                "timestamp": timestamp
+            })
+            return True
+        except Exception as e:
+            raise Exception(f"Error logging image upload: {str(e)}") from e
+
+    def get_user_daily_uploads(self, user_id):
+        """Get number of images uploaded by user in the last 24 hours"""
+        try:
+            now = datetime.now(timezone.utc)
+            yesterday = now - timedelta(days=1)
+
+            count = db.image_uploads.count_documents({
+                "user_id": user_id,
+                "timestamp": {"$gte": yesterday}
+            })
+            return count
+        except Exception as e:
+            raise Exception(f"Error retrieving user daily uploads: {str(e)}") from e
+
+    def get_user_last_upload_time(self, user_id):
+        """Get the timestamp of user's most recent image upload"""
+        try:
+            last_upload = db.image_uploads.find_one(
+                {"user_id": user_id},
+                sort=[("timestamp", DESCENDING)]
+            )
+            return last_upload["timestamp"] if last_upload else None
+        except Exception as e:
+            raise Exception(f"Error retrieving last upload time: {str(e)}") from e
+```
+
+### app\database\db.py
+
+```python
+import os
+from pymongo.mongo_client import MongoClient
+from pymongo.server_api import ServerApi
+
+
+# Read the connection string once; the same value is validated and used.
+uri = os.getenv('MONGO_URI')
+if not uri:
+    raise ValueError("MONGO_URI environment variable is not set")
+
+# Alias kept because the module previously exposed both names.
+mongo_uri = uri
+
+
+def get_db():
+    """Create a client for ``uri``, ping the server and return the ``dermai`` database."""
+    client = MongoClient(uri, server_api=ServerApi('1'))
+    try:
+        client.admin.command('ping')
+    except Exception as e:
+        # Best-effort connectivity check: a failed ping is printed, not raised.
+        print(e)
+    return client.get_database("dermai")
+
+db = get_db()
+```
+
+### app\main.py
+
+```python
+# app/main.py
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+import os
+from dotenv import load_dotenv
+from app.config.config import Config
+from app.routers import admin, auth, chat, location, preferences, profile, questionnaire, language, chat_session
+from app.routers import agent_chat
+load_dotenv()
+
+app = FastAPI(title="Skin AI API")
+
+# Configure CORS
+# NOTE(review): wildcard origins together with allow_credentials=True is very
+# permissive, and Config.CORS_ORIGINS is defined but not used here; confirm
+# the intended origin list.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Mount static files for uploads
+os.makedirs(Config.UPLOAD_FOLDER, exist_ok=True)
+app.mount("/uploads", StaticFiles(directory=Config.UPLOAD_FOLDER), name="uploads")
+
+# Register routers
+app.include_router(admin.router, prefix="/api", tags=["admin"])
+app.include_router(auth.router, prefix="/api", tags=["auth"])
+app.include_router(chat.router, prefix="/api", tags=["chat"])
+app.include_router(location.router, prefix="/api", tags=["location"])
+app.include_router(preferences.router, prefix="/api", tags=["preferences"])
+app.include_router(profile.router, prefix="/api", tags=["profile"])
+app.include_router(questionnaire.router, prefix="/api", tags=["questionnaire"])
+app.include_router(language.router, prefix="/api", tags=["language"])
+app.include_router(chat_session.router, prefix="/api", tags=["chat_session"])
+app.include_router(agent_chat.router, prefix="/api", tags=["agent_chat"])
+
+
+@app.get("/")
+async def root():
+    """Health-check endpoint."""
+    return {"message": "API is running", "status": "healthy"}
+```
+
+### app\middleware\auth.py
+
+```python
+# app/middleware/auth.py
+from fastapi import Depends, HTTPException, status
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+import jwt
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+import os
+
+security = HTTPBearer()
+# Same scheme, but a missing or malformed Authorization header yields None
+# instead of an automatic error response, so optional endpoints stay reachable.
+optional_security = HTTPBearer(auto_error=False)
+JWT_SECRET_KEY = os.getenv('JWT_SECRET_KEY')
+# Lifetime in seconds. Falls back to 3600 because int(None) would raise
+# TypeError at import time when the variable is unset.
+JWT_ACCESS_TOKEN_EXPIRES = int(os.getenv('JWT_ACCESS_TOKEN_EXPIRES') or 3600)
+
+def create_access_token(data: dict):
+    """Return an HS256-signed JWT carrying ``data`` plus an ``exp`` claim."""
+    to_encode = data.copy()
+    # Timezone-aware replacement for the deprecated datetime.utcnow().
+    expire = datetime.now(timezone.utc) + timedelta(seconds=JWT_ACCESS_TOKEN_EXPIRES)
+    to_encode.update({"exp": expire})
+    encoded_jwt = jwt.encode(to_encode, JWT_SECRET_KEY, algorithm="HS256")
+    return encoded_jwt
+
+def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
+    """Decode the bearer token and return its ``sub`` claim.
+
+    Raises:
+        HTTPException: 401 when the token is invalid or has no ``sub`` claim.
+    """
+    try:
+        payload = jwt.decode(credentials.credentials, JWT_SECRET_KEY, algorithms=["HS256"])
+        username: str = payload.get("sub")
+        if username is None:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Invalid authentication credentials",
+                headers={"WWW-Authenticate": "Bearer"},
+            )
+        return username
+    except jwt.PyJWTError:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid authentication credentials",
+            headers={"WWW-Authenticate": "Bearer"},
+        )
+
+def get_current_user(username: str = Depends(verify_token)):
+    """Dependency returning the authenticated username."""
+    return username
+
+# For optional JWT authentication (some endpoints allow unauthenticated access)
+def get_optional_user(authorization: Optional[HTTPAuthorizationCredentials] = Depends(optional_security)):
+    """Return the token's ``sub`` claim, or ``None`` when no valid token is sent.
+
+    This depends on ``optional_security``: with the auto-error ``security``
+    scheme a request without a token was rejected before this function ran.
+    """
+    if authorization is None:
+        return None
+    try:
+        payload = jwt.decode(authorization.credentials, JWT_SECRET_KEY, algorithms=["HS256"])
+    except jwt.PyJWTError:
+        return None
+    return payload.get("sub")
+```
+
+### app\routers\admin.py
+
+```python
+# app/routers/admin.py
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
+from typing import List
+import os
+from app.database.database_query import DatabaseQuery
+from app.services.vector_database_search import VectorDatabaseSearch
+from app.middleware.auth import get_current_user
+from pydantic import BaseModel
+
+router = APIRouter()
+vector_db = VectorDatabaseSearch()
+query = DatabaseQuery()
+TEMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'temp')
+os.makedirs(TEMP_DIR, exist_ok=True)
+
+class SearchQuery(BaseModel):
+    """Request body for ``/search``: the query text and the number of hits."""
+    query: str
+    k: int = 5
+
+@router.get('/books')
+async def get_books(username: str = Depends(get_current_user)):
+    """Return the book information reported by the vector database."""
+    try:
+        book_info = vector_db.get_book_info()
+        return {
+            'status': 'success',
+            'data': book_info
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.post('/books', status_code=201)
+async def add_books(files: List[UploadFile] = File(...), username: str = Depends(get_current_user)):
+    """Save the uploaded PDFs to ``TEMP_DIR``, index them, then delete the copies.
+
+    Raises:
+        HTTPException: 400 when no PDF was uploaded, 500 on any other failure.
+    """
+    pdf_paths = []
+    try:
+        for file in files:
+            # filename can be missing; the extension check ignores case.
+            if not file.filename or not file.filename.lower().endswith('.pdf'):
+                continue
+            safe_filename = os.path.basename(file.filename)
+            temp_path = os.path.join(TEMP_DIR, safe_filename)
+            # Register the path before writing so a partially written file
+            # is still removed by the cleanup below.
+            pdf_paths.append(temp_path)
+
+            with open(temp_path, "wb") as buffer:
+                buffer.write(await file.read())
+
+        if not pdf_paths:
+            raise HTTPException(status_code=400, detail="No valid PDF files provided")
+
+        success_count = 0
+        for pdf_path in pdf_paths:
+            if vector_db.add_pdf(pdf_path):
+                success_count += 1
+
+        return {
+            'status': 'success',
+            'message': f'Successfully added {success_count} of {len(pdf_paths)} books'
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        # Single cleanup path for success and failure; removal is best-effort.
+        for path in pdf_paths:
+            try:
+                os.remove(path)
+            except OSError:
+                pass
+
+@router.post('/search')
+async def search_books(search_data: SearchQuery, username: str = Depends(get_current_user)):
+    """Run a vector search and return the top ``k`` results."""
+    try:
+        query_text = search_data.query
+        k = search_data.k
+
+        results = vector_db.search(
+            query=query_text,
+            top_k=k
+        )
+
+        return {
+            'status': 'success',
+            'data': results
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+```
+
+### app\routers\agent_chat.py
+
+```python
+# app/routers/agent_chat.py
+from fastapi import APIRouter, Depends, HTTPException, Header
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from typing import Optional
+import json
+import asyncio
+import logging
+from app.middleware.auth import get_current_user
+from app.services.agent_service import GoogleAgentService
+
+router = APIRouter()
+logger = logging.getLogger(__name__)
+
+class AgentChatRequest(BaseModel):
+    """Request body for ``/agent-chat``."""
+    session_id: Optional[str] = None
+    query: str
+
+async def stream_agent_response(agent_service: GoogleAgentService, query: str):
+    """Stream agent responses as JSON chunks"""
+    try:
+        logger.info(f"Starting stream for query: {query[:50]}...")
+
+        async for chunk in agent_service.process_message_async(query):
+            logger.debug(f"Streaming chunk type: {chunk['type']}")
+
+            if chunk["type"] == "chunk":
+                # Send the JSON chunk directly
+                yield f"data: {json.dumps({'type': 'chunk', 'content': chunk['content']})}\n\n"
+            elif chunk["type"] == "tool_call":
+                # Send tool call information
+                yield f"data: {json.dumps({'type': 'tool_call', 'tool_name': chunk['tool_name'], 'arguments': chunk['arguments']})}\n\n"
+            elif chunk["type"] == "tool_result":
+                # Send tool result
+                yield f"data: {json.dumps({'type': 'tool_result', 'tool_name': chunk['tool_name'], 'result': chunk['result']})}\n\n"
+            elif chunk["type"] == "completed":
+                # Send completion signal
+                yield f"data: {json.dumps({'type': 'completed', 'saved': chunk['saved'], 'session_id': chunk.get('session_id')})}\n\n"
+            elif chunk["type"] == "error":
+                # Send error
+                yield f"data: {json.dumps({'type': 'error', 'message': chunk['content']})}\n\n"
+
+            # Small delay to prevent overwhelming the client
+            await asyncio.sleep(0.001)
+
+        # Send final done signal
+        yield f"data: {json.dumps({'type': 'done'})}\n\n"
+        logger.info("Stream completed successfully")
+
+    except Exception as e:
+        # The response has already started, so the failure is reported in-band.
+        logger.error(f"Streaming error: {e}", exc_info=True)
+        yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
+
+@router.post('/agent-chat')
+async def agent_chat(
+    request: AgentChatRequest,
+    authorization: str = Header(None),
+    username: str = Depends(get_current_user)
+):
+    """
+    Stream chat responses using Google Agents SDK
+    Returns Server-Sent Events stream with JSON content
+
+    Raises:
+        HTTPException: 401 for a missing or malformed Authorization header,
+            500 when the agent service cannot be set up.
+    """
+    try:
+        logger.info(f"Agent chat request from user: {username}, session: {request.session_id}")
+
+        # Extract token from authorization header
+        if not authorization or not authorization.startswith("Bearer "):
+            raise HTTPException(status_code=401, detail="Invalid authorization header")
+
+        token = authorization[len("Bearer "):].strip()
+        if not token:
+            raise HTTPException(status_code=401, detail="Invalid authorization header")
+
+        # Initialize agent service
+        agent_service = GoogleAgentService(
+            token=token,
+            session_id=request.session_id
+        )
+
+        logger.info(f"Agent service initialized for user: {username}")
+
+        # Return streaming response
+        return StreamingResponse(
+            stream_agent_response(agent_service, request.query),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+                "Content-Type": "text/event-stream",
+                "X-Accel-Buffering": "no",  # Disable nginx buffering
+                "Access-Control-Allow-Origin": "*",
+                "Access-Control-Allow-Headers": "*",
+            }
+        )
+
+    except HTTPException:
+        # Let the deliberate 401 through. The generic handler below used to
+        # catch it and re-raise it as a 500.
+        raise
+    except Exception as e:
+        logger.error(f"Agent chat error for user {username}: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.get('/agent-status')
+async def agent_status(username: str = Depends(get_current_user)):
+    """Check if agent service is available"""
+    try:
+        logger.info(f"Agent status check for user: {username}")
+        return {
+            "status": "available",
+            "model": "gemini-2.0-flash-exp",
+            "features": ["web_search", "vector_search", "image_search", "streaming", "json_response", "tool_calls"]
+        }
+    except Exception as e:
+        logger.error(f"Agent status error: {e}", exc_info=True)
+        return {
+            "status": "error",
+            "message": str(e)
+        }
+```
+
+### app\routers\auth.py
+
+```python
+# app/routers/auth.py
+from fastapi import APIRouter, HTTPException, Depends
+from pydantic import BaseModel, EmailStr
+from werkzeug.security import generate_password_hash, check_password_hash
+from datetime import datetime, timedelta
+import random
+import string
+from sendgrid import SendGridAPIClient
+from sendgrid.helpers.mail import Mail
+import os
+from app.database.database_query import DatabaseQuery
+from app.middleware.auth import create_access_token, get_current_user
+from dotenv import load_dotenv
+
+
+load_dotenv()
+
+SENDGRID_API_KEY = os.getenv("SENDGRID_API_KEY")
+FROM_EMAIL = os.getenv("FROM_EMAIL")
+
+
+router = APIRouter()
+query = DatabaseQuery()
+
+class LoginRequest(BaseModel):
+    """Login body; ``identifier`` may be a username or an email."""
+    identifier: str
+    password: str
+
+class LoginResponse(BaseModel):
+    """Successful login response carrying the access token."""
+    message: str
+    token: str
+
+class RegisterRequest(BaseModel):
+    """Registration body."""
+    username: str
+    email: EmailStr
+    password: str
+    name: str
+    age: int
+
+class VerifyEmailRequest(BaseModel):
+    """Email verification body: the username and the code that was sent."""
+    username: str
+    code: str

class ResendCodeRequest(BaseModel):
    """Body of ``POST /resend-code``."""

    username: str


class ForgotPasswordRequest(BaseModel):
    """Body of ``POST /forgot-password``."""

    email: EmailStr


class ResetPasswordRequest(BaseModel):
    """Body of ``POST /reset-password``."""

    token: str
    password: str


class ChatSessionCheck(BaseModel):
    """Body of ``POST /checkChatsession``."""

    session_id: str


def _new_verification_code(length=6):
    """Return a numeric one-time code drawn from the OS CSPRNG."""
    import secrets  # local import: the file-level import block is left as-is

    return ''.join(secrets.choice(string.digits) for _ in range(length))


def _send_mail(to_email, subject, html_content):
    """Send one HTML e-mail through SendGrid; any error propagates."""
    message = Mail(
        from_email=FROM_EMAIL,
        to_emails=to_email,
        subject=subject,
        html_content=html_content,
    )
    SendGridAPIClient(SENDGRID_API_KEY).send(message)


def _verification_email_body(name, intro, verification_code):
    """Build the HTML body shared by the register and resend-code e-mails.

    ``name`` is user-supplied, so it is HTML-escaped before interpolation.
    """
    from html import escape

    return f'''
        <p>Hi {escape(name)},</p>
        <p>{intro}</p>
        <p><strong>{verification_code}</strong></p>
        <p>This code will expire in 10 minutes.</p>
    '''


@router.post('/login', response_model=LoginResponse)
async def login(login_data: LoginRequest):
    """Authenticate by username or e-mail and return an access token.

    Raises:
        HTTPException: 401 when the account is unverified or the credentials
            do not match; 500 on any unexpected error.
    """
    try:
        user = query.get_user_by_identifier(login_data.identifier)
        if user:
            if not user.get('is_verified'):
                raise HTTPException(status_code=401, detail="Please verify your email before logging in")

            if check_password_hash(user['password'], login_data.password):
                access_token = create_access_token({"sub": user['username']})
                return {"message": "Login successful", "token": access_token}

        raise HTTPException(status_code=401, detail="Invalid username/email or password")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post('/register', status_code=201)
async def register(register_data: RegisterRequest):
    """Store a pending (unverified) user and e-mail a 6-digit verification code.

    Raises:
        HTTPException: 409 when the username or e-mail is taken; 500 when the
            e-mail cannot be sent or on any unexpected error.
    """
    try:
        username = register_data.username
        email = register_data.email

        if query.is_username_or_email_exists(username, email):
            raise HTTPException(status_code=409, detail="Username or email already exists")

        verification_code = _new_verification_code()
        # Naive UTC on purpose: verify_email compares against datetime.utcnow().
        created_at = datetime.utcnow()

        temp_user = {
            'username': username,
            'email': email,
            'password': generate_password_hash(register_data.password),
            'name': register_data.name,
            'age': register_data.age,
            'created_at': created_at,
            'verification_code': verification_code,
            'code_expiration': created_at + timedelta(minutes=10),
        }
        query.create_or_update_temp_user(username, email, temp_user)

        try:
            _send_mail(
                email,
                'Verify your email address',
                _verification_email_body(
                    register_data.name,
                    'Thank you for registering. Please use the following code to verify your email address:',
                    verification_code,
                ),
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail="Failed to send verification email") from e

        return {"message": "Registration successful. A verification code has been sent to your email."}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post('/verify-email')
async def verify_email(verify_data: VerifyEmailRequest):
    """Promote a pending user to a verified account and seed default settings.

    Raises:
        HTTPException: 404 when no pending user exists; 400 when the code is
            wrong or expired; 500 on any unexpected error.
    """
    try:
        username = verify_data.username

        temp_user = query.get_temp_user_by_username(username)
        if not temp_user:
            raise HTTPException(status_code=404, detail="User not found or already verified")

        if temp_user['verification_code'] != verify_data.code:
            raise HTTPException(status_code=400, detail="Invalid verification code")

        if datetime.utcnow() > temp_user['code_expiration']:
            raise HTTPException(status_code=400, detail="Verification code has expired")

        # Drop the verification bookkeeping and the temp collection's _id
        # before inserting the record as a real user.
        transient_keys = ('verification_code', 'code_expiration', '_id')
        user_data = {k: v for k, v in temp_user.items() if k not in transient_keys}
        user_data['is_verified'] = True

        query.create_user_from_data(user_data)
        query.delete_temp_user(username)

        # Defaults: English, light theme (False = not dark).
        query.set_user_language(username, "English")
        query.set_user_theme(username, False)
        query.set_user_preferences(username, {
            'keywords': True,
            'references': True,
            'websearch': False,
            'personalized_recommendations': True,
            'environmental_recommendations': True,
        })

        return {"message": "Email verification successful"}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post('/resend-code')
async def resend_code(resend_data: ResendCodeRequest):
    """Issue a fresh verification code for a pending user and e-mail it.

    Raises:
        HTTPException: 404 when no pending user exists; 500 when the e-mail
            cannot be sent or on any unexpected error.
    """
    try:
        username = resend_data.username

        temp_user = query.get_temp_user_by_username(username)
        if not temp_user:
            raise HTTPException(status_code=404, detail="User not found or already verified")

        verification_code = _new_verification_code()
        temp_user['verification_code'] = verification_code
        temp_user['code_expiration'] = datetime.utcnow() + timedelta(minutes=10)

        query.create_or_update_temp_user(username, temp_user['email'], temp_user)

        try:
            _send_mail(
                temp_user['email'],
                'Your new verification code',
                _verification_email_body(
                    temp_user['name'],
                    'You requested a new verification code. Please use the following code to verify your email address:',
                    verification_code,
                ),
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail="Failed to send verification email") from e

        return {"message": "A new verification code has been sent to your email."}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post('/checkChatsession')
async def check_chatsession(data: ChatSessionCheck, username: str = Depends(get_current_user)):
    """Report whether the given chat session exists."""
    return {"ischatexit": query.check_chat_session(data.session_id)}


@router.get('/check-token')
async def check_token(username: str = Depends(get_current_user)):
    """Confirm the caller's token; validation itself happens in the dependency."""
    return {'valid': True, 'user': username}


@router.post('/forgot-password')
async def forgot_password(data: ForgotPasswordRequest):
    """Create a one-hour reset token and e-mail a reset link.

    The link base comes from the ``FRONTEND_URL`` environment variable and
    falls back to ``http://localhost:3000``.

    Raises:
        HTTPException: 404 when the e-mail is unknown; 500 on any other error.
    """
    import secrets  # local import: the file-level import block is left as-is

    try:
        email = data.email

        # NOTE(review): the 404 below lets callers probe which e-mails are
        # registered; kept because clients may rely on it.
        user = query.get_user_by_identifier(email)
        if not user:
            raise HTTPException(status_code=404, detail="Email not found")

        alphabet = string.ascii_letters + string.digits
        reset_token = ''.join(secrets.choice(alphabet) for _ in range(32))
        expiration = datetime.utcnow() + timedelta(hours=1)

        query.store_reset_token(email, reset_token, expiration)

        frontend_url = os.getenv("FRONTEND_URL", "http://localhost:3000").rstrip("/")
        reset_link = f"{frontend_url}/reset-password?token={reset_token}"

        _send_mail(
            email,
            'Reset Your Password',
            f'''
                <p>Hi,</p>
                <p>You requested to reset your password. Click the link below to reset it:</p>
                <p><a href="{reset_link}">Reset Password</a></p>
                <p>This link will expire in 1 hour.</p>
                <p>If you didn't request this, please ignore this email.</p>
            ''',
        )

        return {"message": "Password reset instructions sent to email"}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post('/reset-password')
async def reset_password(data: ResetPasswordRequest):
    """Set a new password for the account bound to a valid reset token.

    Raises:
        HTTPException: 400 when a field is empty or the token is invalid or
            expired; 500 on any unexpected error.
    """
    try:
        if not data.token or not data.password:
            raise HTTPException(status_code=400, detail="Token and new password are required")

        reset_info = query.verify_reset_token(data.token)
        if not reset_info:
            raise HTTPException(status_code=400, detail="Invalid or expired reset token")

        query.update_password(reset_info['email'], generate_password_hash(data.password))

        return {"message": "Password successfully reset"}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
```

### app\routers\chat.py

```python
# app/routers/chat.py
import logging
import os
import json
import tempfile
from datetime import datetime

from bson import ObjectId
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Header
from fastapi.responses import JSONResponse, FileResponse
from pydantic import BaseModel

from app.database.database_query import DatabaseQuery
from app.middleware.auth import get_current_user, get_optional_user
from app.services import ChatProcessor
from app.services.image_processor import ImageProcessor
from app.services.report_process import Report
from app.services.skincare_scheduler import SkinCareScheduler
from app.services.wheel import EnvironmentalConditions
from app.services.RAG_evaluation import RAGEvaluation

router = APIRouter()
query = DatabaseQuery()


class ChatSessionTitleUpdate(BaseModel):
    """Body of ``PUT /chat-sessions/{session_id}/title``."""

    title: str


def _bearer_token(authorization):
    """Return the second space-separated part of an ``Authorization`` header.

    Raises:
        HTTPException: 401 when the header is missing or has no token part.
    """
    parts = (authorization or "").split(" ")
    if len(parts) < 2 or not parts[1]:
        raise HTTPException(status_code=401, detail="Missing or malformed Authorization header")
    return parts[1]


def _client_basename(filename):
    """Strip any directory part (``/`` or ``\\``) from a client-supplied name."""
    return os.path.basename(filename.replace('\\', '/'))


@router.get('/image/{filename}')
async def serve_image(filename: str):
    """Serve a file from the ``uploads`` directory.

    Raises:
        HTTPException: 404 when the name does not resolve to a regular file
            directly inside the upload directory.
    """
    upload_dir = os.path.abspath('uploads')
    # Only the base name is used so "..\\x"-style names cannot leave upload_dir.
    safe_name = _client_basename(filename)
    file_path = os.path.join(upload_dir, safe_name)

    logging.info("Attempting to serve file from: %s", file_path)
    if not safe_name or not os.path.isfile(file_path):
        logging.warning("File not found: %s", file_path)
        raise HTTPException(status_code=404, detail="Image not found")

    return FileResponse(file_path)


@router.post('/chat-sessions', status_code=201)
async def create_chat_session(username: str = Depends(get_current_user)):
    """Create an empty chat session titled "New Chat" for the caller."""
    try:
        session_id = str(ObjectId())
        now = datetime.utcnow()

        query.create_chat_session({
            "user_id": username,
            "session_id": session_id,
            "created_at": now,
            "last_accessed": now,
            "title": "New Chat",
        })
        return {"message": "Chat session created", "session_id": session_id}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/chat-sessions')
async def get_user_chat_sessions(username: str = Depends(get_current_user)):
    """Return the caller's chat sessions as provided by the database layer."""
    try:
        return query.get_user_chat_sessions(username)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


# Registered before the parameterised DELETE route: FastAPI matches routes in
# declaration order, so "/chat-sessions/all" would otherwise be captured as
# session_id="all".
@router.delete('/chat-sessions/all')
async def delete_all_sessions_and_chats(username: str = Depends(get_current_user)):
    """Delete every chat session and chat owned by the caller."""
    try:
        result = query.delete_all_user_sessions_and_chats(username)

        return {
            "message": "Successfully deleted all chat sessions and chats",
            "deleted_chats": result["deleted_chats"],
            "deleted_sessions": result["deleted_sessions"],
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.delete('/chat-sessions/{session_id}')
async def delete_chat_session(session_id: str, username: str = Depends(get_current_user)):
    """Delete one of the caller's sessions together with its chats.

    Raises:
        HTTPException: 404 when nothing was deleted; 500 on unexpected errors.
    """
    try:
        result = query.delete_chat_session(session_id, username)

        if result["session_deleted"]:
            return {
                "message": "Chat session and associated chats deleted successfully",
                "chats_deleted": result["chats_deleted"],
            }
        raise HTTPException(status_code=404, detail="Chat session not found or unauthorized")
    except HTTPException:
        raise  # keep the 404 instead of re-wrapping it as a 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.put('/chat-sessions/{session_id}/title')
async def update_chat_title(
    session_id: str,
    title_data: ChatSessionTitleUpdate,
    username: str = Depends(get_current_user)
):
    """Rename one of the caller's chat sessions.

    Raises:
        HTTPException: 404 when the session is not the caller's; 500 when the
            update reports failure or on unexpected errors.
    """
    try:
        new_title = title_data.title

        if not query.verify_session(session_id, username):
            raise HTTPException(status_code=404, detail="Chat session not found or unauthorized")

        if query.update_chat_session_title(session_id, new_title):
            return {
                'message': 'Chat session title updated successfully',
                'session_id': session_id,
                'new_title': new_title,
            }

        raise HTTPException(status_code=500, detail="Failed to update chat session title")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/chats/session/{session_id}')
async def get_session_chats(session_id: str, username: str = Depends(get_current_user)):
    """Return the chats of one session, filtered by the caller's user id."""
    try:
        return query.get_session_chats(session_id, username)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/export-chat/{session_id}')
async def export_chat(session_id: str, username: str = Depends(get_current_user)):
    """Export one session's chats as a JSON-serialisable document.

    Raises:
        HTTPException: 404 when the session is not the caller's; 500 on
            unexpected errors.
    """
    try:
        if not query.verify_session(session_id, username):
            raise HTTPException(status_code=404, detail="Chat session not found or unauthorized")

        formatted_chats = [
            {
                'query': chat.get('query', ''),
                'response': chat.get('response', ''),
                'references': chat.get('references', []),
                'page_no': chat.get('page_no', ''),
                'date': chat.get('timestamp', ''),
                'chat_id': chat.get('chat_id', ''),
            }
            for chat in query.get_session_chats(session_id, username)
        ]

        return {
            'session_id': session_id,
            'export_date': datetime.utcnow().isoformat(),
            'chats': formatted_chats,
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/export-all-chats')
async def export_all_chats(username: str = Depends(get_current_user)):
    """Export every session of the caller, each with its chats."""
    try:
        formatted_sessions = []
        for session in query.get_all_user_chats(username):
            formatted_sessions.append({
                'session_id': session['session_id'],
                'title': session['title'],
                'created_at': session['created_at'],
                'last_accessed': session['last_accessed'],
                'chats': [
                    {
                        'query': chat.get('query', ''),
                        'response': chat.get('response', ''),
                        'references': chat.get('references', []),
                        'page_no': chat.get('page_no', ''),
                        'timestamp': chat.get('timestamp', ''),
                        'chat_id': chat.get('chat_id', ''),
                    }
                    for chat in session['chats']
                ],
            })

        return {
            'user': username,
            'export_date': datetime.utcnow().isoformat(),
            'sessions': formatted_sessions,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post('/web-search')
async def web_search(
    data: dict,
    authorization: str = Header(None),
    username: str = Depends(get_current_user)
):
    """Run a web search through ``ChatProcessor`` for the given session.

    ``data`` must carry ``session_id`` and ``query``; ``num_results`` and
    ``num_images`` default to 3. Missing required fields give a 400 response.
    """
    try:
        token = _bearer_token(authorization)
        session_id = data.get("session_id")
        # Named search_text so the module-level ``query`` object is not shadowed.
        search_text = data.get("query")

        if not session_id or not search_text:
            return JSONResponse(
                status_code=400,
                content={"error": "session_id and query are required"}
            )

        chat_processor = ChatProcessor(
            token=token,
            session_id=session_id,
            num_results=data.get("num_results", 3),
            num_images=data.get("num_images", 3),
        )
        return {"response": chat_processor.web_search(query=search_text)}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post('/report-analysis')
async def analyze_report(
    file: UploadFile = File(...),
    query: str = Form(...),
    session_id: str = Form(...),
    authorization: str = Header(None),
    username: str = Depends(get_current_user)
):
    """Analyse an uploaded report file against a user question.

    The upload is written to a private temporary directory, passed to
    ``Report.process_chat`` and removed afterwards. An empty filename, a blank
    query or an unsupported extension gives a 400 JSON error response.

    Raises:
        HTTPException: 500 with a structured detail on unexpected errors.
    """
    # Extension -> file_type value handed to Report.process_chat.
    allowed_extensions = {
        'pdf': 'pdf',
        'xlsx': 'excel',
        'xls': 'excel',
        'csv': 'csv',
        'jpg': 'image',
        'jpeg': 'image',
        'png': 'image',
        'doc': 'word',
        'docx': 'word',
        'ppt': 'ppt',
        'txt': 'text',
        'html': 'html',
    }

    try:
        token = _bearer_token(authorization)

        if not file.filename:
            return JSONResponse(
                status_code=400,
                content={"status": "error", "error": "Empty file provided"}
            )

        if not query.strip():
            return JSONResponse(
                status_code=400,
                content={"status": "error", "error": "Query is required"}
            )

        file_extension = file.filename.rsplit('.', 1)[1].lower() if '.' in file.filename else ''
        if file_extension not in allowed_extensions:
            # The original referenced an unassigned ``result`` here and so
            # always failed with a 500; report the validation error instead.
            return JSONResponse(
                status_code=400,
                content={
                    "status": "error",
                    "error": f"Unsupported file type. Allowed types: {', '.join(allowed_extensions.keys())}",
                }
            )

        # TemporaryDirectory removes the directory and everything in it, even
        # if processing raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Base name only: the client controls file.filename.
            temp_file_path = os.path.join(temp_dir, _client_basename(file.filename))
            with open(temp_file_path, "wb") as f:
                f.write(await file.read())

            processor = Report(token=token, session_id=session_id)
            result = processor.process_chat(
                query=query,
                report_file=temp_file_path,
                file_type=allowed_extensions[file_extension]
            )

        return {
            "status": "success",
            "message": "Report analyzed successfully",
            "analysis": result,
        }
    except HTTPException:
        raise
    except Exception as e:
        logging.error(f"Error in analyze_report: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail={
                "status": "error",
                "error": "Internal server error",
                "details": str(e)
            }
        ) from e


@router.get('/skin-care-schedule')
async def get_skin_care_schedule(
    authorization: str = Header(None),
    username: str = Depends(get_current_user)
):
    """Return the schedule produced by ``SkinCareScheduler.createTable`` as JSON."""
    try:
        token = _bearer_token(authorization)
        # NOTE(review): the literal "session_id" is passed as the session id,
        # exactly as before; confirm whether a real id is expected.
        scheduler = SkinCareScheduler(token, "session_id")
        return json.loads(scheduler.createTable())
    except HTTPException:
        raise
    except Exception as e:
        logging.error(f"Error generating skin care schedule: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail={"error": "Failed to generate skin care schedule"}
        ) from e


@router.get('/skin-care-wheel')
async def get_skin_care_wheel(
    authorization: str = Header(...),
    username: str = Depends(get_current_user)
):
    """Return the data produced by ``EnvironmentalConditions.get_conditon``."""
    try:
        token = _bearer_token(authorization)
        # The bearer token is passed as ``session_id``, as in the original.
        condition = EnvironmentalConditions(session_id=token)
        return condition.get_conditon()
    except HTTPException:
        raise
    except Exception as e:
        logging.error(f"Error generating skin care wheel: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Failed to generate skin care wheel",
                "message": "An unexpected error occurred"
            }
        ) from e


@router.post('/image_disease_search')
async def disease_search(
    session_id: str = Form(...),
    query: str = Form(...),
    num_results: int = Form(3),
    num_images: int = Form(3),
    image: UploadFile = File(...),
    authorization: str = Header(...),
    username: str = Depends(get_current_user)
):
    """Run ``ImageProcessor.web_search`` for an uploaded image and a question.

    Raises:
        HTTPException: 500 carrying the error text on any failure.
    """
    try:
        # Second space-separated part of the header, i.e. the bearer token.
        token = authorization.split(" ")[1]
        image_processor = ImageProcessor(
            token=token,
            session_id=session_id,
            num_results=num_results,
            num_images=num_images,
            image=image
        )
        return {"response": image_processor.web_search(query=query)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post('/get_rag_evaluation')
async def rag_evaluation(
    page: int = Form(3),
    page_size: int = Form(3),
    authorization: str = Header(...),
    username: str = Depends(get_current_user)
):
    """Return the report built by ``RAGEvaluation.generate_evaluation_report``.

    Raises:
        HTTPException: 500 carrying the error text on any failure.
    """
    try:
        token = authorization.split(" ")[1]
        evaluator = RAGEvaluation(
            token=token,
            page=page,
            page_size=page_size
        )
        return {"response": evaluator.generate_evaluation_report()}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
```

### app\routers\chat_session.py

```python
# app/routers/chat_session.py
from datetime import datetime
from bson import ObjectId
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel

from app.database.database_query import DatabaseQuery
from app.middleware.auth import get_current_user

router = APIRouter()
query = DatabaseQuery()


class ChatSessionTitleUpdate(BaseModel):
    """Body of ``PUT /chat-sessions/{session_id}/title``."""

    title: str


@router.post('/chat-sessions', status_code=201)
async def create_chat_session(username: str = Depends(get_current_user)):
    """Create an empty chat session titled "New Chat" for the caller."""
    try:
        session_id = str(ObjectId())
        now = datetime.utcnow()

        query.create_chat_session({
            "user_id": username,
            "session_id": session_id,
            "created_at": now,
            "last_accessed": now,
            "title": "New Chat",
        })
        return {"message": "Chat session created", "session_id": session_id}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/chat-sessions')
async def get_user_chat_sessions(username: str = Depends(get_current_user)):
    """Return the caller's chat sessions as provided by the database layer."""
    try:
        return query.get_user_chat_sessions(username)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


# Registered before the parameterised DELETE route: FastAPI matches routes in
# declaration order, so "/chat-sessions/all" would otherwise be captured as
# session_id="all".
@router.delete('/chat-sessions/all')
async def delete_all_sessions_and_chats(username: str = Depends(get_current_user)):
    """Delete every chat session and chat owned by the caller."""
    try:
        result = query.delete_all_user_sessions_and_chats(username)

        return {
            "message": "Successfully deleted all chat sessions and chats",
            "deleted_chats": result["deleted_chats"],
            "deleted_sessions": result["deleted_sessions"],
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.delete('/chat-sessions/{session_id}')
async def delete_chat_session(session_id: str, username: str = Depends(get_current_user)):
    """Delete one of the caller's sessions together with its chats.

    Raises:
        HTTPException: 404 when nothing was deleted; 500 on unexpected errors.
    """
    try:
        result = query.delete_chat_session(session_id, username)

        if result["session_deleted"]:
            return {
                "message": "Chat session and associated chats deleted successfully",
                "chats_deleted": result["chats_deleted"],
            }
        raise HTTPException(status_code=404, detail="Chat session not found or unauthorized")
    except HTTPException:
        raise  # keep the 404 instead of re-wrapping it as a 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.put('/chat-sessions/{session_id}/title')
async def update_chat_title(
    session_id: str,
    title_data: ChatSessionTitleUpdate,
    username: str = Depends(get_current_user)
):
    """Rename one of the caller's chat sessions.

    Raises:
        HTTPException: 404 when the session is not the caller's; 500 when the
            update reports failure or on unexpected errors.
    """
    try:
        new_title = title_data.title

        if not query.verify_session(session_id, username):
            raise HTTPException(status_code=404, detail="Chat session not found or unauthorized")

        if query.update_chat_session_title(session_id, new_title):
            return {
                'message': 'Chat session title updated successfully',
                'session_id': session_id,
                'new_title': new_title,
            }

        raise HTTPException(status_code=500, detail="Failed to update chat session title")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/chats/session/{session_id}')
async def get_session_chats(session_id: str, username: str = Depends(get_current_user)):
    """Return the chats of one session, filtered by the caller's user id."""
    try:
        return query.get_session_chats(session_id, username)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
```

### app\routers\language.py

```python
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from app.middleware.auth import get_current_user

from app.database.database_query import DatabaseQuery

router = APIRouter()
query = DatabaseQuery()


class LanguageSettings(BaseModel):
    """Body of ``POST /language``."""

    language: str


@router.post('/language', status_code=201)
async def set_language(
    language_data: LanguageSettings,
    username: str = Depends(get_current_user)
):
    """Store the caller's preferred language.

    Raises:
        HTTPException: 400 when ``language`` is empty; 500 on unexpected errors.
    """
    try:
        language = language_data.language

        if not language:
            raise HTTPException(status_code=400, detail="Language is required")

        result = query.set_user_language(username, language)

        return {
            "message": "Language set successfully",
            "language": result["language"],
        }
    except HTTPException:
        raise  # keep the 400 instead of re-wrapping it as a 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/language')
async def get_language(username: str = Depends(get_current_user)):
    """Return the caller's stored language.

    Raises:
        HTTPException: 404 when none is stored; 500 on unexpected errors.
    """
    try:
        language = query.get_user_language(username)

        if language is None:
            raise HTTPException(status_code=404, detail="Language not set")

        return {
            "message": "Language retrieved successfully",
            "language": language,
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.delete('/language')
async def delete_language(username: str = Depends(get_current_user)):
    """Remove the caller's stored language.

    Raises:
        HTTPException: 404 when nothing was deleted; 500 on unexpected errors.
    """
    try:
        if not query.delete_user_language(username):
            raise HTTPException(status_code=404, detail="Language not found or already deleted")

        return {
            "message": "Language deleted successfully"
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
```

### app\routers\location.py

```python
# app/routers/location.py
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from app.database.database_query import DatabaseQuery
from app.middleware.auth import get_current_user

router = APIRouter()
query = DatabaseQuery()


class LocationData(BaseModel):
    """Body of ``POST /location``."""

    location: str


@router.post('/location', status_code=201)
async def add_location(location_data: LocationData, username: str = Depends(get_current_user)):
    """Create or replace the caller's stored location.

    Raises:
        HTTPException: 400 when ``location`` is empty; 500 on unexpected errors.
    """
    try:
        location = location_data.location

        if not location:
            raise HTTPException(status_code=400, detail="Location is required")

        query.add_or_update_location(username, location)

        return {'message': 'Location added/updated successfully'}
    except HTTPException:
        raise  # keep the 400 instead of re-wrapping it as a 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/location')
async def get_location(username: str = Depends(get_current_user)):
    """Return the caller's stored location.

    Raises:
        HTTPException: 404 when none is stored; 500 on unexpected errors.
    """
    try:
        location_data = query.get_location(username)

        if not location_data:
            raise HTTPException(status_code=404, detail="No location found for this user")

        return {'location': location_data['location']}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
```

### app\routers\preferences.py

```python
# app/routers/preferences.py
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from typing import Dict, Any
from app.database.database_query import DatabaseQuery
from app.middleware.auth import get_current_user

router = APIRouter()
query = DatabaseQuery()


class ThemeSettings(BaseModel):
    """Body of ``POST /theme``; the theme is a single boolean flag."""

    theme: bool


@router.get('/preferences')
async def get_preferences(username: str = Depends(get_current_user)):
    """Return the caller's stored preferences."""
    try:
        return {'preferences': query.get_user_preferences(username)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

+@router.post('/preferences') +async def set_preferences(preferences: Dict[str, Any], username: str = Depends(get_current_user)): + try: + preferences_result = query.set_user_preferences(username, preferences) + return { + 'message': 'Preferences updated successfully', + 'preferences': preferences_result + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@router.get('/theme') +async def get_theme(username: str = Depends(get_current_user)): + try: + user_theme = query.get_user_theme(username) + return {'theme': user_theme} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@router.post('/theme') +async def set_theme(theme_data: ThemeSettings, username: str = Depends(get_current_user)): + try: + theme = theme_data.theme + theme_data = query.set_user_theme(username, theme) + return { + 'message': 'Theme updated successfully', + 'theme': theme_data['theme'] + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) +``` + +### app\routers\profile.py + +```python +from fastapi import APIRouter, Depends, HTTPException, Body +from pydantic import BaseModel, EmailStr, validator +from typing import Optional +from werkzeug.security import generate_password_hash + +from app.database.database_query import DatabaseQuery +from app.middleware.auth import get_current_user + +router = APIRouter() +query = DatabaseQuery() + +class ProfileUpdateRequest(BaseModel): + email: Optional[EmailStr] = None + password: Optional[str] = None + name: Optional[str] = None + age: Optional[int] = None + + @validator('password') + def password_length(cls, v): + if v is not None and len(v) < 6: + raise ValueError('Password must be at least 6 characters') + return v + + @validator('age') + def age_range(cls, v): + if v is not None and (v < 13 or v > 120): + raise ValueError('Age must be between 13 and 120') + return v + +@router.get('/profile') +async def get_profile(username: str = Depends(get_current_user)): + 
try: + user = query.get_user_profile(username) + + if not user: + raise HTTPException(status_code=404, detail="User not found") + + return { + 'username': user['username'], + 'email': user['email'], + 'name': user['name'], + 'age': user['age'], + 'created_at': user['created_at'] + } + + except Exception as e: + if isinstance(e, HTTPException): + raise e + raise HTTPException(status_code=500, detail=str(e)) + +@router.put('/profile') +async def update_profile( + update_data: ProfileUpdateRequest = Body(...), + username: str = Depends(get_current_user) +): + try: + update_fields = {} + + if update_data.email: + if not query.is_valid_email(update_data.email): + raise HTTPException(status_code=400, detail="Invalid email format") + update_fields['email'] = update_data.email + + if update_data.password: + update_fields['password'] = generate_password_hash(update_data.password) + + if update_data.name: + update_fields['name'] = update_data.name + + if update_data.age is not None: + update_fields['age'] = update_data.age + + if update_fields: + if query.update_user_profile(username, update_fields): + return {"message": "Profile updated successfully"} + + return {"message": "No changes made"} + + except Exception as e: + if isinstance(e, HTTPException): + raise e + raise HTTPException(status_code=500, detail=str(e)) + +@router.delete('/profile') +async def delete_account(username: str = Depends(get_current_user)): + try: + if query.delete_user_account(username): + return {"message": "Account deleted successfully"} + + raise HTTPException(status_code=404, detail="User not found") + + except Exception as e: + if isinstance(e, HTTPException): + raise e + raise HTTPException(status_code=500, detail=str(e)) + +@router.delete('/delete-account-permanently') +async def delete_account_permanently(username: str = Depends(get_current_user)): + try: + result = query.delete_account_permanently(username) + + if result['success']: + return { + 'message': 'Account and all associated data 
deleted successfully', + 'details': result['deleted_data'] + } + else: + raise HTTPException(status_code=500, detail="Failed to delete account") + + except Exception as e: + if isinstance(e, HTTPException): + raise e + raise HTTPException(status_code=500, detail=str(e)) +``` + +### app\routers\questionnaire.py + +```python +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel +from typing import Dict, Any + +from app.database.database_query import DatabaseQuery +from app.middleware.auth import get_current_user + +router = APIRouter() +query = DatabaseQuery() + +class QuestionnaireSubmission(BaseModel): + answers: Dict[str, Any] + +@router.post('/questionnaires', status_code=201) +async def submit_questionnaire( + submission: QuestionnaireSubmission, + username: str = Depends(get_current_user) +): + try: + if not submission.answers: + raise HTTPException(status_code=400, detail="Answers are required") + + questionnaire_id = query.submit_questionnaire(username, submission.answers) + return { + 'message': 'Questionnaire submitted successfully', + 'questionnaire_id': questionnaire_id + } + except Exception as e: + if isinstance(e, HTTPException): + raise e + raise HTTPException(status_code=500, detail=str(e)) + +@router.get('/questionnaires') +async def get_questionnaire(username: str = Depends(get_current_user)): + try: + questionnaire = query.get_latest_questionnaire(username) + + if not questionnaire: + return {'message': 'No questionnaire found', 'data': None} + + return {'message': 'Success', 'data': questionnaire} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@router.put('/questionnaires/{questionnaire_id}') +async def update_questionnaire( + questionnaire_id: str, + submission: QuestionnaireSubmission, + username: str = Depends(get_current_user) +): + try: + if not submission.answers: + raise HTTPException(status_code=400, detail="Answers are required") + + if 
query.update_questionnaire(questionnaire_id, username, submission.answers): + return {'message': 'Questionnaire updated successfully'} + + raise HTTPException( + status_code=404, + detail='Questionnaire not found or unauthorized' + ) + except Exception as e: + if isinstance(e, HTTPException): + raise e + raise HTTPException(status_code=500, detail=str(e)) + +@router.delete('/questionnaires/{questionnaire_id}') +async def delete_questionnaire( + questionnaire_id: str, + username: str = Depends(get_current_user) +): + try: + if query.delete_questionnaire(questionnaire_id, username): + return {'message': 'Questionnaire deleted successfully'} + + raise HTTPException( + status_code=404, + detail='Questionnaire not found or unauthorized' + ) + except Exception as e: + if isinstance(e, HTTPException): + raise e + raise HTTPException(status_code=500, detail=str(e)) + +@router.get('/check-answers') +async def check_answers(username: str = Depends(get_current_user)): + try: + answered_count = query.count_answered_questions(username) + return {'has_at_least_two_answers': answered_count >= 2} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@router.get('/check-questionnaire') +async def check_questionnaire_submission(username: str = Depends(get_current_user)): + try: + questionnaire = query.get_latest_questionnaire(username) + has_questionnaire = questionnaire is not None + return {'has_questionnaire': has_questionnaire} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) +``` + +### app\services\__init__.py + +```python +# app/services/__init__.py +from app.services.image_processor import ImageProcessor +from app.services.image_classification_vit import SkinDiseaseClassifier +from app.services.llm_model import Model +from app.services.chat_processor import ChatProcessor +from app.services.chathistory import ChatSession +from app.services.environmental_condition import EnvironmentalData +from app.services.prompts 
# Public names exported by ``from app.services import *``.
# Every entry ends with a comma: adjacent string literals without one are
# silently concatenated into a single (non-existent) export name.
__all__ = [
    "ImageProcessor",
    # NOTE(review): no explicit import in this module binds AISkinDetector;
    # unless the star import from app.services.prompts provides it,
    # ``from app.services import *`` will raise AttributeError - confirm.
    "AISkinDetector",
    "SkinDiseaseClassifier",
    "Model",
    "ChatProcessor",
    "ChatSession",
    "EnvironmentalData",
    "RAGEvaluation",
    "Report",
    "SkinCareScheduler",
    "VectorDatabaseSearch",
    "WebSearch",
    "EnvironmentalConditions",
    "MagicConvert",
]
+ self.session_id = session_id + self.chat_session = ChatSession(token, session_id) + self.query_db = DatabaseQuery() + self.user_profile = self._get_user_profile() + self.user_preferences = self._get_user_preferences() + self.user_city = self.chat_session.get_city() + self.environment_data = self._get_environmental_data() + self.language = self.chat_session.get_language() + self.agent = None + self.runner = None + self.agent_session = None + self.tool_calls = [] # Track tool calls for frontend display + self.images_found = [] # Track images found during processing + + logger.info(f"User preferences: {self.user_preferences}") + logger.info(f"User city: {self.user_city}") + logger.info(f"Language: {self.language}") + + # Initialize the appropriate agent + self._initialize_agent() + + def _get_user_profile(self) -> Dict[str, Any]: + """Get user profile information""" + try: + profile = self.chat_session.get_name_and_age() + logger.info(f"Retrieved user profile: {profile}") + return { + 'name': profile.get('name', 'Patient'), + 'age': profile.get('age', 'Unknown') + } + except Exception as e: + logger.error(f"Error getting user profile: {e}") + return {'name': 'Patient', 'age': 'Unknown'} + + def _get_user_preferences(self) -> Dict[str, Any]: + """Get user preferences from database""" + try: + preferences = self.chat_session.get_user_preferences() + logger.info(f"Retrieved user preferences: {preferences}") + return preferences + except Exception as e: + logger.error(f"Error getting user preferences: {e}") + return { + 'websearch': False, + 'keywords': True, + 'references': True, + 'environmental_recommendations': False, + 'personalized_recommendations': False + } + + def _get_environmental_data(self) -> str: + """Get environmental data if user has provided location""" + try: + if self.user_city: + env_data = EnvironmentalData(self.user_city) + data = str(env_data.get_environmental_data()) + logger.info(f"Retrieved environmental data for {self.user_city}: 
{data[:100]}...") + return data + logger.info("No user city provided, skipping environmental data") + return "" + except Exception as e: + logger.error(f"Error getting environmental data: {e}") + return "" + + def _get_personalized_data(self) -> str: + """Get personalized recommendations data""" + try: + data = self.chat_session.get_personalized_recommendation() or "" + if data: + logger.info(f"Retrieved personalized data: {data[:100]}...") + return data + except Exception as e: + logger.error(f"Error getting personalized data: {e}") + return "" + + def _prepare_user_data(self) -> Dict[str, Any]: + """Prepare all user data for prompt generation""" + user_data = { + 'name': self.user_profile.get('name'), + 'age': self.user_profile.get('age'), + 'language': self.language, + 'personalized_recommendations': self.user_preferences.get('personalized_recommendations'), + 'environmental_recommendations': self.user_preferences.get('environmental_recommendations'), + 'personalized_data': self._get_personalized_data() if self.user_preferences.get('personalized_recommendations') else "", + 'environmental_data': self.environment_data if self.user_preferences.get('environmental_recommendations') else "" + } + logger.info(f"Prepared user data: {user_data}") + return user_data + + def _initialize_agent(self): + """Initialize the appropriate agent based on user preferences""" + from google.adk.tools import FunctionTool + + user_data = self._prepare_user_data() + + # Create function tools + web_search_tool = FunctionTool(func=get_web_search) + vector_search_tool = FunctionTool(func=get_vector_search) + image_search_tool = FunctionTool(func=get_image_search) + + if self.user_preferences.get('websearch', False): + # Create web search agent + logger.info("Initializing web search agent with tools") + tools_list = [web_search_tool, image_search_tool] + logger.info(f"Web search agent tools: {[tool.name for tool in tools_list]}") + + self.agent = Agent( + name="web_search_agent", + 
model="gemini-2.0-flash-exp", + description="Expert dermatologist assistant using web search", + instruction=get_web_search_prompt(user_data), + tools=tools_list, + ) + else: + # Create vector search agent + logger.info("Initializing vector search agent with tools") + tools_list = [vector_search_tool, image_search_tool] + logger.info(f"Vector search agent tools: {[tool.name for tool in tools_list]}") + + self.agent = Agent( + name="vector_search_agent", + model="gemini-2.0-flash-exp", + description="Expert dermatologist assistant using medical knowledge base", + instruction=get_vector_search_prompt(user_data), + tools=tools_list, + ) + + # Initialize runner + self.runner = InMemoryRunner( + agent=self.agent, + app_name='dermai_chat_app', + ) + logger.info(f"Agent and runner initialized successfully with {len(tools_list)} tools") + + async def create_session(self) -> Optional[Any]: + """Create a new agent session""" + try: + logger.info(f"Creating new agent session for user: {self.chat_session.identity}") + session = await self.runner.session_service.create_session( + app_name='dermai_chat_app', + user_id=self.chat_session.identity + ) + self.agent_session = session + logger.info(f"Agent session created with ID: {session.id}") + return session + except Exception as e: + logger.error(f"Error creating session: {e}", exc_info=True) + return None + + async def process_message_async(self, query: str) -> AsyncGenerator[Dict[str, Any], None]: + """Process message and yield streaming responses""" + try: + logger.info(f"Processing message: {query[:100]}...") + + # Reset tracking variables + self.tool_calls = [] + self.images_found = [] + + # Ensure session is created + if not self.agent_session: + logger.info("Creating new agent session...") + await self.create_session() + + if not self.agent_session: + logger.error("Failed to create agent session") + yield { + "type": "error", + "content": "Failed to create agent session" + } + return + + # Create message content + content 
= types.Content( + role='user', + parts=[types.Part.from_text(text=query)] + ) + + # Track response for final processing + full_response = "" + current_text = "" + + logger.info("Starting agent execution...") + logger.info(f"Available tools: {[tool.name for tool in self.agent.tools] if self.agent.tools else 'No tools'}") + + # Stream the response using run_async (synchronous method doesn't work well with streaming) + try: + events = self.runner.run_async( + user_id=self.chat_session.identity, + session_id=self.agent_session.id, + new_message=content, + ) + + async for event in events: + logger.debug(f"Received event: {type(event).__name__}") + logger.debug(f"Event attributes: {[attr for attr in dir(event) if not attr.startswith('_')]}") + + # Handle different event types + if hasattr(event, 'content') and event.content: + logger.debug("Processing content event") + if hasattr(event.content, 'parts') and event.content.parts: + for part in event.content.parts: + if hasattr(part, 'text') and part.text: + # Handle streaming text + text_content = part.text + if text_content != current_text: + new_chunk = text_content[len(current_text):] if current_text else text_content + if new_chunk: + logger.debug(f"Streaming chunk: {new_chunk[:50]}...") + yield { + "type": "chunk", + "content": new_chunk + } + current_text = text_content + full_response = text_content + + # Check if this is a final response event + if hasattr(event, 'is_final_response') and callable(event.is_final_response): + if event.is_final_response(): + logger.info("Received final response event") + + # Handle function calls - check for tool execution events + if hasattr(event, 'function_call') and event.function_call: + logger.info(f"Function call detected: {event.function_call}") + tool_call_info = { + 'tool_name': event.function_call.name if hasattr(event.function_call, 'name') else 'unknown', + 'arguments': dict(event.function_call.args) if hasattr(event.function_call, 'args') else {} + } + 
self.tool_calls.append(tool_call_info) + yield { + "type": "tool_call", + "tool_name": tool_call_info['tool_name'], + "arguments": tool_call_info['arguments'] + } + + # Handle function responses + if hasattr(event, 'function_response') and event.function_response: + logger.info(f"Function response detected: {str(event.function_response)[:200]}...") + + # Check if this is an image search result + if hasattr(event.function_response, 'content'): + response_content = event.function_response.content + if isinstance(response_content, dict) and 'images' in response_content: + images = response_content.get('images', []) + if images: + self.images_found.extend(images) + logger.info(f"Found {len(images)} images from tool") + + yield { + "type": "tool_result", + "tool_name": getattr(event.function_response, 'name', 'unknown'), + "result": getattr(event.function_response, 'content', {}) + } + + except Exception as stream_error: + logger.error(f"Error in streaming: {stream_error}", exc_info=True) + # Fallback to synchronous run if async streaming fails + logger.info("Falling back to synchronous execution...") + + for event in self.runner.run( + user_id=self.chat_session.identity, + session_id=self.agent_session.id, + new_message=content, + ): + logger.debug(f"Sync event: {type(event).__name__}") + + if hasattr(event, 'content') and event.content and hasattr(event.content, 'parts') and event.content.parts: + for part in event.content.parts: + if hasattr(part, 'text') and part.text: + text_content = part.text + if text_content != current_text: + new_chunk = text_content[len(current_text):] if current_text else text_content + if new_chunk: + yield { + "type": "chunk", + "content": new_chunk + } + current_text = text_content + full_response = text_content + + logger.info(f"Agent execution completed. 
Full response length: {len(full_response)}") + logger.info(f"Tool calls made: {len(self.tool_calls)}") + logger.info(f"Images found: {len(self.images_found)}") + + # Process the final response + if full_response: + try: + # Try to parse the response as JSON + json_response = json.loads(full_response) + response_text = json_response.get('response', full_response) + keywords = json_response.get('keywords', []) + + # Merge images from tool calls with images in response + response_images = json_response.get('images', []) + all_images = list(set(self.images_found + response_images)) # Remove duplicates + + # Update the JSON response with all found images + json_response['images'] = all_images + + logger.info(f"Parsed JSON response successfully. Keywords: {keywords}, Images: {len(all_images)}") + + except json.JSONDecodeError as e: + # Fallback if JSON parsing fails + logger.warning(f"JSON parsing failed: {e}. Using fallback.") + response_text = full_response + keywords = [] + all_images = self.images_found + + # Save to chat history + session_id = self._ensure_valid_session(query) + chat_data = { + "query": query, + "response": response_text, + "references": [], # References are embedded in the response text as [1], [2], etc. 
def _clean_value(user_data: dict, key: str, default):
    """Return ``user_data[key]``, or *default* when it is missing, ``None`` or ``""``."""
    value = user_data.get(key)
    if value is None or value == "":
        return default
    return value


def _append_user_sections(base_prompt: str, user_data: dict) -> str:
    """Append the optional personalization, environment and language sections.

    Shared by both prompt builders so the two agents cannot drift apart.
    """
    sections = [base_prompt]

    if user_data.get('personalized_recommendations'):
        sections.append(f"""

## Personalization Data:
Patient Name: {_clean_value(user_data, 'name', 'Patient')}
Age: {_clean_value(user_data, 'age', 'Unknown')}
{_clean_value(user_data, 'personalized_data', '')}

Include personalized recommendations in your response based on the patient's profile.
""")

    if user_data.get('environmental_recommendations'):
        sections.append(f"""

## Environmental Conditions:
{_clean_value(user_data, 'environmental_data', '')}

Include environmental considerations and suggestions at the end of your response.
""")

    # The 'language' key may be present with a None value; calling .lower()
    # on it directly would raise AttributeError, so fall back to English.
    language = str(_clean_value(user_data, 'language', 'english')).strip()
    if language and language.lower() != 'english':
        sections.append(f"""

IMPORTANT: Respond EXCLUSIVELY in {language} language, but maintain the JSON structure.
""")

    return "".join(sections)


def get_web_search_prompt(user_data: dict) -> str:
    """Generate the instruction prompt for the web search agent.

    Args:
        user_data: Mapping that may contain ``name``, ``age``, ``language``,
            ``personalized_recommendations``, ``environmental_recommendations``,
            ``personalized_data`` and ``environmental_data``. Missing, ``None``
            or empty values fall back to defaults.

    Returns:
        The base prompt followed by whichever optional sections the flags in
        ``user_data`` enable.
    """

    base_prompt = """
You are Dr. DermAI, an expert dermatologist assistant specialized in skin conditions and treatments.

CRITICAL INSTRUCTIONS FOR TOOL USAGE:
1. ALWAYS use the get_web_search tool FIRST for every user query to get current information
2. ALWAYS use the get_image_search tool SECOND to find relevant images for the medical topic
3. You MUST call these tools for every query - never respond without using them
4. Call get_web_search with the user's query to get current medical information
5. Call get_image_search with relevant keywords to find medical images
6. Only after getting results from BOTH tools, provide your response

Your capabilities:
- Use web search to find current, accurate dermatological information
- Search for relevant medical images when needed
- Provide comprehensive medical advice based on search results

RESPONSE FORMAT:
After using BOTH tools (get_web_search AND get_image_search), respond in this exact JSON format:
{
    "response": "Your detailed medical response with citations in markdown format [1] [2] etc.",
    "keywords": ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5"],
    "images": ["image_url_1", "image_url_2", "image_url_3"]
}

IMPORTANT REQUIREMENTS:
- Include citations as [1], [2], etc. in your response text
- Provide 4-5 relevant keywords related to the medical topic
- ALWAYS include the "images" field in your JSON response with URLs from get_image_search
- Include disclaimer about consulting healthcare professionals
- Always maintain a professional, caring tone typical of an experienced dermatologist
- Return ONLY the JSON response, no additional text before or after
- Make sure your JSON is properly formatted and valid

TOOL USAGE EXAMPLE:
User: "What causes acne?"
Step 1: Call get_web_search("acne causes dermatology")
Step 2: Call get_image_search("acne causes medical")
Step 3: Provide JSON response with information from both tools
"""

    return _append_user_sections(base_prompt, user_data)


def get_vector_search_prompt(user_data: dict) -> str:
    """Generate the instruction prompt for the vector search agent.

    Args:
        user_data: Same mapping as accepted by ``get_web_search_prompt``.

    Returns:
        The base prompt followed by whichever optional sections the flags in
        ``user_data`` enable.
    """

    base_prompt = """
You are Dr. DermAI, an expert dermatologist assistant with access to a specialized medical knowledge base.

CRITICAL INSTRUCTIONS FOR TOOL USAGE:
1. ALWAYS use the get_vector_search tool FIRST for every user query to search the knowledge base
2. ALWAYS use the get_image_search tool SECOND to find relevant images for the medical topic
3. You MUST call these tools for every query - never respond without using them
4. Call get_vector_search with the user's query to get medical information from the database
5. Call get_image_search with relevant keywords to find medical images
6. Only after getting results from BOTH tools, provide your response

Your capabilities:
- Search the medical database for dermatological information
- Find relevant medical images
- Provide evidence-based medical advice

RESPONSE FORMAT:
After using BOTH tools (get_vector_search AND get_image_search), respond in this exact JSON format:
{
    "response": "Your detailed medical response with citations in markdown format [1] [2] etc.",
    "keywords": ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5"],
    "images": ["image_url_1", "image_url_2", "image_url_3"]
}

IMPORTANT REQUIREMENTS:
- Include citations as [1], [2], etc. in your response text
- Provide 4-5 relevant keywords related to the medical topic
- ALWAYS include the "images" field in your JSON response with URLs from get_image_search
- Add appropriate medical disclaimers
- Always maintain a professional, caring tone typical of an experienced dermatologist
- Return ONLY the JSON response, no additional text before or after
- Make sure your JSON is properly formatted and valid

TOOL USAGE EXAMPLE:
User: "What causes acne?"
Step 1: Call get_vector_search("acne causes dermatology")
Step 2: Call get_image_search("acne causes medical")
Step 3: Provide JSON response with information from both tools
"""

    return _append_user_sections(base_prompt, user_data)
ensure_valid_session(self, title: str = None) -> str: + if not self.session_id or not self.session_id.strip(): + self.chat_session.create_new_session(title=title) + self.session_id = self.chat_session.session_id + else: + try: + if not self.chat_session.validate_session(self.session_id, title=title): + self.chat_session.create_new_session(title=title) + self.session_id = self.chat_session.session_id + except ValueError: + self.chat_session.create_new_session(title=title) + self.session_id = self.chat_session.session_id + return self.session_id + + def process_chat(self, query: str) -> Dict[str, Any]: + try: + profile = self.chat_session.get_name_and_age() + name = profile['name'] + age = profile['age'] + self.chat_session.load_chat_history() + self.chat_session.update_title(self.session_id, query) + history = self.chat_session.format_history() + + # Enhanced query generation + history_based_prompt = HISTORY_BASED_PROMPT.format(history=history, query=query) + enhanced_query = Model().send_message_openrouter(history_based_prompt) + + self.session_id = self.ensure_valid_session(title=enhanced_query) + permission = self.chat_session.get_user_preferences() + websearch_enabled = permission.get('websearch', False) + env_recommendations = permission.get('environmental_recommendations', False) + personalized_recommendations = permission.get('personalized_recommendations', False) + keywords_permission = permission.get('keywords', False) + reference_permission = permission.get('references', False) + language = self.chat_session.get_language().lower() + + language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language=language) + + # Check if vector database is available when websearch is disabled + vector_db_available = vectordb and vectordb.is_available() if not websearch_enabled else False + + # If websearch is disabled and vector DB is not available, enable websearch as fallback + use_websearch = websearch_enabled or not vector_db_available + + if use_websearch: + 
logger.info("Using web search for context") + with ThreadPoolExecutor(max_workers=2) as executor: + future_web = executor.submit(self.web_searcher.search, enhanced_query) + future_images = executor.submit(self.web_searcher.search_images, enhanced_query) + web_results = future_web.result() + image_results = future_images.result() + + context_parts = [] + references = [] + + for idx, result in enumerate(web_results, 1): + if result['text']: + context_parts.append(f"From Source {idx}: {result['text']}\n") + references.append(result['link']) + + context = "\n".join(context_parts) + + # If web search returns no results, provide a helpful context + if not context: + context = "No specific information found. Please provide general dermatological advice based on your expertise." + + else: + logger.info("Using vector database for context") + attach_image = False + + with ThreadPoolExecutor(max_workers=1) as executor: + future_images = executor.submit(self.web_searcher.search_images, enhanced_query) + image_results = future_images.result() + + start_time = datetime.now(timezone.utc) + + # Search vector database + if vectordb: + results = vectordb.search(query=enhanced_query, top_k=5) # Increased top_k for better results + else: + results = [] + + context_parts = [] + references = [] + seen_pages = set() + + for result in results: + confidence = result.get('confidence', 0) + # Lowered confidence threshold for better recall + if confidence > 30: + context_parts.append(f"Content: {result['content']}") + source = result.get('source', 'Unknown') + page = result.get('page', 0) + page_key = f"{source}_{page}" + if page_key not in seen_pages: + references.append(f"Source: {source}, Page: {page}") + seen_pages.add(page_key) + attach_image = True + + context = "\n".join(context_parts) + + # Provide more helpful context when vector search returns nothing + if not context or len(context) < 50: + logger.warning("Vector database returned insufficient context") + # Fall back to web search 
if available + if self.web_searcher: + logger.info("Falling back to web search due to insufficient vector results") + web_results = self.web_searcher.search(enhanced_query) + context_parts = [] + references = [] + for idx, result in enumerate(web_results[:3], 1): + if result['text']: + context_parts.append(f"From Source {idx}: {result['text']}\n") + references.append(result['link']) + context = "\n".join(context_parts) + + if not context: + context = "Based on general dermatological knowledge and best practices." + attach_image = False + + end_time = datetime.now(timezone.utc) + + # Generate appropriate prompt based on user preferences + if env_recommendations and personalized_recommendations: + prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format( + user_name=name, + user_age=age, + history=history, + user_details=self.chat_session.get_personalized_recommendation(), + environmental_condition=self.environment_data.get_environmental_data(), + previous_history=history, + context=context, + current_query=enhanced_query + ) + elif personalized_recommendations: + prompt = PERSONALIZED_PROMPT.format( + user_name=name, + user_age=age, + user_details=self.chat_session.get_personalized_recommendation(), + previous_history=history, + context=context, + current_query=enhanced_query + ) + elif env_recommendations: + prompt = ENVIRONMENTAL_PROMPT.format( + user_name=name, + user_age=age, + environmental_condition=self.environment_data.get_environmental_data(), + previous_history=history, + context=context, + current_query=enhanced_query + ) + else: + prompt = DEFAULT_PROMPT.format( + previous_history=history, + context=context, + current_query=enhanced_query + ) + + prompt = prompt + "\n" + language_prompt + + # Generate response + response = Model().llm(prompt, enhanced_query) + + # Extract keywords if enabled + keywords = "" + if keywords_permission: + keywords = self.extract_keywords_yake(response, language=language) + + if not reference_permission: + references = "" + + # 
Prepare images + if not use_websearch and not attach_image: + image_results = "" + keywords = "" + + # Prepare chat data + chat_data = { + "query": enhanced_query, + "response": response, + "references": references, + "page_no": "", + "keywords": keywords, + "images": image_results if 'image_results' in locals() else "", + "context": context, + "timestamp": datetime.now(timezone.utc).isoformat(), + "session_id": self.chat_session.session_id + } + + # Save RAG details if using vector database + if not use_websearch and 'start_time' in locals() and 'end_time' in locals(): + match = re.search(r'(## Personal Recommendations|## Environmental Considerations)', response) + truncated_response = response[:match.start()].strip() if match else response + + if not self.chat_session.save_details( + session_id=self.session_id, + context=context, + query=enhanced_query, + response=truncated_response, + rag_start_time=start_time, + rag_end_time=end_time + ): + logger.warning("Failed to save RAG details") + + # Save chat + if not self.chat_session.save_chat(chat_data): + raise ValueError("Failed to save chat message") + + return chat_data + + except Exception as e: + logger.error(f"Error in process_chat: {str(e)}") + return { + "error": str(e), + "query": query, + "response": "I apologize, but I'm experiencing technical difficulties. 
def get_personalized_recommendation(self) -> Optional[str]:
    """Render the user's latest questionnaire as prompt-ready text.

    Fetches the most recent questionnaire for ``self.identity`` through
    ``self.query``, drops answers that carry no information (``None``,
    blank, "none", or shorter than three characters) and returns one
    "question / User answer" paragraph per remaining answer, prefixed with
    the user's name and age taken from ``self.get_profile()``.

    Returns:
        The formatted text, or ``None`` when there is no questionnaire, it
        has no answers, or every answer was filtered out.
    """
    response = self.query.get_latest_questionnaire(self.identity)
    if not response:
        return None

    answers = response.get('answers', {})
    if not answers:
        return None

    def format_answer(answer):
        """Normalise one raw answer; return None when it is not worth showing."""
        if answer is None:
            return None
        if isinstance(answer, str):
            stripped_answer = answer.strip().lower()
            # The length test also rejects the empty string.
            if len(stripped_answer) < 3 or stripped_answer == 'none':
                return None
            return stripped_answer
        if isinstance(answer, list):
            # Non-string entries are skipped: the substring and strip()
            # checks below would otherwise raise on them.
            kept = [
                item for item in answer
                if isinstance(item, str)
                and "Other" not in item
                and item.strip().lower() != 'none'
                and len(item.strip()) >= 3
            ]
            return ", ".join(kept) if kept else None
        # Any other type (number, bool, ...) is passed through unchanged.
        return answer

    questions = {
        "skinType": "How would you describe your skin type?",
        "currentConditions": "Do you currently have any skin conditions?",
        "autoImmuneConditions": "Do you have a history of autoimmune or hormonal conditions?",
        "allergies": "Do you have any known allergies to skincare ingredients?",
        "medications": "Are you currently taking any medications that might affect your skin?",
        "hormonal": "Do you experience hormonal changes that affect your skin?",
        "diet": "Have you noticed any foods that trigger skin reactions?",
        "diabetes": "Do you have diabetes?",
        "outdoorTime": "How much time do you spend outdoors during the day?",
        "sleep": "How many hours of sleep do you get on average?",
        "familyHistory": "Do you have a family history of skin conditions?",
        "products": "What skincare products are you currently using?"
    }

    # Format each answer once and keep the questionnaire's question order.
    valid_answers = {}
    for key in questions:
        formatted = format_answer(answers.get(key))
        if formatted is not None:
            valid_answers[key] = formatted

    if not valid_answers:
        return None

    formatted_response = [
        f"question: {questions[key]}\nUser answer: {answer}"
        for key, answer in valid_answers.items()
    ]

    profile = self.get_profile()
    name = profile.get('name', 'Unknown')
    age = profile.get('age', 'Unknown')

    return f"user name: {name}\nuser age: {age}\n\n" + "\n\n".join(formatted_response)
def get_profile(self):
    """Return the public profile fields of the current user.

    Looks the user up through ``self.query`` (the same handle every other
    accessor on this class uses) and copies only the fields that are safe
    to expose.

    Returns:
        A dict with ``username``, ``email``, ``name``, ``age`` and
        ``created_at``; or ``{'error': <message>}`` when the user does not
        exist or the lookup fails. This method never raises, so callers
        can always use ``.get()`` on the result.
    """
    try:
        # Use the instance's query handle rather than the module-level
        # ``query`` object so the lookup stays consistent with the rest of
        # the class and can be substituted per instance.
        user = self.query.get_user_profile(self.identity)
        if not user:
            return {'error': 'User not found'}
        return {
            'username': user['username'],
            'email': user['email'],
            'name': user['name'],
            'age': user['age'],
            'created_at': user['created_at']
        }
    except Exception as e:
        # A missing field (KeyError) or a backend failure is reported as data.
        return {'error': str(e)}
def get_language(self) -> Optional[str]:
    """Return the current user's preferred response language.

    The value comes from ``self.query.get_user_language``; when nothing is
    stored (any falsy value) the default ``"english"`` is returned.

    Raises:
        Exception: if the lookup itself fails; the original error is
            attached as the cause.
    """
    try:
        language = self.query.get_user_language(self.identity)
    except Exception as e:
        # The message names the language lookup (it used to say "city").
        raise Exception(f"Failed to get user language: {str(e)}") from e
    return language or "english"
class EnvironmentalData:
    """Best-effort weather, air-quality and UV readings for one city.

    Air-quality data comes from the WAQI JSON feed and the UV index is
    scraped from a weatheronline.co.uk page. Every fetch is fail-soft: any
    error yields "N/A" placeholders instead of an exception.
    """

    # NOTE(review): this token is committed in source. It is kept only as a
    # fallback so existing deployments keep working; set WAQI_API_TOKEN in
    # the environment and rotate this value.
    _DEFAULT_WAQI_TOKEN = "466cde4d55e7c5d6cc658ad9c391214b593f46b9"

    # (output label, key inside the feed's "iaqi" object)
    _IAQI_FIELDS = (
        ("Temperature", "t"),
        ("Humidity", "h"),
        ("Wind Speed", "w"),
        ("Pressure", "p"),
    )

    def __init__(self, city, timeout=10.0):
        """Store the city and build the two source URLs.

        Args:
            city: City name interpolated into both URLs.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        import os  # local import: the module's import block is outside this class

        self.city = city
        self.timeout = timeout
        token = os.getenv("WAQI_API_TOKEN") or self._DEFAULT_WAQI_TOKEN
        self.aqi_url = f"https://api.waqi.info/feed/{city}/?token={token}"
        self.uv_url = f"https://www.weatheronline.co.uk/Pakistan/{city}/UVindex.html"

    def fetch_aqi_data(self):
        """Return the WAQI readings, or the "N/A" defaults on any failure."""
        try:
            # Without a timeout a stalled server would block the caller forever.
            response = requests.get(self.aqi_url, timeout=self.timeout)
            data = response.json()
            if data["status"] != "ok":
                return self.get_default_aqi_data()

            payload = data["data"]
            iaqi = payload["iaqi"]
            readings = {
                label: iaqi.get(code, {}).get("v", "N/A")
                for label, code in self._IAQI_FIELDS
            }
            readings["AQI"] = payload.get("aqi", "N/A")
            readings["Dominant Pollutant"] = payload.get("dominentpol", "N/A")
            return readings
        except Exception:
            # Deliberately broad (network, JSON and schema errors all degrade
            # to defaults) but no longer swallows KeyboardInterrupt/SystemExit.
            return self.get_default_aqi_data()

    def get_default_aqi_data(self):
        """Return the placeholder readings used when the feed is unavailable."""
        return {
            "Temperature": "N/A",
            "Humidity": "N/A",
            "Wind Speed": "N/A",
            "Pressure": "N/A",
            "AQI": "N/A",
            "Dominant Pollutant": "N/A"
        }

    def fetch_uv_data(self):
        """Scrape the UV index as an int, or return "N/A" if it cannot be read.

        The value is taken from the second cell of the second row of the
        first element with CSS class ``gr1``.
        """
        try:
            response = requests.get(self.uv_url, timeout=self.timeout)
            soup = BeautifulSoup(response.text, 'html.parser')
            gr1_elements = soup.find_all(class_='gr1')
            if gr1_elements:
                tr_elements = gr1_elements[0].find_all('tr')
                if len(tr_elements) > 1:
                    td_elements = tr_elements[1].find_all('td')
                    if len(td_elements) > 1:
                        return int(td_elements[1].text.strip())
            return "N/A"
        except Exception:
            # Covers network errors and a non-numeric cell alike.
            return "N/A"

    def get_environmental_data(self):
        """Combine both sources into one dict with units attached.

        Returns:
            A dict with the keys ``Temperature``, ``Humidity``,
            ``Wind Speed``, ``Pressure``, ``Air Quality Index``,
            ``Dominant Pollutant`` and ``UV_Index``. Unit-bearing values are
            strings such as ``"31 °C"``; unavailable values are ``"N/A"``.
        """
        aqi_data = self.fetch_aqi_data()
        uv_index = self.fetch_uv_data()

        def with_unit(key, unit):
            """Append the unit unless the reading is the "N/A" placeholder."""
            value = aqi_data[key]
            return f"{value} {unit}" if value != "N/A" else "N/A"

        return {
            "Temperature": with_unit("Temperature", "°C"),
            "Humidity": with_unit("Humidity", "%"),
            "Wind Speed": with_unit("Wind Speed", "m/s"),
            "Pressure": with_unit("Pressure", "hPa"),
            "Air Quality Index": aqi_data['AQI'],
            "Dominant Pollutant": aqi_data["Dominant Pollutant"],
            "UV_Index": uv_index
        }
def load_image(self, image_input, timeout: float = 10.0):
    """Load an image from any supported source and return it as RGB.

    Args:
        image_input: A ``PIL.Image.Image``, an ``http(s)://`` URL, a local
            file path, or a file-like object exposing ``read``.
        timeout: Seconds to wait when ``image_input`` is a URL.

    Returns:
        The image converted to RGB mode.

    Raises:
        Exception: wrapping any failure (missing file, HTTP error,
            undecodable data, unsupported input type); the original error
            is attached as the cause.
    """
    try:
        if isinstance(image_input, Image.Image):
            image = image_input
        elif isinstance(image_input, str):
            if image_input.startswith(('http://', 'https://')):
                # Bound the wait and reject HTTP error responses; otherwise
                # an error page body is handed to PIL and surfaces as a
                # misleading "cannot identify image file" failure.
                response = requests.get(image_input, timeout=timeout)
                response.raise_for_status()
                image = Image.open(BytesIO(response.content))
            else:
                if not os.path.exists(image_input):
                    raise FileNotFoundError(f"Image file not found: {image_input}")
                image = Image.open(image_input)
        elif hasattr(image_input, 'read'):
            image = Image.open(image_input)
        else:
            raise ValueError("Unsupported image input type")
        return image.convert('RGB')
    except Exception as e:
        raise Exception(f"Error loading image: {str(e)}") from e
# Working directories used by ImageProcessor, relative to the process's
# current working directory: uploads are first written under ``temp_dir``
# and then moved into ``upload_dir``.
temp_dir = "temp"
upload_dir = "uploads"

# ``exist_ok=True`` replaces the exists()-then-makedirs() pair, which could
# raise FileExistsError when two workers import this module at the same time.
os.makedirs(temp_dir, exist_ok=True)
os.makedirs(upload_dir, exist_ok=True)
def ensure_valid_session(self, title: str = None) -> str:
    """Make sure a usable chat session exists and return its id.

    A blank ``self.session_id`` creates a new session. Otherwise the id is
    validated through ``self.chat_session.validate_session``; a falsy
    result or a ``ValueError`` also leads to a new session.

    ``self.session_id`` is always re-read from the chat session afterwards.
    ``ChatSession.validate_session`` creates a replacement session itself
    when the requested id is unknown and still returns True, so trusting
    the old id would hand callers an id that no longer matches the session
    in use.

    Args:
        title: Title given to a newly created session.

    Returns:
        The id of the session that is now active.
    """
    has_id = bool(self.session_id and self.session_id.strip())
    needs_new_session = not has_id

    if has_id:
        try:
            needs_new_session = not self.chat_session.validate_session(
                self.session_id, title=title
            )
        except ValueError:
            needs_new_session = True

    if needs_new_session:
        self.chat_session.create_new_session(title=title)

    # Single source of truth: whatever session the ChatSession ended up on.
    self.session_id = self.chat_session.session_id
    return self.session_id
+ + # Check time between uploads + last_upload_time = self.chat_session.get_user_last_upload_time() + print(f"Last upload time: {last_upload_time}") + + if last_upload_time: + # Ensure last_upload_time is timezone-aware + if last_upload_time.tzinfo is None: + # If naive, make it timezone-aware by attaching UTC + last_upload_time = last_upload_time.replace(tzinfo=timezone.utc) + + # Now get the current time (which is already timezone-aware) + now = datetime.now(timezone.utc) + + # Now both times are timezone-aware, so the subtraction will work + time_since_last = now - last_upload_time + print(f"Time since last: {time_since_last}") + + if time_since_last < timedelta(minutes=1): + seconds_remaining = 60 - time_since_last.seconds + print(f"Seconds remaining: {seconds_remaining}") + + if self.chat_session.get_language().lower() == "urdu": + return False, f"براہ کرم {seconds_remaining} سیکنڈ انتظار کریں اور دوبارہ کوشش کریں۔" + else: + return False, f"Please wait {seconds_remaining} seconds before uploading another image." 
def process_chat(self, query: str) -> Dict[str, Any]:
    """Analyse the uploaded image and answer ``query`` about it.

    Steps:
      1. Enforce the upload limits (``validate_upload``); a rejection is
         returned immediately and not stored.
      2. Store the upload on disk and open it as a PIL image.
      3. Ask the vision model whether the picture shows skin; an empty
         verdict or one containing "no" ends with a "send a clearer
         image" reply.
      4. Classify the condition; "Healthy Skin" and low-confidence
         predictions end with canned replies.
      5. Otherwise gather context (web search or vector database, depending
         on the user's preferences), build the prompt, query the LLM and
         wrap its answer in the advice-report template.

    Every reply from step 3 onwards is saved to the chat session.

    Args:
        query: The user's text accompanying the image.

    Returns:
        The chat record dict. On any failure a dict with ``error``,
        ``query``, ``response`` and ``timestamp`` is returned instead; this
        method does not raise.
    """
    try:
        is_valid, message = self.validate_upload()
        if not is_valid:
            # Rate-limited: answer without saving or touching the session.
            return {
                "query": query,
                "response": message,
                "references": "",
                "page_no": "",
                "keywords": "",
                "images": "",
                "context": "",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "session_id": self.session_id or ""
            }

        profile = self.chat_session.get_name_and_age()
        name = profile['name']
        age = profile['age']
        self.chat_session.load_chat_history()
        self.chat_session.update_title(self.session_id, query)
        history = self.chat_session.format_history()
        language = self.chat_session.get_language().lower()

        filename = secure_filename(self.image.filename)
        temp_path = os.path.join(temp_dir, filename)
        upload_path = os.path.join(upload_dir, filename)

        content = self.image.file.read()
        with open(temp_path, 'wb') as buffer:
            buffer.write(content)
        # Rewind so the upload object can be read again by later consumers.
        self.image.file.seek(0)

        pil_image = Image.open(io.BytesIO(content))

        def background_file_ops(src, dst):
            shutil.copy2(src, dst)
            os.remove(src)

        # The ``with`` block waits for the move to finish. The future's
        # result is never read, so a failed move is ignored (best effort).
        with ThreadPoolExecutor(max_workers=1) as file_executor:
            file_executor.submit(background_file_ops, temp_path, upload_path)

        if language != "urdu":
            response1 = "Please provide a clear image of your skin with good lighting and a proper angle, without any filters! we can only analysis the image of skin :)"
            response3 = "You have healthy skin, MaShaAllah! I don't notice any issues at the moment. However, based on my current confidence level of {diseases_detection_confidence}, I recommend consulting a doctor for more detailed advice and analysis."
            response4 = "I'm sorry, I'm not able to identify your skin condition yet as I'm still learning, but I hope to be able to detect any skin issues in the future. :) Right now, my confidence in identifying your skin is below 50%."
            response5 = ADVICE_REPORT_SUGGESTION
        else:
            response1 = "براہ کرم اپنی جلد کی واضح تصویر اچھی روشنی اور مناسب زاویے سے فراہم کریں، کسی فلٹر کے بغیر! ہم صرف جلد کی تصویر کا تجزیہ کر سکتے ہیں"
            response3 = "آپ کی جلد صحت مند ہے، ماشاءاللہ! مجھے اس وقت کوئی مسئلہ نظر نہیں آ رہا۔ تاہم، میری موجودہ اعتماد کی سطح {diseases_detection_confidence} کی بنیاد پر، میں مزید تفصیلی مشورے اور تجزیے کے لیے ڈاکٹر سے رجوع کرنے کی تجویز کرتا ہوں۔"
            response4 = "معذرت، میں ابھی آپ کی جلد کی حالت کی شناخت کرنے کے قابل نہیں ہوں کیونکہ میں ابھی سیکھ رہا ہوں، لیکن مجھے امید ہے کہ مستقبل میں جلد کے کسی بھی مسئلے کو پہچان سکوں گا۔ :) اس وقت آپ کی جلد کی شناخت میں میرا اعتماد 50% سے کم ہے۔"
            response5 = URDU_ADVICE_REPORT_SUGGESTION

        verdict = Model().llm_image(text=SKIN_NON_SKIN_PROMPT, image=pil_image)
        verdict = (verdict or "").lower().strip()
        # The marker list used to contain "", and "" is a substring of every
        # string, so every upload was rejected. Only an empty verdict or one
        # containing "no" counts as "not skin" now.
        # NOTE(review): the substring test also matches words such as "not"
        # or "unknown" - confirm against what SKIN_NON_SKIN_PROMPT asks the
        # model to answer.
        if not verdict or "no" in verdict:
            return self._persist_chat(self._image_chat_record(query, response1, filename))

        diseases_name, diseases_detection_confidence = SkinDiseaseClassifier().predict(pil_image, 5)

        if diseases_name == "Healthy Skin":
            healthy_reply = response3.format(
                diseases_detection_confidence=diseases_detection_confidence
            )
            return self._persist_chat(self._image_chat_record(query, healthy_reply, filename))

        if diseases_detection_confidence < 46:
            return self._persist_chat(self._image_chat_record(query, response4, filename))

        self.session_id = self.ensure_valid_session(title=query)
        permission = self.chat_session.get_user_preferences()
        websearch_enabled = permission.get('websearch', False)
        env_recommendations = permission.get('environmental_recommendations', False)
        personalized_recommendations = permission.get('personalized_recommendations', False)
        keywords_permission = permission.get('keywords', False)
        reference_permission = permission.get('references', False)
        language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language=language)

        context_parts = []
        references = []
        prompt_history = ""

        if websearch_enabled:
            attach_image = True
            with ThreadPoolExecutor(max_workers=2) as executor:
                future_web = executor.submit(self.web_searcher.search, diseases_name)
                future_images = executor.submit(self.web_searcher.search_images, diseases_name)
                web_results = future_web.result()
                image_results = future_images.result()

            for idx, hit in enumerate(web_results, 1):
                if hit['text']:
                    context_parts.append(f"From Source {idx}: {hit['text']}\n")
                    references.append(hit['link'])

            context = "\n".join(context_parts)
        else:
            attach_image = False
            image_results = self.web_searcher.search_images(diseases_name)
            seen_pages = set()

            for hit in self.vectordb.search(diseases_name, top_k=3):
                if hit['confidence'] > 60:
                    context_parts.append(f"Content: {hit['content']}")
                    page = hit['page']
                    if page not in seen_pages:
                        references.append(f"Source: {hit['source']}, Page: {page}")
                        seen_pages.add(page)
                    attach_image = True

            context = "\n".join(context_parts)
            if not context or len(context) < 10:
                context = "There is no context found unfortunately please do not answer anything and ignore previous information or recommendations that were mentioned earlier in the context."

            # NOTE(review): only this one combination (vector search with
            # environmental-only recommendations) passes the chat history to
            # the prompt; every other path sends an empty history. Kept
            # as-is - confirm which behaviour is intended.
            if env_recommendations and not personalized_recommendations:
                prompt_history = history

        prompt = self._compose_prompt(
            name, age, env_recommendations, personalized_recommendations,
            prompt_history, context, query,
        )
        # "\n" here: the previous "\the" was parsed as a TAB followed by "he".
        prompt = prompt + f"\nthe query is related to {diseases_name}" + language_prompt

        llm_response = Model().llm(prompt, query)

        response = response5.format(
            diseases_name=diseases_name,
            diseases_detection_confidence=diseases_detection_confidence,
            response=llm_response
        )

        keywords = ""
        if keywords_permission:
            keywords = self.extract_keywords_yake(response, language=language)
        if not reference_permission:
            references = ""
        if not attach_image:
            # Nothing relevant was retrieved, so do not decorate the answer.
            image_results = ""
            keywords = ""

        chat_data = self._image_chat_record(
            query, response, filename,
            references=references,
            keywords=keywords,
            images=image_results,
            context=context,
        )
        return self._persist_chat(chat_data)

    except Exception as e:
        return {
            "error": str(e),
            "query": query,
            "response": "Sorry, there was an error processing your request.",
            "timestamp": datetime.now(timezone.utc).isoformat()
        }

def _image_chat_record(self, query, response, filename, references="", keywords="", images="", context=""):
    """Build the chat record that is stored and returned for one image turn."""
    return {
        "query": query,
        "response": response,
        "references": references,
        "page_no": filename,
        "keywords": keywords,
        "images": images,
        "context": context,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "session_id": self.chat_session.session_id
    }

def _persist_chat(self, chat_data):
    """Save ``chat_data`` to the session and return it; raise if saving fails."""
    if not self.chat_session.save_chat(chat_data):
        raise ValueError("Failed to save chat message")
    return chat_data

def _compose_prompt(self, name, age, env_recommendations, personalized_recommendations, previous_history, context, query):
    """Pick and fill the prompt template matching the user's preferences."""
    if env_recommendations and personalized_recommendations:
        return ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
            user_name=name,
            user_age=age,
            user_details=self.chat_session.get_personalized_recommendation(),
            environmental_condition=self.environment_data.get_environmental_data(),
            previous_history=previous_history,
            context=context,
            current_query=query
        )
    if personalized_recommendations:
        return PERSONALIZED_PROMPT.format(
            user_name=name,
            user_age=age,
            user_details=self.chat_session.get_personalized_recommendation(),
            previous_history=previous_history,
            context=context,
            current_query=query
        )
    if env_recommendations:
        return ENVIRONMENTAL_PROMPT.format(
            user_name=name,
            user_age=age,
            environmental_condition=self.environment_data.get_environmental_data(),
            previous_history=previous_history,
            context=context,
            current_query=query
        )
    return DEFAULT_PROMPT.format(
        previous_history=previous_history,
        context=context,
        current_query=query
    )
def clean_json_response(self, response_text):
    """Extract the JSON array embedded in a model reply.

    Returns the text from the first ``[`` to the last ``]`` inclusive, with
    a trailing comma before a closing ``]`` removed so ``json.loads``
    accepts it. When the reply holds no ``[ ... ]`` span (a bracket is
    missing, or the last ``]`` comes before the first ``[``) the text is
    returned unchanged instead of being cut down to an empty string.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The cleaned JSON array text, or ``response_text`` itself.
    """
    start = response_text.find('[')
    end = response_text.rfind(']')
    # Compare the raw rfind() result: after adding 1, "not found" became 0
    # and slipped past the old ``!= -1`` check.
    if start == -1 or end < start:
        return response_text
    return re.sub(r",\s*]", "]", response_text[start:end + 1])
class OCRReader:
    """Small wrapper around pytesseract for reading text out of images."""

    def __init__(self, tesseract_cmd: Optional[str] = None, config: Optional[Dict] = None):
        """Remember the OCR options and optionally override the binary path.

        Args:
            tesseract_cmd: Path to the tesseract executable. When given it
                replaces pytesseract's module-wide setting; when omitted the
                existing setting is left alone.
            config: Keyword arguments forwarded to
                ``pytesseract.image_to_string`` on every call.
        """
        self.config = config if config else {}
        if tesseract_cmd:
            pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

    def read_text_from_image(self, image: Image.Image) -> str:
        """Run OCR on ``image`` and return the text without surrounding whitespace.

        Raises:
            Exception: if OCR fails, with the underlying message included.
        """
        try:
            extracted = pytesseract.image_to_string(image, **self.config)
            return extracted.strip()
        except Exception as e:
            raise Exception(f"Error processing image: {str(e)}")
class DocumentConverter:
    """Base interface for all file-to-Markdown converters.

    Subclasses implement ``convert`` and return ``None`` when the file is not
    theirs to handle, so the dispatcher can try the next converter.
    """

    def convert(self, local_path: str, **kwargs: Any) -> "Union[None, DocumentConverterResult]":
        """Convert the file at *local_path*; return ``None`` if unsupported.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError()

    def supports_extension(self, ext: str) -> bool:
        """Return True if this converter supports the given extension.

        The base implementation claims nothing and returns ``False``. It is
        called from the dispatcher's error handler, and several converters do
        not override it; raising here would replace the real conversion error
        with a ``NotImplementedError``.
        """
        return False
def convert(self, local_path, **kwargs) -> "Union[None, DocumentConverterResult]":
    """Convert a ``.pptx`` presentation to Markdown.

    Each slide is introduced by an HTML comment carrying its 1-based number,
    followed by its pictures (as image links), tables, charts and text, and
    finally its speaker notes if present.

    Args:
        local_path: Path of the presentation on disk.
        **kwargs: ``file_extension`` selects this converter; anything other
            than ``.pptx`` (case-insensitive) makes the method return None.

    Returns:
        A ``DocumentConverterResult`` with the Markdown text, or ``None``
        when the extension is not ``.pptx``.
    """
    extension = kwargs.get("file_extension", "")
    if extension.lower() != ".pptx":
        return None

    md_content = ""
    presentation = pptx.Presentation(local_path)
    for slide_num, slide in enumerate(presentation.slides, start=1):
        # The marker is an HTML comment so it survives in the Markdown
        # without being rendered.
        md_content += f"\n\n<!-- Slide number: {slide_num} -->\n"

        title = slide.shapes.title
        for shape in slide.shapes:
            # Pictures
            if self._is_picture(shape):
                alt_text = ""
                try:
                    alt_text = shape._element._nvXxPr.cNvPr.attrib.get("descr", "")
                except Exception:
                    # Best effort: relies on private python-pptx attributes,
                    # so fall back to the shape name when they are missing.
                    pass
                filename = re.sub(r"\W", "", shape.name) + ".jpg"
                md_content += (
                    "\n!["
                    + (alt_text if alt_text else shape.name)
                    + "]("
                    + filename
                    + ")\n"
                )

            # Tables: build a real HTML table (first row as header cells) and
            # let the inherited HTML-to-Markdown conversion render it.
            if self._is_table(shape):
                html_table = "<html><body><table>"
                first_row = True
                for row in shape.table.rows:
                    html_table += "<tr>"
                    for cell in row.cells:
                        if first_row:
                            html_table += "<th>" + html.escape(cell.text) + "</th>"
                        else:
                            html_table += "<td>" + html.escape(cell.text) + "</td>"
                    html_table += "</tr>"
                    first_row = False
                html_table += "</table></body></html>"
                md_content += (
                    "\n" + self._convert(html_table).text_content.strip() + "\n"
                )

            # Charts take precedence over plain text frames.
            if shape.has_chart:
                md_content += self._convert_chart_to_markdown(shape.chart)
            elif shape.has_text_frame:
                if shape == title:
                    md_content += "# " + shape.text.lstrip() + "\n"
                else:
                    md_content += shape.text + "\n"

        md_content = md_content.strip()

        if slide.has_notes_slide:
            md_content += "\n\n### Notes:\n"
            notes_frame = slide.notes_slide.notes_text_frame
            if notes_frame is not None:
                md_content += notes_frame.text
            md_content = md_content.strip()

    return DocumentConverterResult(
        title=None,
        text_content=md_content.strip(),
    )
def convert(self, local_path: str, **kwargs: Any) -> "Union[None, DocumentConverterResult]":
    """OCR an image file and return its text as structured Markdown.

    Args:
        local_path: Path of the image on disk.
        **kwargs: ``file_extension`` selects this converter; only
            ``.png``, ``.jpg``, ``.jpeg``, ``.tiff`` and ``.bmp``
            (case-insensitive) are handled.

    Returns:
        A ``DocumentConverterResult`` with the Markdown text, or ``None``
        when the extension is not a supported image type.

    Raises:
        FileConversionException: if opening or OCR-ing the image fails; the
            underlying error is attached as ``__cause__``.
    """
    extension = kwargs.get("file_extension", "").lower()
    if extension not in ['.png', '.jpg', '.jpeg', '.tiff', '.bmp']:
        return None

    try:
        # The context manager closes the file handle even when OCR raises.
        with Image.open(local_path) as image:
            text_content = self.ocr_reader.read_text_from_image(image)
        markdown_content = self._convert_to_markdown_structure(text_content)
        return DocumentConverterResult(
            title=None,
            text_content=markdown_content
        )
    except Exception as e:
        raise FileConversionException(f"Failed to process image: {str(e)}") from e
markdown.extend(self._format_table(current_table)) + + return "\n\n".join([l for l in markdown if l]) + + def _detect_header_level(self, line: str, next_line: str) -> int: + if line.startswith('#'): + return len(re.match(r'^#+', line).group()) + if next_line: + if set(next_line) == set('='): + return 1 + if set(next_line) == set('-'): + return 2 + if len(line) <= 100 and line.strip(): + words = line.split() + if all(word[0].isupper() for word in words if word): + return 1 + if line[0].isupper() and len(words) <= 10: + return 2 + + return 0 + + def _detect_list_format(self, line: str) -> Optional[str]: + bullet_points = ['-', '•', '*', '○', '►', '·'] + for bullet in bullet_points: + if line.lstrip().startswith(bullet): + content = line.lstrip()[1:].strip() + return f"- {content}" + + if re.match(r'^\d+[\.\)]', line): + content = re.sub(r'^\d+[\.\)]', '', line).strip() + return f"1. {content}" + + return None + + def _is_likely_table_row(self, line: str) -> bool: + parts = [p for p in re.split(r'\s{2,}', line) if p.strip()] + if len(parts) >= 2: + lengths = [len(p) for p in parts] + avg_length = sum(lengths) / len(lengths) + if all(abs(l - avg_length) <= 5 for l in lengths): + return True + return False + + def _format_table(self, table_rows: List[str]) -> List[str]: + if not table_rows: + return [] + split_rows = [re.split(r'\s{2,}', row.strip()) for row in table_rows] + max_cols = max(len(row) for row in split_rows) + normalized_rows = [] + for row in split_rows: + while len(row) < max_cols: + row.append('') + normalized_rows.append(row) + col_widths = [] + for col in range(max_cols): + width = max(len(row[col]) for row in normalized_rows) + col_widths.append(width) + markdown_table = [] + + header = "| " + " | ".join(cell.ljust(width) for cell, width in zip(normalized_rows[0], col_widths)) + " |" + markdown_table.append(header) + + separator = "|" + "|".join("-" * (width + 2) for width in col_widths) + "|" + markdown_table.append(separator) + + for row in 
normalized_rows[1:]: + formatted_row = "| " + " | ".join(cell.ljust(width) for cell, width in zip(row, col_widths)) + " |" + markdown_table.append(formatted_row) + + return markdown_table + + def _format_emphasis(self, text: str) -> str: + text = re.sub(r'\b([A-Z]{2,})\b', r'**\1**', text) + text = re.sub(r'[_/](.*?)[_/]', r'*\1*', text) + return text + +class MagicConvert: + def __init__( + self, + requests_session: Optional[requests.Session] = None, + style_map: Optional[str] = None, + ): + if requests_session is None: + self._requests_session = requests.Session() + else: + self._requests_session = requests_session + + self._style_map = style_map + self._page_converters: List[DocumentConverter] = [] + + ocr_reader = OCRReader() + + self.register_page_converter(ImageConverter(ocr_reader)) + self.register_page_converter(PlainTextConverter()) + self.register_page_converter(HtmlConverter()) + self.register_page_converter(DocxConverter()) + self.register_page_converter(XlsxConverter()) + self.register_page_converter(PptxConverter()) + self.register_page_converter(PdfConverter()) + + def magic( + self, source: Union[str, requests.Response], **kwargs: Any + ) -> DocumentConverterResult: + if isinstance(source, str): + if ( + source.startswith("http://") + or source.startswith("https://") + or source.startswith("file://") + ): + return self.convert_url(source, **kwargs) + else: + return self.convert_local(source, **kwargs) + elif isinstance(source, requests.Response): + return self.convert_response(source, **kwargs) + + def convert_local( + self, path: str, **kwargs: Any + ) -> DocumentConverterResult: + ext = kwargs.get("file_extension") + extensions = [ext] if ext is not None else [] + base, ext = os.path.splitext(path) + self._append_ext(extensions, ext) + + for g in self._guess_ext_magic(path): + self._append_ext(extensions, g) + return self._convert(path, extensions, **kwargs) + + def convert_stream( + self, stream: Any, **kwargs: Any + ) -> DocumentConverterResult: 
+ ext = kwargs.get("file_extension") + extensions = [ext] if ext is not None else [] + handle, temp_path = tempfile.mkstemp() + fh = os.fdopen(handle, "wb") + result = None + try: + content = stream.read() + if isinstance(content, str): + fh.write(content.encode("utf-8")) + else: + fh.write(content) + fh.close() + for g in self._guess_ext_magic(temp_path): + self._append_ext(extensions, g) + result = self._convert(temp_path, extensions, **kwargs) + finally: + try: + fh.close() + except Exception: + pass + os.unlink(temp_path) + + return result + + def convert_url( + self, url: str, **kwargs: Any + ) -> DocumentConverterResult: + response = self._requests_session.get(url, stream=True) + response.raise_for_status() + return self.convert_response(response, **kwargs) + + def convert_response( + self, response: requests.Response, **kwargs: Any + ) -> DocumentConverterResult: + ext = kwargs.get("file_extension") + extensions = [ext] if ext is not None else [] + content_type = response.headers.get("content-type", "").split(";")[0] + self._append_ext(extensions, mimetypes.guess_extension(content_type)) + content_disposition = response.headers.get("content-disposition", "") + m = re.search(r"filename=([^;]+)", content_disposition) + if m: + base, ext = os.path.splitext(m.group(1).strip("\"'")) + self._append_ext(extensions, ext) + base, ext = os.path.splitext(urlparse(response.url).path) + self._append_ext(extensions, ext) + handle, temp_path = tempfile.mkstemp() + fh = os.fdopen(handle, "wb") + result = None + try: + for chunk in response.iter_content(chunk_size=512): + fh.write(chunk) + fh.close() + for g in self._guess_ext_magic(temp_path): + self._append_ext(extensions, g) + + result = self._convert(temp_path, extensions, url=response.url, **kwargs) + finally: + try: + fh.close() + except Exception: + pass + os.unlink(temp_path) + + return result + + def _convert( + self, local_path: str, extensions: List[Union[str, None]], **kwargs + ) -> DocumentConverterResult: + 
error_trace = "" + for ext in extensions + [None]: + for converter in self._page_converters: + _kwargs = copy.deepcopy(kwargs) + if ext is None: + if "file_extension" in _kwargs: + del _kwargs["file_extension"] + else: + _kwargs.update({"file_extension": ext}) + + _kwargs["_parent_converters"] = self._page_converters + if "style_map" not in _kwargs and self._style_map is not None: + _kwargs["style_map"] = self._style_map + + try: + res = converter.convert(local_path, **_kwargs) + if res is not None: + res.text_content = "\n".join( + [line.rstrip() for line in re.split(r"\r?\n", res.text_content)] + ) + res.text_content = re.sub(r"\n{3,}", "\n\n", res.text_content) + return res + except Exception as e: + # If this converter supports the extension and fails, raise the exception + if ext is not None and converter.supports_extension(ext): + raise FileConversionException( + f"Could not convert '{local_path}' to Markdown with {converter.__class__.__name__} " + f"for extension '{ext}'. The following error occurred:\n\n{traceback.format_exc()}" + ) + # Otherwise, store the error and continue + error_trace = ("\n\n" + traceback.format_exc()).strip() + + if len(error_trace) > 0: + raise FileConversionException( + f"Could not convert '{local_path}' to Markdown. File type was recognized as {extensions}. " + f"While converting the file, the following error was encountered:\n\n{error_trace}" + ) + raise UnsupportedFormatException( + f"Could not convert '{local_path}' to Markdown. The formats {extensions} are not supported." + ) + + def _append_ext(self, extensions, ext): + if ext is None: + return + ext = ext.strip() + if ext == "": + return + extensions.append(ext) + + def _guess_ext_magic(self, path): + try: + guesses = puremagic.magic_file(path) + extensions = list() + for g in guesses: + ext = g.extension.strip() + if len(ext) > 0: + if not ext.startswith("."): + ext = "." 
# Prompt that rewrites the user's latest question into a standalone web-search
# query. Placeholders: {history} (prior conversation) and {query} (new question).
# The output must always be English, whatever the input language.
HISTORY_BASED_PROMPT = """
You are an expert in formulating queries based on history. You have no concern with the answer to the user's question; your focus is solely on the question asked by the user.

History: {history}

new_question_by_human: {query}

Given the conversation history and the new question, return only the concise question that should be understood by a web search engine. Ensure that the final question correctly reflects the context of the conversation, especially when a specific term (like things "disease", "person", "anything") is implied. Do not include any explanation or additional text, just return the final question and remember if the question is not related to history then just write the same question without changing anything even a single word.

Whatever language the input is written in, you must respond strictly in English.
"""
Always maintain a professional, caring tone typical of an experienced dermatologist. +3. Never mention that you're using "context" or following instructions - speak naturally as a real doctor would. +4. For non-medical casual questions, provide brief, friendly responses while subtly steering back to dermatology topics. +5. Never make up medical information - if the context doesn't provide sufficient information, acknowledge the limitations. + +Response format: +- For dermatological questions: Provide clear, concise medical information using your expertise. +- For greetings/casual conversation: Respond warmly but concisely as a friendly dermatologist would. +- For non-dermatological questions: Politely note that as a dermatologist, you focus on skin health topics, and offer to help with any skin-related concerns. + +End your response with: "*For proper medical advice, an in-person consultation is always recommended.*" +""" + +PERSONALIZED_PROMPT = """ +I am speaking with {user_name}, age {user_age}. + +Patient Information: + +{user_details} + + +Previous Conversation: +{previous_history} + +I am Dr. Derma, a board-certified dermatologist with 15+ years of experience. I will provide personalized dermatological advice based exclusively on the following clinical reference materials: + +Reference Materials: +{context} + +Patient Question: {current_query} + +As a dermatologist, I will: +1. Answer ONLY with information supported by my reference materials +2. Speak naturally as I would in my clinic, without referencing "context" or "instructions" +3. Maintain a professional yet warm tone appropriate for a doctor-patient relationship +4. Only discuss dermatological topics and casual conversation - for unrelated topics, I'll gently redirect to skin health +5. 
ALWAYS include a "Personal Recommendations" section when answering medical questions + +For dermatological questions: +## [Relevant Medical Topic] +[Main clinical answer based strictly on reference materials] + +## Personal Recommendations +[Specific recommendations considering the patient's age, skin type, medical history, and other relevant factors from their profile] + +*These recommendations are based on limited information. For proper diagnosis and treatment, an in-person consultation is always recommended.* +""" + +ENVIRONMENTAL_PROMPT = """ +I am speaking with {user_name}, age {user_age}. + +Environmental Factors: + +{environmental_condition} + + +Previous Conversation: +{previous_history} + +I am Dr. Derma, a board-certified dermatologist with 15+ years of experience. I will provide environmentally-conscious dermatological advice based exclusively on the following clinical reference materials: + +Reference Materials: +{context} + +Patient Question: {current_query} + +As a dermatologist, I will: +1. Answer ONLY with information supported by my reference materials +2. Speak naturally as I would in my clinic, without referencing "context" or "instructions" +3. Maintain a professional yet warm tone appropriate for a doctor-patient relationship +4. Only discuss dermatological topics and casual conversation - for unrelated topics, I'll gently redirect to skin health +5. ALWAYS include an "Environmental Considerations" section when answering medical questions + +For dermatological questions: +## [Relevant Medical Topic] +[Main clinical answer based strictly on reference materials] + +## Environmental Considerations +[Specific recommendations considering local climate, pollution levels, UV index, and other environmental factors] + +*These recommendations are based on limited information. 
For proper diagnosis and treatment, an in-person consultation is always recommended.* +""" + + +ENVIRONMENTAL_PERSONALIZED_PROMPT = """ +I am speaking with {user_name}, age {user_age}. + +Patient Information: + +{user_details} + + +Environmental Factors: + +{environmental_condition} + + +Previous Conversation: +{previous_history} + +I am Dr. Derma, a board-certified dermatologist with 15+ years of experience. I will provide comprehensive dermatological advice based exclusively on the following clinical reference materials: + +Reference Materials: +{context} + +Patient Question: {current_query} + +As a dermatologist, I will: +1. Answer ONLY with information supported by my reference materials +2. Speak naturally as I would in my clinic, without referencing "context" or "instructions" +3. Maintain a professional yet warm tone appropriate for a doctor-patient relationship +4. Only discuss dermatological topics and casual conversation - for unrelated topics, I'll gently redirect to skin health +5. ALWAYS include BOTH "Personal Recommendations" AND "Environmental Considerations" sections when answering medical questions + +For dermatological questions: +## [Relevant Medical Topic] +[Main clinical answer based strictly on reference materials] + +## Personal Recommendations +[Specific recommendations considering the patient's age, skin type, medical history, and other relevant factors from their profile] + +## Environmental Considerations +[Specific recommendations considering local climate, pollution levels, UV index, and other environmental factors] + +*These recommendations are based on limited information. For proper diagnosis and treatment, an in-person consultation is always recommended.* +""" + +MEDICAL_REPORT_ANALYSIS_PROMPT = """ +You are an advanced medical report analysis system specializing in dermatology. Your purpose is to analyze and interpret the medical report for patient with the highest level of accuracy and clinical relevance. 
+ +CONTEXT AND CONSTRAINTS: +- Base your analysis EXCLUSIVELY on the provided medical report content +- Do not make assumptions or introduce external medical knowledge +- Maintain strict medical privacy and confidentiality standards + +MEDICAL REPORT: +{report} + +CURRENT QUERY: +{current_query} + +ANALYSIS GUIDELINES: +1. Primary Findings + - Identify and explain key clinical observations + - Highlight any critical diagnostic information + - Note any abnormal results or concerning findings + +2. Clinical Interpretation + - Analyze the findings in their clinical context + - Connect related symptoms and observations + - Identify any patterns or correlations in the data + +3. Response Format: + - Start with a clear, direct answer to the query + - Support your response with specific evidence from the report + - Use medical terminology appropriately with plain language explanations + - Clearly separate facts from interpretations + - Structure information in a logical, easy-to-follow manner + +4. Information Gaps: + - If any critical information is missing, clearly state: "The report does not contain sufficient information regarding [specific aspect]" + - Specify what additional information would be needed for a complete assessment + +IMPORTANT NOTES: +- If the report contains laboratory values or measurements, include the relevant numbers and reference ranges +- For any medical terms used, provide brief explanations in parentheses +- If multiple interpretations are possible, list them in order of likelihood based on the report data +- Flag any urgent or critical findings that may require immediate attention + +Please analyze the provided report and respond to the query while adhering to these guidelines. Maintain professional medical communication standards while ensuring clarity for the reader. 
+ + [At Last add this disclaimer] + *We acknowledge the possibility of errors, so it is always recommended to consult with a doctor for a thorough check-up.* + + And If the report is not Related to Medical the just write + `Sorry Please upload Medical related Report` +""" + + + +LANGUAGE_RESPONSE_PROMPT = """ +STRICT LANGUAGE REQUIREMENTS: +1. Response must be written EXCLUSIVELY in {language} using its official script/orthography +2. English terms ONLY permitted when: + - There's no direct translation (technical terms/proper nouns) + - Retention is crucial for meaning preservation +3. STRICTLY PROHIBITED: + - Code-switching/mixing languages + - Transliterations of {language} words using Latin script + - Non-native punctuation/formatting +4. Ensure: + - Correct grammatical structure for {language} + - Proper script-specific punctuation + - Native character set compliance +5. Formatting must follow {language}'s typographical conventions +6. If unsure about translations: Use native {language} equivalents first + +Respond ONLY in {language} script. Never include translations/explanations. 
+""" + + +SKIN_CARE_SCHEDULER = """As a skincare expert, generate a daily schedule based on: +- User's skin profile: {personalized_condition} +- Current environmental conditions: {environmental_values} +- Historical routines: {historical_data} + +Create EXACTLY 5 entries in this JSON format: +[ + {{ + "time": "6:00 AM - 8:00 AM", + "recommendation": "Cleanse with [Product Name]", + "icon": "💧", + "category": "morning" + }}, + {{ + "time": "8:00 AM - 10:00 AM", + "recommendation": "Apply [Sunscreen Name] SPF 50", + "icon": "☀️", + "category": "morning" + }}, + {{ + "time": "12:00 PM - 2:00 PM", + "recommendation": "Reapply sunscreen", + "icon": "🌤️", + "category": "afternoon" + }}, + {{ + "time": "6:00 PM - 8:00 PM", + "recommendation": "Evening cleansing routine", + "icon": "🌙", + "category": "evening" + }}, + {{ + "time": "9:00 PM - 11:00 PM", + "recommendation": "Night serum application", + "icon": "✨", + "category": "night" + }} +] + +Important rules: +1. Use only double quotes +2. Maintain category order: morning, morning, afternoon, evening, night +3. Include specific product names from historical data when available +4. Never add comments or text outside the JSON array +5. Time ranges must follow "HH:MM AM/PM - HH:MM AM/PM" format +6. 
# Fallback daily routine, shaped like the JSON that SKIN_CARE_SCHEDULER asks
# the model to produce: five entries with categories in the order
# morning, morning, afternoon, evening, night.
DEFAULT_SCHEDULE = [
    {
        "time": "6:00 AM - 8:00 AM",
        "recommendation": "Cleanse with a gentle cleanser",
        "icon": "💧",
        "category": "morning"
    },
    {
        "time": "8:00 AM - 10:00 AM",
        "recommendation": "Apply sunscreen SPF 30+",
        "icon": "☀️",
        "category": "morning"
    },
    {
        "time": "12:00 PM - 2:00 PM",
        "recommendation": "Reapply sunscreen if needed",
        "icon": "🌤️",
        "category": "afternoon"
    },
    {
        "time": "6:00 PM - 8:00 PM",
        "recommendation": "Evening cleansing routine",
        "icon": "🌙",
        "category": "evening"
    },
    {
        "time": "9:00 PM - 11:00 PM",
        "recommendation": "Apply night cream or serum",
        "icon": "✨",
        "category": "night"
    }
]
class RAGEvaluation:
    """Score stored RAG interactions for relevance and latency.

    Relevance ("accuracy") is the TF-IDF cosine similarity between the
    retrieved context and the generated response; latency ("overall_time")
    is the elapsed seconds between the recorded RAG start and end times.
    """

    def __init__(self, token: str, page: int = 1, page_size: int = 5):
        """Bind the evaluator to a user's chat history and a result page.

        Args:
            token: Passed through to ChatSession.
            page: Page of saved interactions to evaluate.
            page_size: Number of interactions per page.
        """
        self.chat_session = ChatSession(token, "session_id")
        self.page = page
        self.page_size = page_size
        self.lemmatizer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))

    def _preprocess_text(self, text: str) -> str:
        """Normalise text into a space-joined list of unique content words.

        Lower-cases, drops every character that is not an ASCII letter,
        digit or whitespace, lemmatises each token, removes English stop
        words and keeps only the first occurrence of each remaining word.
        """
        normalised = re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
        lemmas = (self.lemmatizer.lemmatize(token) for token in normalised.split())
        content_words = [lemma for lemma in lemmas if lemma not in self.stop_words]
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return ' '.join(dict.fromkeys(content_words))

    def _calculate_cosine_similarity(self, context: str, response: str) -> float:
        """Return the TF-IDF cosine similarity of response against context.

        The vocabulary is restricted to the words of the cleaned context, so
        response words that never appear in the context are ignored. Returns
        0.0 when vectorisation raises ValueError (e.g. empty vocabulary).
        """
        context_terms = self._preprocess_text(context)
        response_terms = self._preprocess_text(response)
        tfidf = TfidfVectorizer(vocabulary=context_terms.split())

        try:
            context_matrix = tfidf.fit_transform([context_terms])
            response_matrix = tfidf.transform([response_terms])
            similarity = cosine_similarity(context_matrix, response_matrix)
            return similarity[0][0]
        except ValueError:
            return 0.0

    def _calculate_time_difference(self, start_time: str, end_time: str) -> float:
        """Return end_time minus start_time in seconds (ISO-8601 inputs)."""
        elapsed = datetime.fromisoformat(end_time) - datetime.fromisoformat(start_time)
        return elapsed.total_seconds()

    def _process_interaction(self, interaction: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of interaction with 'accuracy' and 'overall_time' added."""
        return {
            **interaction,
            'accuracy': self._calculate_cosine_similarity(
                interaction['context'],
                interaction['response'],
            ),
            'overall_time': self._calculate_time_difference(
                interaction['rag_start_time'],
                interaction['rag_end_time'],
            ),
        }

    def generate_evaluation_report(self) -> Dict[str, Any]:
        """Fetch one page of saved interactions and attach scores to each.

        Returns:
            The paging fields reported by ChatSession.get_save_details plus
            'results', the list of scored interactions.
        """
        page_data = self.chat_session.get_save_details(
            page=self.page,
            page_size=self.page_size
        )
        scored = [self._process_interaction(item) for item in page_data['results']]

        return {
            'total_interactions': page_data['total_interactions'],
            'page': page_data['page'],
            'page_size': page_data['page_size'],
            'total_pages': page_data['total_pages'],
            'results': scored
        }
def process_chat(self, query: str, report_file: str, file_type: Optional[str] = None) -> Dict[str, Any]:
    """Analyse an uploaded medical report with the LLM and save the exchange.

    Args:
        query: The user's question about the report.
        report_file: Path of the uploaded report; converted to text with
            self.markitdown.
        file_type: Type of the uploaded file. Only checked for presence.

    Returns:
        On success, the chat record that was saved (query, response,
        keywords, context, timestamp, session_id, ...). On failure, a dict
        with "error", "query", a user-facing "response" and "timestamp".
        This method never raises; every exception is converted into the
        error dict.
    """
    # Validate the upload before touching any session state or calling the
    # LLM, so a malformed request has no side effects.
    if not report_file or not file_type:
        return {
            "error": "Report file or file type missing",
            "query": query,
            "response": "Sorry, report file or file type is missing.",
            "timestamp": datetime.now(timezone.utc).isoformat()
        }

    try:
        self.chat_session.update_title(self.session_id, query)
        self.session_id = self.ensure_valid_session(title=query)
        language = self.chat_session.get_language().lower()
        language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language=language)

        report_file_name = report_file + " (File Uploaded)"
        conversion_result = self.markitdown.magic(report_file)
        report_text = conversion_result.text_content

        prompt = MEDICAL_REPORT_ANALYSIS_PROMPT.format(
            report=report_text,
            current_query=query
        )

        # Build the model once; the previous chained assignment constructed a
        # second Model only to store the response on it and discard it.
        model = Model()
        response = model.llm(prompt + "\n" + language_prompt, query)
        keywords = self.extract_keywords_yake(response)

        chat_data = {
            "query": report_file_name + "\n" + query,
            "response": response,
            "references": "",
            "page_no": "",
            "keywords": keywords,
            "images": "",
            "context": report_text,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "session_id": self.chat_session.session_id
        }

        if not self.chat_session.save_chat(chat_data):
            raise ValueError("Failed to save chat message")

        return chat_data

    except Exception as e:
        # Service boundary: report the failure to the caller as data.
        return {
            "error": str(e),
            "query": query,
            "response": "Sorry, there was an error processing your request.",
            "timestamp": datetime.now(timezone.utc).isoformat()
        }
schedule with the model + model = Model() + result = model.skinScheduler(formatted_prompt) + + # Handle errors by falling back to default schedule + if isinstance(result, dict) and "error" in result: + logging.error(f"Model error: {result['error']}") + result = DEFAULT_SCHEDULE + + # Validate basic structure (optional, but ensures 5 entries) + if not isinstance(result, list) or len(result) != 5: + logging.warning("Generated schedule invalid; using default.") + result = DEFAULT_SCHEDULE + + # Save and return the schedule + self.chat_session.save_schedule(result) + return json.dumps(result, indent=2) + + except Exception as e: + logging.error(f"Schedule generation failed: {str(e)}") + return json.dumps(DEFAULT_SCHEDULE, indent=2) +``` + +### app\services\tools.py + +```python +from app.services.websearch import WebSearch +from app.services.vector_database_search import VectorDatabaseSearch +from typing import Dict, List, Any +import logging + +logger = logging.getLogger(__name__) + +def get_web_search(query: str, num_results: int = 4) -> dict: + """ + Performs web search for the given query. + Use this tool to search for current dermatological information on the internet. + Call this tool with medical queries to get up-to-date information. + + Args: + query: Search query string - should be medical/dermatological related + num_results: Number of results to return (default 4) + + Returns: + Dictionary with status and results containing medical information + """ + try: + logger.info(f"🔍 WEB SEARCH TOOL CALLED with query: '{query}', num_results: {num_results}") + web = WebSearch(num_results=num_results) + results = web.search(query) + + if not results: + logger.warning(f"❌ No web search results found for query: '{query}'") + return { + "status": "error", + "error_message": f"No results found for '{query}'." 
def get_vector_search(query: str, top_k: int = 5) -> dict:
    """
    Performs vector database search for the given query.
    Use this tool to search for medical information in the specialized dermatology database.
    Call this tool with medical queries to get evidence-based dermatological information.

    Args:
        query: Search query string - should be medical/dermatological related
        top_k: Number of results to return (default 5)

    Returns:
        Dictionary with status and results containing medical database information
    """
    # Hits at or below this confidence (percent) are dropped.
    min_confidence = 30

    try:
        logger.info(f"🔍 VECTOR SEARCH TOOL CALLED with query: '{query}', top_k: {top_k}")
        vector = VectorDatabaseSearch()

        if not vector.is_available():
            logger.error("❌ Vector database is not available")
            return {
                "status": "error",
                "error_message": "Vector database is not available"
            }

        results = vector.search(query, top_k=top_k)

        if not results:
            logger.warning(f"❌ No vector search results found for query: '{query}'")
            return {
                "status": "error",
                "error_message": f"No results found for '{query}'."
            }

        confident_hits = [hit for hit in results if hit.get('confidence', 0) > min_confidence]

        # Without this guard a query whose hits were all filtered out was
        # reported as "success" with an empty result list.
        if not confident_hits:
            logger.warning(f"❌ No vector search results found for query: '{query}'")
            return {
                "status": "error",
                "error_message": f"No results found for '{query}'."
            }

        # Format results for the agent. Numbering is applied after filtering
        # so source numbers are consecutive (1..n) with no gaps.
        formatted_results = []
        for idx, hit in enumerate(confident_hits, 1):
            formatted_result = {
                "source_number": idx,
                "content": hit.get('content', ''),
                "source": hit.get('source', 'Unknown'),
                "page": hit.get('page', 0),
                "confidence": hit.get('confidence', 0)
            }
            formatted_results.append(formatted_result)
            logger.info(f"📚 Vector search result {idx}: confidence={formatted_result['confidence']}, source={formatted_result['source']}")

        logger.info(f"✅ VECTOR SEARCH COMPLETED successfully. Found {len(formatted_results)} high-confidence results")
        return {
            "status": "success",
            "results": formatted_results
        }
    except Exception as e:
        logger.error(f"❌ VECTOR SEARCH ERROR for query '{query}': {e}", exc_info=True)
        return {
            "status": "error",
            "error_message": f"Vector search failed: {str(e)}"
        }
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# os.environ only accepts str values: assigning the None that os.getenv
# returns for an unset variable raises TypeError and would make this module
# unimportable. Export the key only when it is actually configured.
_gemini_api_key = os.getenv("GEMINI_API_KEY")
if _gemini_api_key:
    os.environ["GOOGLE_API_KEY"] = _gemini_api_key
else:
    logger.warning("GEMINI_API_KEY is not set; GOOGLE_API_KEY was not exported.")

# Qdrant connection settings; the collection name falls back to
# "dermatology_docs" when not configured.
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "dermatology_docs")
def _initialize_collection(self):
    """Initialize Qdrant collection if it doesn't exist.

    Creates the collection with 768-dimensional cosine-distance vectors
    when it is missing; otherwise logs how many points it already holds.
    Does nothing when no client has been created.

    Raises:
        Exception: Any error from the Qdrant client is logged and re-raised.
            Previously it was swallowed here, after which the caller carried
            on and set ``is_initialized = True``, overwriting the failure
            flag set in this method.
    """
    if not self.client:
        return

    try:
        collections = self.client.get_collections()
        collection_exists = any(c.name == self.collection_name for c in collections.collections)

        if not collection_exists:
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=models.VectorParams(
                    size=768,
                    distance=models.Distance.COSINE
                )
            )
            logger.info(f"Created new collection: {self.collection_name}")
        else:
            # Check if collection has data
            collection_info = self.client.get_collection(self.collection_name)
            logger.info(f"Collection {self.collection_name} exists with {collection_info.points_count} points")

    except Exception as e:
        logger.error(f"Error initializing collection: {e}")
        self.is_initialized = False
        # Propagate so the connection setup that called us stops instead of
        # marking the store as initialized.
        raise
def search(self, query, top_k=5):
    """Search documents based on query.

    Args:
        query: Free-text query to embed and match against the collection.
        top_k: Maximum number of hits to return.

    Returns:
        A list of dicts with "source", "page", "content" (first 500
        characters of the chunk) and "confidence" (0-100, rounded to two
        decimals). Returns an empty list when the store is not initialized,
        the collection is empty, or the search fails.
    """
    if not self.is_initialized:
        logger.warning("Vector database not initialized. Returning empty results.")
        return []

    try:
        # Check if collection has any data
        collection_info = self.client.get_collection(self.collection_name)
        if collection_info.points_count == 0:
            logger.warning(f"Collection {self.collection_name} is empty. No documents to search.")
            return []

        # Perform similarity search
        results = self.vectorstore.similarity_search_with_score(query, k=top_k)

        formatted = []
        for doc, score in results:
            # For a COSINE collection Qdrant reports the cosine similarity
            # itself (higher = closer), not a distance, so the score maps
            # directly onto a percentage. The former `(1 - score) * 100`
            # gave the best matches the lowest confidence. Clamp because
            # cosine similarity can be negative.
            similarity = max(0.0, min(1.0, float(score)))
            confidence = similarity * 100

            formatted.append({
                "source": doc.metadata.get('source', 'Unknown'),
                "page": doc.metadata.get('page', 0),
                "content": doc.page_content[:500],
                "confidence": round(confidence, 2)
            })

        logger.info(f"Found {len(formatted)} results for query: {query[:50]}...")
        return formatted

    except Exception as e:
        logger.error(f"Search error: {e}")
        return []
def is_valid_url(self, url):
    """Check if URL is valid and not an ad/tracking URL.

    A URL is rejected when it is empty, shorter than 10 or longer than 500
    characters, lacks a scheme or host, matches an entry of
    self.blocked_domains, carries an ad/click-tracking query parameter, or
    has a redirect-style path.

    Args:
        url: Candidate URL string.

    Returns:
        True when the URL looks like a regular content page, else False.
    """
    if not url or len(url) < 10:
        return False

    # Reject extremely long URLs (often tracking)
    if len(url) > 500:
        return False

    try:
        parsed = urlparse(url)

        # Check if URL has a valid scheme and netloc
        if not all([parsed.scheme, parsed.netloc]):
            return False

        domain = parsed.netloc.lower()
        path = parsed.path.lower()

        # Match blocklist entries against host + path together: entries
        # such as "bing.com/aclick" span both parts and could never match
        # when host and path were tested separately.
        location = domain + path
        if any(blocked in location for blocked in self.blocked_domains):
            return False

        # Block URLs with ad-related query parameters. Only parameter
        # *names* are inspected; a substring test on the raw query string
        # rejected ordinary URLs such as "?q=headache" ("ad" in "headache").
        param_names = [name.lower() for name in parse_qs(parsed.query, keep_blank_values=True)]
        for name in param_names:
            if name == "ad" or name.startswith(("ad_", "ad-", "adid")):
                return False
            if any(token in name for token in ("click", "track", "clkid")):
                return False

        # Extra check for redirect URLs
        if "redirect" in path or "goto" in path or "go.php" in path:
            return False

        return True

    except (AttributeError, TypeError, ValueError):
        # Malformed input (e.g. invalid IPv6 literal, non-string URL).
        return False
def search(self, query):
    """Search DuckDuckGo (HTML endpoint) and fetch the text of the top pages.

    Result links are unwrapped from redirects, validated, de-duplicated by
    host and ordered so that hosts listed in self.content_domains come
    first. Up to self.num_results pages are then downloaded concurrently.

    Args:
        query: Search query string.

    Returns:
        A list of dicts with "link", "title" and "text" (truncated to
        self.max_chars_per_page), in prioritised link order. Pages that fail
        to download or are not HTML are skipped. Returns an empty list when
        the search itself fails.
    """
    from concurrent.futures import ThreadPoolExecutor

    results = []
    encoded_query = urllib.parse.quote(query)
    url = f'https://html.duckduckgo.com/html/?q={encoded_query}'

    try:
        with requests.Session() as session:
            session.headers.update(self.headers)

            response = session.get(url, timeout=10)
            soup = BeautifulSoup(response.text, 'html.parser')

            # Getting more results than needed to account for filtering
            search_results = soup.find_all('div', class_='result')[:self.num_results * 2]
            links = []

            # Extract and process links
            for result in search_results:
                link_tag = result.find('a', class_='result__a')
                if not link_tag or not link_tag.get('href'):
                    continue

                # Process link to get the actual URL, then validate it
                clean_link = self.extract_real_url_from_redirect(link_tag['href'])
                if self.is_valid_url(clean_link):
                    links.append(clean_link)

            # Prioritize content domains, keeping the search ranking within
            # each group
            prioritized_links = []
            other_links = []
            for link in links:
                if any(domain in link for domain in self.content_domains):
                    prioritized_links.append(link)
                else:
                    other_links.append(link)

            # Limit to one link per host, up to num_results
            unique_links = []
            seen_domains = set()
            for link in prioritized_links + other_links:
                domain = urlparse(link).netloc
                if domain not in seen_domains and len(unique_links) < self.num_results:
                    unique_links.append(link)
                    seen_domains.add(domain)

            # ThreadPoolExecutor rejects max_workers=0, so bail out early
            # instead of surfacing a spurious "Search error".
            if not unique_links:
                return results

            def fetch_page(link):
                """Download one page and return its link/title/text, or None."""
                try:
                    # Random delay to avoid being blocked
                    time.sleep(random.uniform(0.5, 1.5))

                    # NOTE(review): verify=False disables TLS certificate
                    # checks for third-party pages - confirm this is intended.
                    page_response = session.get(link, timeout=10, verify=False)

                    # Only process HTML content
                    if 'text/html' not in page_response.headers.get('Content-Type', ''):
                        return None

                    page_soup = BeautifulSoup(page_response.text, 'lxml')

                    # Remove non-content elements
                    for tag in page_soup(['script', 'style', 'header', 'footer',
                                          'nav', 'form', 'iframe', 'noscript']):
                        tag.decompose()

                    # Extract text and normalize spacing
                    text = ' '.join(page_soup.stripped_strings)
                    text = re.sub(r'\s+', ' ', text).strip()

                    # <title>.string is None for an empty or nested title.
                    title_tag = page_soup.title
                    title = title_tag.string if title_tag and title_tag.string else "Untitled Page"

                    return {
                        'link': link,
                        'title': title,
                        'text': text[:self.max_chars_per_page]
                    }
                except Exception as e:
                    print(f"Error fetching {link}: {str(e)}")
                    return None

            with ThreadPoolExecutor(max_workers=min(len(unique_links), 4)) as executor:
                # executor.map yields in submission order, so the
                # prioritised ordering survives the concurrent fetch
                # (as_completed returned pages in arrival order).
                for page in executor.map(fetch_page, unique_links):
                    if page:
                        results.append(page)

        return results

    except Exception as e:
        print(f"Search error: {str(e)}")
        return []
def is_image_url(self, url):
    """Check if URL points to an image file.

    The extension is looked up on the URL's path component as well as on
    the raw URL, so images served with a query string or fragment
    (e.g. ``photo.jpg?w=100``) are recognised too.

    Args:
        url: Candidate URL string.

    Returns:
        True when the URL or its path ends with a known image extension
        (case-insensitive), else False. Empty or unparsable input is False.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')
    if not url:
        return False

    lowered = url.lower()
    if lowered.endswith(image_extensions):
        return True

    try:
        path = urlparse(lowered).path
    except ValueError:
        return False
    return path.endswith(image_extensions)
get_conditon(self): + data = self.environment_data.get_environmental_data() + + formatted_data = [ + { + "label": "Humidity", + # Handle decimal values by converting to float first + "value": int(float(data['Humidity'].strip(' %'))), + "color": "#4FC3F7", + "icon": "FaTint", + "type": "numeric" + }, + { + "label": "UV Rays", + "value": data['UV_Index'] * 10, + "color": "#FFB74D", + "icon": "FaSun", + "type": "numeric" + }, + { + "label": "Pollution", + "value": map_pollution_level(data['Air Quality Index']), + "color": "#F06292", + "icon": "FaLeaf", + "type": "numeric" + }, + { + "label": "Air Quality", + **map_air_quality_index(data['Air Quality Index']), + "icon": "FaCloud", + "type": "categorical" + }, + { + "label": "Wind", + "value": float(data['Wind Speed'].strip(' m/s')) * 10, + "color": "#9575CD", + "icon": "FaWind", + "type": "numeric" + }, + { + "label": "Temperature", + "value": int(float(data['Temperature'].strip(' °C'))), + "color": "#FF7043", + "icon": "FaThermometerHalf", + "type": "numeric" + } + ] + + return formatted_data +``` + +### app.py + +```python +import uvicorn +from app.main import app + +if __name__ == "__main__": + uvicorn.run("app.main:app", host="0.0.0.0", port=5000, reload=True) +``` + +### pyproject.toml + +```toml +[project] +name = "derm_ai" +version = "0.1.0" +description = "This is derm_ai backend" +authors = [ + { name = "Muhammad Noman", email = "muhammadnoman76@gmail.com" } +] +dependencies = [ + "beautifulsoup4==4.13.4", + "fastapi==0.115.12", + "google-genai==1.36.0", + "huggingface_hub==0.30.2", + "langchain_community==0.3.23", + "langchain_google_genai==2.1.4", + "langchain_qdrant==0.2.0", + "langchain_text_splitters==0.3.8", + "nltk==3.9.1", + "numpy==2.2.4", + "pillow==11.2.1", + "pydantic[email]==2.11.3", + "pymongo==4.12.1", + "pypdf==5.4.0", + "PyJWT==1.7.1", + "python-dotenv==1.1.0", + "qdrant_client==1.14.2", + "requests==2.32.3", + "scikit-learn==1.6.1", + "sendgrid==6.11.0", + "torch==2.5.1", + 
"torchvision==0.20.1", + "transformers==4.51.3", + "werkzeug==3.1.3", + "yake==0.4.8", + "uvicorn==0.34.1", + "python-multipart==0.0.20", + "g4f==0.5.2.1", + "mammoth==1.9.0", + "markdownify==1.1.0", + "pandas==2.2.3", + "pdfminer.six==20250416", + "python-pptx==1.0.2", + "puremagic==1.28", + "charset-normalizer==3.4.1", + "pytesseract==0.3.13", + "langchain-google-genai" +] + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +``` +