Spaces:
Sleeping
Sleeping
Mohammed Afsal committed on
Commit ·
cece1eb
1
Parent(s): 9c9b5e0
Initial Commit
Browse files- .gitignore +5 -0
- Dockerfile +28 -0
- TRAINING DATA/students/Ahmed/metadata.json +13 -0
- TRAINING DATA/students/Ahmed/summary.txt +4 -0
- TRAINING DATA/students/Aisha/metadata.json +13 -0
- TRAINING DATA/students/Aisha/summary.txt +5 -0
- TRAINING DATA/students/David/metadata.json +13 -0
- TRAINING DATA/students/David/summary.txt +4 -0
- TRAINING DATA/students/Kenji/metadata.json +13 -0
- TRAINING DATA/students/Kenji/summary.txt +4 -0
- TRAINING DATA/students/Lisa/metadata.json +13 -0
- TRAINING DATA/students/Lisa/summary.txt +4 -0
- TRAINING DATA/students/Maria/metadata.json +13 -0
- TRAINING DATA/students/Maria/summary.txt +4 -0
- TRAINING DATA/students/Mark/metadata.json +13 -0
- TRAINING DATA/students/Mark/summary.txt +4 -0
- TRAINING DATA/students/Priya/metadata.json +13 -0
- TRAINING DATA/students/Priya/summary.txt +4 -0
- TRAINING DATA/students/Rahul/metadata.json +13 -0
- TRAINING DATA/students/Rahul/summary.txt +4 -0
- TRAINING DATA/students/Sofia/metadata.json +13 -0
- TRAINING DATA/students/Sofia/summary.txt +4 -0
- __init__.py +0 -0
- app.py +265 -0
- requirements.txt +9 -0
- space.yaml +2 -0
.gitignore
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
venv/
|
| 3 |
+
.venv/
|
| 4 |
+
tests/
|
| 5 |
+
__pycache__/
|
Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image
FROM python:3.11-slim

# Environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Create an unprivileged runtime user. Hugging Face Docker Spaces execute the
# container as UID 1000, so the user is created with that id explicitly.
RUN useradd --create-home --uid 1000 appuser

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirement file and install dependencies (system-wide, as root)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy source code, owned by the runtime user; /app itself must be writable
# because app.py may create its fallback data directory in the working directory.
COPY --chown=appuser:appuser . .
RUN chown appuser:appuser /app

# Drop root privileges for the running server
USER appuser

# Expose port for FastMCP
EXPOSE 7860

# Start the FastMCP server
CMD ["python", "app.py"]
|
TRAINING DATA/students/Ahmed/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024006",
|
| 3 |
+
"name": "Ahmed Hassan",
|
| 4 |
+
"email": "ahmed.hassan@university.edu",
|
| 5 |
+
"department": "Urban Planning & Computer Science",
|
| 6 |
+
"project_title": "AI-Optimized Traffic Flow Management System",
|
| 7 |
+
"submission_date": "2024-01-28",
|
| 8 |
+
"academic_year": "Third Year",
|
| 9 |
+
"supervisor": "Dr. Lisa Wang",
|
| 10 |
+
"keywords": ["smart cities", "traffic optimization", "urban planning", "AI"],
|
| 11 |
+
"grade": "A-",
|
| 12 |
+
"word_count": 2350
|
| 13 |
+
}
|
TRAINING DATA/students/Ahmed/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
AI system reduces urban traffic congestion by 32% in simulations.
|
| 2 |
+
Uses real-time traffic camera data and historical patterns.
|
| 3 |
+
Dynamic traffic light timing optimization algorithm.
|
| 4 |
+
Scalable solution for municipalities with existing camera infrastructure.
|
TRAINING DATA/students/Aisha/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024001",
|
| 3 |
+
"name": "Aisha Khan",
|
| 4 |
+
"email": "aisha.khan@university.edu",
|
| 5 |
+
"department": "Computer Science & Medicine",
|
| 6 |
+
"project_title": "AI-Powered Early Detection of Diabetic Retinopathy",
|
| 7 |
+
"submission_date": "2024-01-15",
|
| 8 |
+
"academic_year": "Final Year",
|
| 9 |
+
"supervisor": "Dr. Sarah Chen",
|
| 10 |
+
"keywords": ["medical AI", "computer vision", "healthcare", "deep learning"],
|
| 11 |
+
"grade": "A",
|
| 12 |
+
"word_count": 2450
|
| 13 |
+
}
|
TRAINING DATA/students/Aisha/summary.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Project develops CNN model for early diabetic retinopathy detection
|
| 2 |
+
using retinal fundus images. Achieved 94.3% accuracy on test dataset.
|
| 3 |
+
Implements real-time screening system with 2-second processing time.
|
| 4 |
+
Potential impact: Early intervention for diabetic patients.
|
| 5 |
+
Technology: Python, TensorFlow, OpenCV, Medical Imaging Dataset.
|
TRAINING DATA/students/David/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024008",
|
| 3 |
+
"name": "David Miller",
|
| 4 |
+
"email": "david.miller@university.edu",
|
| 5 |
+
"department": "Environmental Science",
|
| 6 |
+
"project_title": "Real-time Air Quality Monitoring Network with Predictive Analytics",
|
| 7 |
+
"submission_date": "2024-02-05",
|
| 8 |
+
"academic_year": "Third Year",
|
| 9 |
+
"supervisor": "Dr. Patricia Brown",
|
| 10 |
+
"keywords": ["environmental monitoring", "air quality", "IoT", "predictive analytics"],
|
| 11 |
+
"grade": "B+",
|
| 12 |
+
"word_count": 2100
|
| 13 |
+
}
|
TRAINING DATA/students/David/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Deployed network of 15 low-cost air quality sensors across city.
|
| 2 |
+
Real-time pollution mapping and 24-hour prediction accuracy of 89%.
|
| 3 |
+
Public dashboard with health recommendations.
|
| 4 |
+
Early warning system for schools and hospitals during poor air quality.
|
TRAINING DATA/students/Kenji/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024004",
|
| 3 |
+
"name": "Kenji Tanaka",
|
| 4 |
+
"email": "kenji.tanaka@university.edu",
|
| 5 |
+
"department": "Robotics Engineering",
|
| 6 |
+
"project_title": "Autonomous Delivery Robot with Obstacle Avoidance",
|
| 7 |
+
"submission_date": "2024-01-22",
|
| 8 |
+
"academic_year": "Second Year",
|
| 9 |
+
"supervisor": "Dr. Emily Zhang",
|
| 10 |
+
"keywords": ["robotics", "autonomous systems", "computer vision", "path planning"],
|
| 11 |
+
"grade": "B+",
|
| 12 |
+
"word_count": 1950
|
| 13 |
+
}
|
TRAINING DATA/students/Kenji/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Developed autonomous robot for campus package delivery.
|
| 2 |
+
Uses LIDAR and camera fusion for obstacle detection and avoidance.
|
| 3 |
+
Successfully completed 50 test deliveries across campus.
|
| 4 |
+
Maximum payload: 5kg, Battery life: 6 hours continuous operation.
|
TRAINING DATA/students/Lisa/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024007",
|
| 3 |
+
"name": "Lisa Johnson",
|
| 4 |
+
"email": "lisa.johnson@university.edu",
|
| 5 |
+
"department": "Education Technology",
|
| 6 |
+
"project_title": "Personalized Learning Platform Using Adaptive Algorithms",
|
| 7 |
+
"submission_date": "2024-02-01",
|
| 8 |
+
"academic_year": "Final Year",
|
| 9 |
+
"supervisor": "Dr. David Lee",
|
| 10 |
+
"keywords": ["edtech", "adaptive learning", "personalization", "education"],
|
| 11 |
+
"grade": "A",
|
| 12 |
+
"word_count": 2700
|
| 13 |
+
}
|
TRAINING DATA/students/Lisa/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Adaptive learning platform personalizes content based on student performance.
|
| 2 |
+
Tested with 200 students showing 45% improvement in learning outcomes.
|
| 3 |
+
Real-time assessment and content recommendation engine.
|
| 4 |
+
Mobile-first design with offline capability for remote areas.
|
TRAINING DATA/students/Maria/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024003",
|
| 3 |
+
"name": "Maria Gonzalez",
|
| 4 |
+
"email": "maria.gonzalez@university.edu",
|
| 5 |
+
"department": "Renewable Energy Engineering",
|
| 6 |
+
"project_title": "Machine Learning Optimization of Solar Panel Efficiency",
|
| 7 |
+
"submission_date": "2024-01-20",
|
| 8 |
+
"academic_year": "Final Year",
|
| 9 |
+
"supervisor": "Dr. James Wilson",
|
| 10 |
+
"keywords": ["solar energy", "machine learning", "optimization", "clean tech"],
|
| 11 |
+
"grade": "A",
|
| 12 |
+
"word_count": 2800
|
| 13 |
+
}
|
TRAINING DATA/students/Maria/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ML model predicts optimal solar panel angles based on weather patterns.
|
| 2 |
+
Increases energy generation by 18% compared to fixed-angle systems.
|
| 3 |
+
Real-time adjustment system using servo motors and weather API.
|
| 4 |
+
Cost-effective solution for residential solar installations.
|
TRAINING DATA/students/Mark/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024010",
|
| 3 |
+
"name": "Mark Thompson",
|
| 4 |
+
"email": "mark.thompson@university.edu",
|
| 5 |
+
"department": "Supply Chain Management",
|
| 6 |
+
"project_title": "Blockchain-based Supply Chain Transparency System",
|
| 7 |
+
"submission_date": "2024-02-12",
|
| 8 |
+
"academic_year": "Third Year",
|
| 9 |
+
"supervisor": "Dr. Nancy Chen",
|
| 10 |
+
"keywords": ["blockchain", "supply chain", "transparency", "logistics"],
|
| 11 |
+
"grade": "A-",
|
| 12 |
+
"word_count": 2300
|
| 13 |
+
}
|
TRAINING DATA/students/Mark/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Blockchain system provides end-to-end supply chain visibility.
|
| 2 |
+
Reduces counterfeit products by tracking authentic product journey.
|
| 3 |
+
Implemented for agricultural supply chain with 50+ participants.
|
| 4 |
+
Real-time tracking and automated smart contracts for payments.
|
TRAINING DATA/students/Priya/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024009",
|
| 3 |
+
"name": "Priya Patel",
|
| 4 |
+
"email": "priya.patel@university.edu",
|
| 5 |
+
"department": "Biomedical Engineering",
|
| 6 |
+
"project_title": "Wearable ECG Monitor with Real-time Arrhythmia Detection",
|
| 7 |
+
"submission_date": "2024-02-08",
|
| 8 |
+
"academic_year": "Final Year",
|
| 9 |
+
"supervisor": "Dr. Andrew Taylor",
|
| 10 |
+
"keywords": ["wearable tech", "health monitoring", "ECG", "medical devices"],
|
| 11 |
+
"grade": "A",
|
| 12 |
+
"word_count": 2550
|
| 13 |
+
}
|
TRAINING DATA/students/Priya/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Low-cost wearable ECG monitor detects arrhythmias in real-time.
|
| 2 |
+
96% accuracy compared to clinical-grade equipment.
|
| 3 |
+
Continuous monitoring with emergency alert system to caregivers.
|
| 4 |
+
Battery life: 72 hours, Water-resistant design for daily use.
|
TRAINING DATA/students/Rahul/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024002",
|
| 3 |
+
"name": "Rahul Sharma",
|
| 4 |
+
"email": "rahul.sharma@university.edu",
|
| 5 |
+
"department": "Agricultural Engineering",
|
| 6 |
+
"project_title": "IoT-Based Smart Irrigation System with Crop Health Monitoring",
|
| 7 |
+
"submission_date": "2024-01-18",
|
| 8 |
+
"academic_year": "Third Year",
|
| 9 |
+
"supervisor": "Dr. Michael Brown",
|
| 10 |
+
"keywords": ["IoT", "precision agriculture", "sensors", "water conservation"],
|
| 11 |
+
"grade": "A-",
|
| 12 |
+
"word_count": 2200
|
| 13 |
+
}
|
TRAINING DATA/students/Rahul/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
IoT system monitors soil moisture, temperature, and crop health.
|
| 2 |
+
Automated irrigation reduces water usage by 40% compared to traditional methods.
|
| 3 |
+
Real-time alerts for farmers via mobile application.
|
| 4 |
+
Deployed prototype on 5-acre test farm with successful results.
|
TRAINING DATA/students/Sofia/metadata.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"student_id": "STU2024005",
|
| 3 |
+
"name": "Sofia Rodriguez",
|
| 4 |
+
"email": "sofia.rodriguez@university.edu",
|
| 5 |
+
"department": "Computational Finance",
|
| 6 |
+
"project_title": "Predictive Analytics for Stock Market Trends Using Sentiment Analysis",
|
| 7 |
+
"submission_date": "2024-01-25",
|
| 8 |
+
"academic_year": "Final Year",
|
| 9 |
+
"supervisor": "Dr. Robert Kim",
|
| 10 |
+
"keywords": ["financial analytics", "sentiment analysis", "trading", "NLP"],
|
| 11 |
+
"grade": "A",
|
| 12 |
+
"word_count": 2600
|
| 13 |
+
}
|
TRAINING DATA/students/Sofia/summary.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Combines news sentiment analysis with technical indicators for stock prediction.
|
| 2 |
+
Achieved 67% accuracy in predicting 5-day price movements.
|
| 3 |
+
Real-time dashboard for investment decision support.
|
| 4 |
+
Backtested on S&P 500 data from 2018-2023 showing consistent performance.
|
__init__.py
ADDED
|
File without changes
|
app.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import uuid
|
| 4 |
+
import time
|
| 5 |
+
from datetime import datetime, timezone
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Any, Dict, List, Optional
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
from fastmcp import FastMCP
|
| 12 |
+
|
| 13 |
+
# -------- Config: data root resolution (robust, works locally & on HF) --------
|
| 14 |
+
def resolve_data_root() -> Path:
    """Locate the directory that holds the training data.

    Resolution order:
      1. The ``DATA_ROOT`` environment variable, if it names an existing
         directory.
      2. The first existing directory among ``./TRAINING DATA``,
         ``./training_data`` and ``./data`` (relative to the current working
         directory).
      3. ``./TRAINING DATA``, created on demand so callers always get a path.

    Returns:
        The resolved (absolute) data root.
    """
    # 1) Env var wins if set -- but only when it is a directory. A regular
    #    file is not a usable data root, so it is skipped like a missing path.
    env = os.getenv("DATA_ROOT")
    if env:
        env_path = Path(env).expanduser().resolve()
        if env_path.is_dir():
            return env_path

    # 2) Try common repo-relative locations (first existing directory wins)
    cwd = Path.cwd()
    for folder in ("TRAINING DATA", "training_data", "data"):
        candidate = cwd / folder
        if candidate.is_dir():
            return candidate.resolve()

    # 3) Last resort: create ./TRAINING DATA to avoid crashes
    fallback = cwd / "TRAINING DATA"
    try:
        fallback.mkdir(parents=True, exist_ok=True)
    except FileExistsError:
        # A non-directory already occupies the path. Return it anyway rather
        # than crash at import time.
        pass
    return fallback.resolve()
|
| 36 |
+
|
| 37 |
+
DATA_ROOT: Path = resolve_data_root()
|
| 38 |
+
STUDENTS_DIR: Path = DATA_ROOT / "students"
|
| 39 |
+
|
| 40 |
+
# -------- Small utilities --------
|
| 41 |
+
def _receipt() -> Dict[str, Any]:
    """Build the bookkeeping record attached to every tool response.

    The record carries a fixed ``tool_used`` flag, the current UTC time in
    ISO-8601 form, a fresh random UUID, and the data root in use.
    """
    now_utc = datetime.now(timezone.utc)
    stamp = now_utc.isoformat()
    request_id = str(uuid.uuid4())
    receipt: Dict[str, Any] = dict(
        tool_used=True,
        server_time=stamp,
        request_id=request_id,
        data_root=str(DATA_ROOT),
    )
    return receipt
|
| 48 |
+
|
| 49 |
+
def _norm(s: Optional[str]) -> str:
    """Return *s* with surrounding whitespace removed; falsy input gives ""."""
    if not s:
        return ""
    return s.strip()
|
| 51 |
+
|
| 52 |
+
def _ci_contains(hay: Optional[str], needle: Optional[str]) -> bool:
    """Case-insensitive substring test on the trimmed forms of both arguments.

    Both sides go through ``_norm`` first, so ``None`` behaves like an empty
    string (and an empty needle is found in every haystack).
    """
    wanted = _norm(needle).lower()
    searched = _norm(hay).lower()
    return wanted in searched
|
| 54 |
+
|
| 55 |
+
# -------- Load metadata (from DATA_ROOT/students/*/metadata.json and/or DATA_ROOT/metadata.json) --------
|
| 56 |
+
# In-memory index: { student_name_lower: {"name":..., "email":..., ...} }
|
| 57 |
+
_METADATA_BY_STUDENT: Dict[str, Dict[str, Any]] = {}
|
| 58 |
+
|
| 59 |
+
def _load_all_metadata() -> None:
    """Rebuild the in-memory student index from the metadata files on disk.

    Sources, in order (a later record overwrites an earlier one that has the
    same normalized, lower-cased name):

      A) ``STUDENTS_DIR/<folder>/metadata.json`` -- one JSON object per
         student; a missing ``name`` defaults to the folder name.
      B) ``DATA_ROOT/metadata.json`` (optional), in any of these shapes:
           - ``{"students": [{...}, {...}]}``
           - ``[{"name": ..., ...}, ...]``
           - ``{"<name>": {...}, ...}``

    Loading is best-effort: unreadable, malformed, or wrongly-shaped entries
    are logged and skipped, so a single bad record cannot abort the load or
    hide the remaining records. Each stored record gets a ``__path`` key with
    the file it came from.
    """
    import logging  # local import: the block needs nothing new at file level

    global _METADATA_BY_STUDENT
    log = logging.getLogger(__name__)

    # Build into a local dict and publish it with one assignment at the end,
    # so readers never observe an empty or half-filled index during a reload.
    index: Dict[str, Dict[str, Any]] = {}

    def _register(record: Any, source: Path, default_name: Optional[str] = None) -> None:
        """Validate one student record and store it in ``index``."""
        if not isinstance(record, dict):
            log.warning("Skipping non-object student record in %s", source)
            return
        name = record.get("name") or default_name
        if not name:
            return  # nothing to key the record on
        if not isinstance(name, str):
            log.warning("Skipping record with non-string name %r in %s", name, source)
            return
        record["name"] = name
        record["__path"] = str(source)
        index[_norm(name).lower()] = record

    def _read_json(path: Path) -> Any:
        """Parse *path* as UTF-8 JSON; return None (and log) on failure."""
        try:
            return json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            log.warning("Could not load metadata from %s: %s", path, exc)
            return None

    # A) Per-student folders
    if STUDENTS_DIR.is_dir():
        for student_dir in sorted(STUDENTS_DIR.iterdir()):
            if not student_dir.is_dir():
                continue
            meta_file = student_dir / "metadata.json"
            if not meta_file.exists():
                continue
            data = _read_json(meta_file)
            if data is not None:
                # Ensure a 'name' field; default to directory name
                _register(data, meta_file, default_name=student_dir.name)

    # B) Optional top-level metadata.json (may contain a list or a dict of students)
    top_meta = DATA_ROOT / "metadata.json"
    if top_meta.exists():
        blob = _read_json(top_meta)
        candidates: List[Any] = []
        if isinstance(blob, dict) and isinstance(blob.get("students"), list):
            candidates = blob["students"]
        elif isinstance(blob, list):
            candidates = blob
        elif isinstance(blob, dict):
            for key, value in blob.items():
                if isinstance(value, dict):
                    value.setdefault("name", key)
                    candidates.append(value)

        for entry in candidates:
            _register(entry, top_meta)

    _METADATA_BY_STUDENT = index
|
| 108 |
+
|
| 109 |
+
# Initial load
|
| 110 |
+
_load_all_metadata()
|
| 111 |
+
|
| 112 |
+
# -------- OpenAI embeddings (for Pinecone RAG) --------
|
| 113 |
+
from openai import OpenAI
|
| 114 |
+
_openai_client: Optional[OpenAI] = None
|
| 115 |
+
_EMBED_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small")
|
| 116 |
+
def _embed(texts: List[str]) -> List[List[float]]:
    """Embed a batch of strings using OpenAI embeddings.

    Args:
        texts: Strings to embed. An empty batch returns ``[]`` without
            contacting the API.

    Returns:
        One embedding vector per input string, in input order.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set (the same convention
            the Pinecone helpers use for their required variables).
    """
    global _openai_client
    if not texts:
        return []
    if _openai_client is None:
        # Fail with a clear, consistent message instead of deferring to the SDK.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY not set")
        _openai_client = OpenAI(api_key=api_key)
    # OpenAI Python SDK v1 returns .data with embeddings in order
    resp = _openai_client.embeddings.create(model=_EMBED_MODEL, input=texts)
    return [d.embedding for d in resp.data]
|
| 126 |
+
|
| 127 |
+
# -------- Pinecone client --------
|
| 128 |
+
from pinecone import Pinecone as _Pinecone
|
| 129 |
+
|
| 130 |
+
_pine: Optional[_Pinecone] = None
|
| 131 |
+
def _pc() -> _Pinecone:
    """Return the shared Pinecone client, creating it on first use.

    Raises:
        RuntimeError: If ``PINECONE_API_KEY`` is unset or empty when the
            client has to be created.
    """
    global _pine
    if _pine is not None:
        return _pine

    key = os.getenv("PINECONE_API_KEY")
    if not key:
        raise RuntimeError("PINECONE_API_KEY not set")
    _pine = _Pinecone(api_key=key)
    return _pine
|
| 139 |
+
|
| 140 |
+
def _pine_index():
    """Return a handle to the Pinecone index named by ``PINECONE_INDEX_NAME``.

    The client is obtained first, so a missing API key is reported before a
    missing index name.

    Raises:
        RuntimeError: If ``PINECONE_INDEX_NAME`` is unset or empty.
    """
    client = _pc()
    name = os.getenv("PINECONE_INDEX_NAME")
    if name:
        return client.Index(name)
    raise RuntimeError("PINECONE_INDEX_NAME not set")
|
| 146 |
+
|
| 147 |
+
# -------- MCP server --------
|
| 148 |
+
mcp = FastMCP("ProjectRAGServer")
|
| 149 |
+
|
| 150 |
+
@mcp.tool
def add(a: int, b: int) -> Dict[str, Any]:
    """Add two numbers."""
    # Both operands are coerced with int() before summing; the sum is
    # returned under "result" together with the standard receipt.
    total = int(a) + int(b)
    response: Dict[str, Any] = {"result": total}
    response["_receipt"] = _receipt()
    return response
|
| 154 |
+
|
| 155 |
+
@mcp.tool
def list_students() -> Dict[str, Any]:
    """Return all known student names."""
    # Collect every truthy "name" from the index, then sort in place.
    names = [
        record.get("name")
        for record in _METADATA_BY_STUDENT.values()
        if record.get("name")
    ]
    names.sort()
    total = len(names)
    return {"students": names, "count": total, "_receipt": _receipt()}
|
| 160 |
+
|
| 161 |
+
@mcp.tool
def get_student_metadata(name: str) -> Dict[str, Any]:
    """Return full metadata for a student by name (case-insensitive)."""
    # The index is keyed by the trimmed, lower-cased name.
    record = _METADATA_BY_STUDENT.get(_norm(name).lower())
    if record:
        return {"metadata": record, "_receipt": _receipt()}
    return {"error": f"Student '{name}' not found.", "_receipt": _receipt()}
|
| 169 |
+
|
| 170 |
+
@mcp.tool
def get_student_email(name: str) -> Dict[str, Any]:
    """Return the email address for a student by name."""
    # The index is keyed by the trimmed, lower-cased name.
    record = _METADATA_BY_STUDENT.get(_norm(name).lower())
    if not record:
        return {"error": f"Student '{name}' not found.", "_receipt": _receipt()}

    address = record.get("email")
    if address:
        return {"name": record.get("name"), "email": address, "_receipt": _receipt()}
    # The student exists but the record has no usable email value.
    return {"error": f"No email in metadata for '{record.get('name')}'.", "_receipt": _receipt()}
|
| 181 |
+
|
| 182 |
+
@mcp.tool
def search_student_by_field(field: str, value: str) -> Dict[str, Any]:
    """
    Case-insensitive contains() search across any metadata field.
    Example: field='department', value='Computer'

    String fields, numeric fields (e.g. field='word_count', value='2350')
    and lists of such values (e.g. field='keywords') are searched.
    The field name itself is matched exactly.
    """
    f = _norm(field)
    val = _norm(value)
    if not f:
        return {"error": "Field must be provided.", "_receipt": _receipt()}

    def _searchable(item: Any) -> Optional[str]:
        """Return the text form of a scalar value, or None if not searchable."""
        if isinstance(item, str):
            return item
        # Numbers are compared by their text form so that fields such as
        # word_count can be matched; previously they could never match.
        if isinstance(item, (int, float)):
            return str(item)
        return None

    matches: List[Dict[str, Any]] = []
    for meta in _METADATA_BY_STUDENT.values():
        if f not in meta:
            continue
        v = meta[f]
        # A list matches when any searchable element matches.
        elements = v if isinstance(v, list) else [v]
        texts = (_searchable(x) for x in elements)
        if any(t is not None and _ci_contains(t, val) for t in texts):
            matches.append({"name": meta.get("name"), "match_value": v, "metadata": meta})
    return {"matches": matches, "count": len(matches), "_receipt": _receipt()}
|
| 203 |
+
|
| 204 |
+
@mcp.tool
def reload_metadata() -> Dict[str, Any]:
    """Reload metadata from disk (useful after updating files)."""
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # (or go negative) if the system clock is adjusted during the reload.
    t0 = time.perf_counter()
    _load_all_metadata()
    elapsed_ms = round((time.perf_counter() - t0) * 1000.0, 2)
    return {
        "ok": True,
        "students": len(_METADATA_BY_STUDENT),
        "ms": elapsed_ms,
        "_receipt": _receipt(),
    }
|
| 211 |
+
|
| 212 |
+
@mcp.tool
def search_rag(query: str, top_k: int = 3, namespace: Optional[str] = None) -> Dict[str, Any]:
    """
    Semantic search over your Pinecone index using OpenAI embeddings.
    Returns top_k matches with metadata.
    top_k is clamped to the range 1..50; the response reports the value used.
    Env required: OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX_NAME
    Optional: OPENAI_EMBEDDING_MODEL (default: text-embedding-3-small)
    """
    q = _norm(query)
    if not q:
        return {"error": "Query must not be empty.", "_receipt": _receipt()}

    # Validate and clamp top_k up front: a bad value is reported as what it
    # is (not as a Pinecone failure) and no embedding request is spent on it.
    try:
        k = max(1, min(int(top_k), 50))
    except (TypeError, ValueError):
        return {"error": f"top_k must be an integer, got {top_k!r}.", "_receipt": _receipt()}

    try:
        vec = _embed([q])[0]
    except Exception as e:
        return {"error": f"Embedding failed: {e}", "_receipt": _receipt()}

    try:
        idx = _pine_index()
    except Exception as e:
        return {"error": f"Pinecone init failed: {e}", "_receipt": _receipt()}

    try:
        resp = idx.query(
            vector=vec,
            top_k=k,
            include_metadata=True,
            namespace=namespace or None,
        )
    except Exception as e:
        return {"error": f"Pinecone query failed: {e}", "_receipt": _receipt()}

    # getattr() with defaults tolerates responses or matches that lack a field.
    results = [
        {
            "id": getattr(m, "id", None),
            "score": getattr(m, "score", None),
            "metadata": getattr(m, "metadata", None),
        }
        for m in getattr(resp, "matches", []) or []
    ]

    return {
        "query": q,
        "model": _EMBED_MODEL,
        "top_k": k,  # the effective (clamped) value sent to Pinecone
        "results": results,
        "_receipt": _receipt(),
    }
|
| 259 |
+
|
| 260 |
+
# ---- HTTP runner for HF Space / local run ----
|
| 261 |
+
if __name__ == "__main__":
    # uvicorn is imported here but not referenced directly in this block.
    import uvicorn

    # Serve over HTTP at /mcp on all interfaces; PORT overrides the default 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    mcp.run(
        transport="http",
        host="0.0.0.0",
        port=listen_port,
        path="/mcp",
    )
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastmcp
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn
|
| 4 |
+
httpx
|
| 5 |
+
pandas
|
| 6 |
+
python-dateutil
|
| 7 |
+
pinecone
|
| 8 |
+
openai
|
| 9 |
+
python-dotenv
|
space.yaml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sdk: docker
|
| 2 |
+
app_port: 7860
|