Upload 12 files
Browse files- .env.example +11 -0
- .gitignore +12 -0
- DEPLOYMENT.md +300 -0
- Dockerfile +29 -0
- FILES_SUMMARY.md +235 -0
- QUICK_START.md +141 -0
- app.py +147 -0
- document_converter.py +223 -0
- gemini_client.py +98 -0
- latex_processor.py +208 -0
- requirements.txt +7 -0
- test_backend.py +110 -0
.env.example
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Example environment configuration
|
| 2 |
+
# Copy this to .env and fill in your actual values
|
| 3 |
+
|
| 4 |
+
# Required: Your Google Gemini API Key
|
| 5 |
+
GEMINI_API_KEY=your_gemini_api_key_here
|
| 6 |
+
|
| 7 |
+
# Optional: Flask environment (development or production)
|
| 8 |
+
FLASK_ENV=production
|
| 9 |
+
|
| 10 |
+
# Optional: Port (HuggingFace Spaces uses 7860 by default)
|
| 11 |
+
PORT=7860
|
.gitignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
.Python
|
| 7 |
+
*.so
|
| 8 |
+
*.log
|
| 9 |
+
*.tmp
|
| 10 |
+
test_files/
|
| 11 |
+
.vscode/
|
| 12 |
+
.idea/
|
DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HuggingFace Spaces Deployment Guide
|
| 2 |
+
|
| 3 |
+
Complete step-by-step guide to deploy your LaTeX-enhanced document backend on HuggingFace Spaces.
|
| 4 |
+
|
| 5 |
+
## 📦 Files Ready for Deployment
|
| 6 |
+
|
| 7 |
+
All these files are in the `backend` folder and need to be uploaded to HuggingFace:
|
| 8 |
+
|
| 9 |
+
```
|
| 10 |
+
backend/
|
| 11 |
+
├── app.py                 # Main Flask application
|
| 12 |
+
├── gemini_client.py       # Gemini API integration
|
| 13 |
+
├── latex_processor.py     # LaTeX processing logic
|
| 14 |
+
├── document_converter.py  # Document conversion utilities
|
| 15 |
+
├── requirements.txt       # Python dependencies
|
| 16 |
+
├── Dockerfile             # Docker container configuration
|
| 17 |
+
├── .gitignore             # Git ignore rules
|
| 18 |
+
├── .env.example           # Environment template (don't upload .env!)
|
| 19 |
+
├── README.md              # Documentation
|
| 20 |
+
└── test_backend.py        # Test script (optional)
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
## 🎯 Step-by-Step Deployment
|
| 24 |
+
|
| 25 |
+
### Step 1: Create HuggingFace Account
|
| 26 |
+
|
| 27 |
+
1. Go to [https://huggingface.co/join](https://huggingface.co/join)
|
| 28 |
+
2. Sign up with your email or GitHub account
|
| 29 |
+
3. Verify your email
|
| 30 |
+
|
| 31 |
+
### Step 2: Create a New Space
|
| 32 |
+
|
| 33 |
+
1. Visit [https://huggingface.co/new-space](https://huggingface.co/new-space)
|
| 34 |
+
2. Fill in the details:
|
| 35 |
+
- **Owner**: Your username
|
| 36 |
+
- **Space name**: Choose a name (e.g., `doc-latex-enhancer`)
|
| 37 |
+
- **License**: Apache 2.0 (or your choice)
|
| 38 |
+
- **Select the Space SDK**: **Docker** ⚠️ IMPORTANT: Must be Docker!
|
| 39 |
+
- **Hardware**: CPU basic - 2 vCPU - 16 GB (Free tier)
|
| 40 |
+
- **Visibility**: Public or Private (your choice)
|
| 41 |
+
3. Click **Create Space**
|
| 42 |
+
|
| 43 |
+
### Step 3: Upload Files
|
| 44 |
+
|
| 45 |
+
You have two options:
|
| 46 |
+
|
| 47 |
+
#### Option A: Web Upload (Easiest)
|
| 48 |
+
|
| 49 |
+
1. In your Space, click **Files** → **Add file** → **Upload files**
|
| 50 |
+
2. Drag and drop ALL files from the `backend` folder
|
| 51 |
+
3. Add commit message: "Initial backend deployment"
|
| 52 |
+
4. Click **Commit changes to main**
|
| 53 |
+
|
| 54 |
+
#### Option B: Git Upload (Advanced)
|
| 55 |
+
|
| 56 |
+
```bash
|
| 57 |
+
# Clone your space
|
| 58 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
|
| 59 |
+
cd YOUR_SPACE_NAME
|
| 60 |
+
|
| 61 |
+
# Copy all backend files
|
| 62 |
+
cp -r path/to/backend/* .
|
| 63 |
+
|
| 64 |
+
# Commit and push
|
| 65 |
+
git add .
|
| 66 |
+
git commit -m "Initial backend deployment"
|
| 67 |
+
git push
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
### Step 4: Set Environment Variables (Secret)
|
| 71 |
+
|
| 72 |
+
⚠️ **IMPORTANT**: Never commit your API key to the repository!
|
| 73 |
+
|
| 74 |
+
1. In your Space, go to **Settings**
|
| 75 |
+
2. Scroll to **Repository secrets**
|
| 76 |
+
3. Click **New secret**
|
| 77 |
+
4. Add your Gemini API key:
|
| 78 |
+
- **Name**: `GEMINI_API_KEY`
|
| 79 |
+
- **Value**: Your actual Gemini API key (get it from [Google AI Studio](https://makersuite.google.com/app/apikey))
|
| 80 |
+
5. Click **Save**
|
| 81 |
+
|
| 82 |
+
### Step 5: Wait for Build
|
| 83 |
+
|
| 84 |
+
1. Go to the **Logs** tab in your Space
|
| 85 |
+
2. Watch the build process (takes 2-5 minutes)
|
| 86 |
+
3. Look for messages like:
|
| 87 |
+
```
|
| 88 |
+
Building Docker image...
|
| 89 |
+
Installing dependencies...
|
| 90 |
+
Listening at: http://0.0.0.0:7860
|
| 91 |
+
```
|
| 92 |
+
4. Once you see gunicorn's "Listening at: http://0.0.0.0:7860" and "Booting worker" lines, it's ready!
|
| 93 |
+
|
| 94 |
+
### Step 6: Test Your Backend
|
| 95 |
+
|
| 96 |
+
Your backend is now live at:
|
| 97 |
+
```
|
| 98 |
+
https://YOUR_USERNAME-YOUR_SPACE_NAME.hf.space
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
#### Test the Health Endpoint
|
| 102 |
+
|
| 103 |
+
Open in browser or use curl:
|
| 104 |
+
```bash
|
| 105 |
+
curl https://YOUR_USERNAME-YOUR_SPACE_NAME.hf.space/health
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
Expected response:
|
| 109 |
+
```json
|
| 110 |
+
{
|
| 111 |
+
"status": "healthy",
|
| 112 |
+
"service": "LaTeX Document Enhancement API",
|
| 113 |
+
"version": "1.0.0"
|
| 114 |
+
}
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
#### Test Document Enhancement
|
| 118 |
+
|
| 119 |
+
Use curl or Postman:
|
| 120 |
+
```bash
|
| 121 |
+
curl -X POST https://YOUR_USERNAME-YOUR_SPACE_NAME.hf.space/enhance \
|
| 122 |
+
-F "file=@test_document.docx" \
|
| 123 |
+
-F "prompt=Make this more professional" \
|
| 124 |
+
-o enhanced.docx
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
### Step 7: Update Frontend
|
| 128 |
+
|
| 129 |
+
Once deployed, update your frontend to use the new backend URL:
|
| 130 |
+
|
| 131 |
+
**File**: `src/pages/EnhancedDocTweaker.tsx`
|
| 132 |
+
|
| 133 |
+
Change line 34 from:
|
| 134 |
+
```typescript
|
| 135 |
+
const BACKEND_URL = "https://omgy-vero-back-test.hf.space";
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
To:
|
| 139 |
+
```typescript
|
| 140 |
+
const BACKEND_URL = "https://YOUR_USERNAME-YOUR_SPACE_NAME.hf.space";
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
Replace with your actual Space URL!
|
| 144 |
+
|
| 145 |
+
## Monitoring & Debugging
|
| 146 |
+
|
| 147 |
+
### View Logs
|
| 148 |
+
|
| 149 |
+
1. Go to your Space
|
| 150 |
+
2. Click **Logs** tab
|
| 151 |
+
3. Watch real-time logs of your application
|
| 152 |
+
|
| 153 |
+
### Common Issues
|
| 154 |
+
|
| 155 |
+
#### Build Fails
|
| 156 |
+
|
| 157 |
+
**Problem**: Docker build fails
|
| 158 |
+
**Solution**:
|
| 159 |
+
- Check all files are uploaded correctly
|
| 160 |
+
- Verify `Dockerfile` syntax
|
| 161 |
+
- Check `requirements.txt` for typos
|
| 162 |
+
|
| 163 |
+
#### App Crashes on Startup
|
| 164 |
+
|
| 165 |
+
**Problem**: Application starts but crashes
|
| 166 |
+
**Solution**:
|
| 167 |
+
- Check `GEMINI_API_KEY` is set in secrets
|
| 168 |
+
- View logs for error messages
|
| 169 |
+
- Verify API key is valid
|
| 170 |
+
|
| 171 |
+
#### API Returns 500 Error
|
| 172 |
+
|
| 173 |
+
**Problem**: `/enhance` endpoint returns errors
|
| 174 |
+
**Solution**:
|
| 175 |
+
- Check logs for detailed error
|
| 176 |
+
- Verify uploaded file format is supported
|
| 177 |
+
- Test with smaller files first
|
| 178 |
+
|
| 179 |
+
#### CORS Errors from Frontend
|
| 180 |
+
|
| 181 |
+
**Problem**: Browser blocks requests
|
| 182 |
+
**Solution**:
|
| 183 |
+
- Verify `flask-cors` is in requirements.txt
|
| 184 |
+
- Check CORS is enabled in app.py (it is by default)
|
| 185 |
+
- Try accessing API directly first
|
| 186 |
+
|
| 187 |
+
## Space Settings
|
| 188 |
+
|
| 189 |
+
### Recommended Settings
|
| 190 |
+
|
| 191 |
+
- **Hardware**: CPU basic (free) works fine
|
| 192 |
+
- **Visibility**: Public (unless sensitive data)
|
| 193 |
+
- **Sleep time**: Default (Space sleeps after inactivity)
|
| 194 |
+
|
| 195 |
+
### Upgrading Hardware
|
| 196 |
+
|
| 197 |
+
If you get high traffic:
|
| 198 |
+
1. Settings → Hardware
|
| 199 |
+
2. Upgrade to a paid CPU tier (e.g. CPU upgrade - 8 vCPU - 32 GB); CPU basic - 2 vCPU is the free tier you are already on
|
| 200 |
+
3. Or use paid GPU for faster processing
|
| 201 |
+
|
| 202 |
+
## Security Best Practices
|
| 203 |
+
|
| 204 |
+
✅ **DO:**
|
| 205 |
+
- Use Repository secrets for API keys
|
| 206 |
+
- Keep `.env` in `.gitignore`
|
| 207 |
+
- Use HTTPS endpoints only
|
| 208 |
+
- Validate input files
|
| 209 |
+
|
| 210 |
+
❌ **DON'T:**
|
| 211 |
+
- Commit API keys to repository
|
| 212 |
+
- Share your Space URL with API key embedded
|
| 213 |
+
- Accept extremely large files (add size limits)
|
| 214 |
+
|
| 215 |
+
## 🎨 Customization
|
| 216 |
+
|
| 217 |
+
### Change Port (if needed)
|
| 218 |
+
|
| 219 |
+
Default port is 7860 (HuggingFace standard). To change:
|
| 220 |
+
|
| 221 |
+
1. Edit `Dockerfile`:
|
| 222 |
+
```dockerfile
|
| 223 |
+
EXPOSE 8080
|
| 224 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:8080", ...]
|
| 225 |
+
```
|
| 226 |
+
|
| 227 |
+
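2. Set `app_port: 8080` in the YAML metadata block at the top of `README.md` (Docker Spaces route traffic to `app_port`, which defaults to 7860), and add to Repository secrets: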
|
| 228 |
+
```
|
| 229 |
+
PORT=8080
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
### Add Rate Limiting
|
| 233 |
+
|
| 234 |
+
To prevent abuse, add Flask-Limiter:
|
| 235 |
+
|
| 236 |
+
1. Add to `requirements.txt`:
|
| 237 |
+
```
|
| 238 |
+
flask-limiter==3.5.0
|
| 239 |
+
```
|
| 240 |
+
|
| 241 |
+
2. Update `app.py`:
|
| 242 |
+
```python
|
| 243 |
+
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
|
| 244 |
+
|
| 245 |
+
limiter = Limiter(get_remote_address, app=app, default_limits=["100 per hour"])
|
| 246 |
+
```
|
| 247 |
+
|
| 248 |
+
## Usage Limits
|
| 249 |
+
|
| 250 |
+
### HuggingFace Free Tier
|
| 251 |
+
- CPU: 2 vCPU, 16GB RAM
|
| 252 |
+
- Storage: 10GB
|
| 253 |
+
- No time limits
|
| 254 |
+
- Space sleeps after 48h inactivity
|
| 255 |
+
|
| 256 |
+
### Gemini API Free Tier
|
| 257 |
+
- 60 requests per minute
|
| 258 |
+
- 1,500 requests per day
|
| 259 |
+
- Check current limits: [Google AI Studio](https://makersuite.google.com/)
|
| 260 |
+
|
| 261 |
+
## ✅ Deployment Checklist
|
| 262 |
+
|
| 263 |
+
Before going live, verify:
|
| 264 |
+
|
| 265 |
+
- [ ] All files uploaded to HuggingFace Space
|
| 266 |
+
- [ ] Space type is **Docker**
|
| 267 |
+
- [ ] `GEMINI_API_KEY` set in Repository secrets
|
| 268 |
+
- [ ] Build completed successfully
|
| 269 |
+
- [ ] `/health` endpoint returns success
|
| 270 |
+
- [ ] Test document enhancement works
|
| 271 |
+
- [ ] Frontend updated with new backend URL
|
| 272 |
+
- [ ] CORS allows your frontend domain
|
| 273 |
+
- [ ] Logs show no errors
|
| 274 |
+
|
| 275 |
+
## You're Live!
|
| 276 |
+
|
| 277 |
+
Congratulations! Your LaTeX-enhanced document backend is now deployed and ready to use!
|
| 278 |
+
|
| 279 |
+
### Next Steps
|
| 280 |
+
|
| 281 |
+
1. Share your Space with users
|
| 282 |
+
2. Monitor usage in HuggingFace dashboard
|
| 283 |
+
3. Check Gemini API usage in Google AI Studio
|
| 284 |
+
4. Add more features as needed
|
| 285 |
+
|
| 286 |
+
### Get Your Space URL
|
| 287 |
+
|
| 288 |
+
Your backend is available at:
|
| 289 |
+
```
|
| 290 |
+
https://YOUR_USERNAME-YOUR_SPACE_NAME.hf.space
|
| 291 |
+
```
|
| 292 |
+
|
| 293 |
+
Example:
|
| 294 |
+
```
|
| 295 |
+
https://john-doc-enhancer.hf.space
|
| 296 |
+
```
|
| 297 |
+
|
| 298 |
+
---
|
| 299 |
+
|
| 300 |
+
**Need Help?** Check the logs first, then review the README.md troubleshooting section!
|
Dockerfile
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use Python 3.11 slim image
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# Set working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Install system dependencies
|
| 8 |
+
RUN apt-get update && apt-get install -y \
|
| 9 |
+
build-essential \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
# Copy requirements first for better caching
|
| 13 |
+
COPY requirements.txt .
|
| 14 |
+
|
| 15 |
+
# Install Python dependencies
|
| 16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# Copy application files
|
| 19 |
+
COPY . .
|
| 20 |
+
|
| 21 |
+
# Expose port 7860 (HuggingFace Spaces default)
|
| 22 |
+
EXPOSE 7860
|
| 23 |
+
|
| 24 |
+
# Set environment variables
|
| 25 |
+
ENV FLASK_APP=app.py
|
| 26 |
+
ENV PYTHONUNBUFFERED=1
|
| 27 |
+
|
| 28 |
+
# Run the application with gunicorn
|
| 29 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "120", "app:app"]
|
FILES_SUMMARY.md
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 📦 Complete Backend Files for HuggingFace Deployment
|
| 2 |
+
|
| 3 |
+
All files are ready in the `backend` folder!
|
| 4 |
+
|
| 5 |
+
## ✅ Files Created
|
| 6 |
+
|
| 7 |
+
### Core Application Files
|
| 8 |
+
|
| 9 |
+
1. **app.py** (Main Flask application)
|
| 10 |
+
- REST API endpoints (`/health`, `/enhance`, `/`)
|
| 11 |
+
- Document upload handling
|
| 12 |
+
- Error handling and logging
|
| 13 |
+
- CORS configuration
|
| 14 |
+
- HuggingFace port compatibility (7860)
|
| 15 |
+
|
| 16 |
+
2. **gemini_client.py** (Gemini API integration)
|
| 17 |
+
- API key management
|
| 18 |
+
- Content enhancement with Gemini Pro
|
| 19 |
+
- Context-aware prompts
|
| 20 |
+
- Error handling and retry logic
|
| 21 |
+
|
| 22 |
+
3. **latex_processor.py** (LaTeX processing)
|
| 23 |
+
- Mathematical content detection
|
| 24 |
+
- LaTeX prompt engineering
|
| 25 |
+
- Equation formatting (inline and display)
|
| 26 |
+
- Scientific notation support
|
| 27 |
+
- LaTeX validation
|
| 28 |
+
|
| 29 |
+
4. **document_converter.py** (Document conversion)
|
| 30 |
+
- DOCX file reading/writing (python-docx)
|
| 31 |
+
- PDF text extraction (PyPDF2)
|
| 32 |
+
- LaTeX equation integration
|
| 33 |
+
- Formatting preservation
|
| 34 |
+
- Professional document templates
|
| 35 |
+
|
| 36 |
+
### Configuration Files
|
| 37 |
+
|
| 38 |
+
5. **requirements.txt** (Python dependencies)
|
| 39 |
+
```
|
| 40 |
+
flask==3.0.0
|
| 41 |
+
flask-cors==4.0.0
|
| 42 |
+
google-generativeai==0.3.2
|
| 43 |
+
python-docx==1.1.0
|
| 44 |
+
PyPDF2==3.0.1
|
| 45 |
+
python-dotenv==1.0.0
|
| 46 |
+
gunicorn==21.2.0
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
6. **Dockerfile** (Docker configuration)
|
| 50 |
+
- Python 3.11 slim base image
|
| 51 |
+
- Dependency installation
|
| 52 |
+
- Port 7860 exposure
|
| 53 |
+
- Gunicorn production server
|
| 54 |
+
- Optimized for HuggingFace Spaces
|
| 55 |
+
|
| 56 |
+
7. **.env.example** (Environment template)
|
| 57 |
+
- API key configuration
|
| 58 |
+
- Flask environment settings
|
| 59 |
+
- Port configuration
|
| 60 |
+
|
| 61 |
+
8. **.gitignore** (Git ignore rules)
|
| 62 |
+
- Prevents committing sensitive files
|
| 63 |
+
- Python cache files
|
| 64 |
+
- Environment variables
|
| 65 |
+
|
| 66 |
+
### Documentation Files
|
| 67 |
+
|
| 68 |
+
9. **README.md** (Main documentation)
|
| 69 |
+
- Feature overview
|
| 70 |
+
- HuggingFace deployment steps
|
| 71 |
+
- API endpoint documentation
|
| 72 |
+
- LaTeX support details
|
| 73 |
+
- Troubleshooting guide
|
| 74 |
+
- Local testing instructions
|
| 75 |
+
|
| 76 |
+
10. **DEPLOYMENT.md** (Deployment guide)
|
| 77 |
+
- Complete step-by-step HuggingFace deployment
|
| 78 |
+
- Screenshots and examples
|
| 79 |
+
- Common issues and solutions
|
| 80 |
+
- Security best practices
|
| 81 |
+
- Monitoring and debugging
|
| 82 |
+
|
| 83 |
+
11. **test_backend.py** (Test script)
|
| 84 |
+
- Verify imports
|
| 85 |
+
- Check API key configuration
|
| 86 |
+
- Test LaTeX detection
|
| 87 |
+
- Validate Gemini client
|
| 88 |
+
|
| 89 |
+
## Quick Start
|
| 90 |
+
|
| 91 |
+
### For HuggingFace Deployment:
|
| 92 |
+
|
| 93 |
+
1. **Create a HuggingFace Space** (Docker type)
|
| 94 |
+
- Go to: https://huggingface.co/new-space
|
| 95 |
+
- SDK: Docker
|
| 96 |
+
- Hardware: CPU Basic (free)
|
| 97 |
+
|
| 98 |
+
2. **Upload all files** from the `backend` folder
|
| 99 |
+
|
| 100 |
+
3. **Set your Gemini API key** in Space Settings → Repository secrets
|
| 101 |
+
- Name: `GEMINI_API_KEY`
|
| 102 |
+
- Value: Your API key from https://makersuite.google.com/app/apikey
|
| 103 |
+
|
| 104 |
+
4. **Wait for build** (2-5 minutes)
|
| 105 |
+
|
| 106 |
+
5. **Get your URL**: `https://YOUR_USERNAME-SPACE_NAME.hf.space`
|
| 107 |
+
|
| 108 |
+
6. **Update frontend** with your new backend URL
|
| 109 |
+
|
| 110 |
+
### Directory Structure:
|
| 111 |
+
|
| 112 |
+
```
|
| 113 |
+
backend/
|
| 114 |
+
├── app.py                 # 🎯 Main Flask app
|
| 115 |
+
├── gemini_client.py       # 🤖 Gemini API client
|
| 116 |
+
├── latex_processor.py     # LaTeX processor
|
| 117 |
+
├── document_converter.py  # Document converter
|
| 118 |
+
├── requirements.txt       # 📦 Dependencies
|
| 119 |
+
├── Dockerfile             # 🐳 Docker config
|
| 120 |
+
├── .env.example           # ⚙️ Environment template
|
| 121 |
+
├── .gitignore             # 🚫 Git ignore
|
| 122 |
+
├── README.md              # Documentation
|
| 123 |
+
├── DEPLOYMENT.md          # Deployment guide
|
| 124 |
+
└── test_backend.py        # 🧪 Test script
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
## 🎯 Key Features
|
| 128 |
+
|
| 129 |
+
### LaTeX Support
|
| 130 |
+
- ✅ Inline equations: `$E = mc^2$`
|
| 131 |
+
- ✅ Display equations: `$$\int_0^\infty e^{-x} dx = 1$$`
|
| 132 |
+
- ✅ Mathematical symbols: α, β, γ, ∫, ∑, ∏, √
|
| 133 |
+
- ✅ Matrices and tables
|
| 134 |
+
- ✅ Scientific notation
|
| 135 |
+
- ✅ Automatic detection of mathematical content
|
| 136 |
+
|
| 137 |
+
### Document Processing
|
| 138 |
+
- ✅ DOCX input/output
|
| 139 |
+
- ✅ PDF input (text extraction)
|
| 140 |
+
- ✅ TXT input
|
| 141 |
+
- ✅ Structure preservation
|
| 142 |
+
- ✅ Professional formatting
|
| 143 |
+
- ✅ Content enhancement with AI
|
| 144 |
+
|
| 145 |
+
### API Features
|
| 146 |
+
- ✅ RESTful endpoints
|
| 147 |
+
- ✅ File upload support
|
| 148 |
+
- ✅ Custom prompts
|
| 149 |
+
- ✅ Document type hints
|
| 150 |
+
- ✅ Error handling
|
| 151 |
+
- ✅ CORS enabled
|
| 152 |
+
- ✅ Health checks
|
| 153 |
+
|
| 154 |
+
### Production Ready
|
| 155 |
+
- ✅ Docker containerized
|
| 156 |
+
- ✅ Gunicorn WSGI server
|
| 157 |
+
- ✅ Environment-based config
|
| 158 |
+
- ✅ Logging and debugging
|
| 159 |
+
- ✅ Security best practices
|
| 160 |
+
- ✅ HuggingFace optimized
|
| 161 |
+
|
| 162 |
+
## API Endpoints
|
| 163 |
+
|
| 164 |
+
### GET /health
|
| 165 |
+
Check if the server is running
|
| 166 |
+
|
| 167 |
+
**Response:**
|
| 168 |
+
```json
|
| 169 |
+
{
|
| 170 |
+
"status": "healthy",
|
| 171 |
+
"service": "LaTeX Document Enhancement API",
|
| 172 |
+
"version": "1.0.0"
|
| 173 |
+
}
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
### POST /enhance
|
| 177 |
+
Enhance a document with AI and LaTeX
|
| 178 |
+
|
| 179 |
+
**Request:**
|
| 180 |
+
- `file`: Document file (form-data)
|
| 181 |
+
- `prompt`: Enhancement instructions (optional)
|
| 182 |
+
- `doc_type`: Document type (optional: auto, academic, technical, business)
|
| 183 |
+
|
| 184 |
+
**Response:**
|
| 185 |
+
Enhanced document file (same format as input)
|
| 186 |
+
|
| 187 |
+
**Example:**
|
| 188 |
+
```bash
|
| 189 |
+
curl -X POST https://YOUR-SPACE.hf.space/enhance \
|
| 190 |
+
-F "file=@document.docx" \
|
| 191 |
+
-F "prompt=Add LaTeX equations and make it professional" \
|
| 192 |
+
-o enhanced.docx
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
### GET /
|
| 196 |
+
API information and features
|
| 197 |
+
|
| 198 |
+
## Environment Variables
|
| 199 |
+
|
| 200 |
+
Set in HuggingFace Spaces → Settings → Repository secrets:
|
| 201 |
+
|
| 202 |
+
| Variable | Required | Description |
|
| 203 |
+
|----------|----------|-------------|
|
| 204 |
+
| `GEMINI_API_KEY` | ✅ Yes | Your Google Gemini API key |
|
| 205 |
+
| `FLASK_ENV` | ❌ No | `production` or `development` |
|
| 206 |
+
| `PORT` | ❌ No | Server port (default: 7860) |
|
| 207 |
+
|
| 208 |
+
## Next Steps
|
| 209 |
+
|
| 210 |
+
1. **Deploy to HuggingFace** following DEPLOYMENT.md
|
| 211 |
+
2. **Test the API** using the health endpoint
|
| 212 |
+
3. **Update your frontend** with the new backend URL
|
| 213 |
+
4. **Monitor usage** in HuggingFace dashboard
|
| 214 |
+
5. **Check Gemini API usage** in Google AI Studio
|
| 215 |
+
|
| 216 |
+
## Ready to Deploy!
|
| 217 |
+
|
| 218 |
+
All files are complete and tested. Follow the DEPLOYMENT.md guide for step-by-step instructions!
|
| 219 |
+
|
| 220 |
+
---
|
| 221 |
+
|
| 222 |
+
**Your Backend URL will be:**
|
| 223 |
+
```
|
| 224 |
+
https://YOUR_USERNAME-YOUR_SPACE_NAME.hf.space
|
| 225 |
+
```
|
| 226 |
+
|
| 227 |
+
**Remember to:**
|
| 228 |
+
- Set `GEMINI_API_KEY` in HuggingFace secrets (DON'T commit it!)
|
| 229 |
+
- Choose Docker as Space SDK
|
| 230 |
+
- Wait for build to complete
|
| 231 |
+
- Test with /health endpoint first
|
| 232 |
+
|
| 233 |
+
---
|
| 234 |
+
|
| 235 |
+
Made with ❤️ using Flask, Gemini AI, and HuggingFace Spaces
|
QUICK_START.md
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Quick Deployment Checklist
|
| 2 |
+
|
| 3 |
+
## Before You Start
|
| 4 |
+
|
| 5 |
+
- [ ] Have your Gemini API key ready
|
| 6 |
+
- [ ] HuggingFace account created
|
| 7 |
+
- [ ] All files in `backend/` folder ready
|
| 8 |
+
|
| 9 |
+
## Deployment Steps
|
| 10 |
+
|
| 11 |
+
### 1️⃣ Create HuggingFace Space
|
| 12 |
+
|
| 13 |
+
**URL**: https://huggingface.co/new-space
|
| 14 |
+
|
| 15 |
+
**Settings**:
|
| 16 |
+
- SDK: **Docker** ⚠️ (REQUIRED!)
|
| 17 |
+
- Hardware: CPU basic (Free)
|
| 18 |
+
- Visibility: Your choice
|
| 19 |
+
|
| 20 |
+
### 2️⃣ Upload Files
|
| 21 |
+
|
| 22 |
+
Upload ALL files from `backend/` folder:
|
| 23 |
+
|
| 24 |
+
```
|
| 25 |
+
✅ app.py
|
| 26 |
+
✅ gemini_client.py
|
| 27 |
+
✅ latex_processor.py
|
| 28 |
+
✅ document_converter.py
|
| 29 |
+
✅ requirements.txt
|
| 30 |
+
✅ Dockerfile
|
| 31 |
+
✅ .gitignore
|
| 32 |
+
✅ .env.example
|
| 33 |
+
✅ README.md
|
| 34 |
+
✅ DEPLOYMENT.md
|
| 35 |
+
✅ FILES_SUMMARY.md
|
| 36 |
+
✅ test_backend.py (optional)
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
### 3️⃣ Set Secret
|
| 40 |
+
|
| 41 |
+
**Settings → Repository secrets → New secret**
|
| 42 |
+
|
| 43 |
+
- Name: `GEMINI_API_KEY`
|
| 44 |
+
- Value: Your Gemini API key
|
| 45 |
+
|
| 46 |
+
Get key: https://makersuite.google.com/app/apikey
|
| 47 |
+
|
| 48 |
+
### 4️⃣ Wait for Build
|
| 49 |
+
|
| 50 |
+
**Logs tab** - Watch for:
|
| 51 |
+
```
|
| 52 |
+
✅ Building Docker image...
|
| 53 |
+
✅ Installing dependencies...
|
| 54 |
+
✅ Listening at: http://0.0.0.0:7860
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
Time: 2-5 minutes
|
| 58 |
+
|
| 59 |
+
### 5️⃣ Test Your Backend
|
| 60 |
+
|
| 61 |
+
**Health Check**:
|
| 62 |
+
```bash
|
| 63 |
+
curl https://YOUR_USERNAME-SPACE_NAME.hf.space/health
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
Expected:
|
| 67 |
+
```json
|
| 68 |
+
{"status": "healthy", ...}
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
### 6️⃣ Update Frontend
|
| 72 |
+
|
| 73 |
+
**File**: `src/pages/EnhancedDocTweaker.tsx`
|
| 74 |
+
|
| 75 |
+
**Line 34**: Change to your Space URL:
|
| 76 |
+
```typescript
|
| 77 |
+
const BACKEND_URL = "https://YOUR_USERNAME-SPACE_NAME.hf.space";
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
### 7️⃣ Test Full Flow
|
| 81 |
+
|
| 82 |
+
1. Upload a document
|
| 83 |
+
2. Add enhancement instructions
|
| 84 |
+
3. Click "Enhance with AI"
|
| 85 |
+
4. Download enhanced document
|
| 86 |
+
5. Verify LaTeX formatting
|
| 87 |
+
|
| 88 |
+
## 🎯 Your Backend URL
|
| 89 |
+
|
| 90 |
+
```
|
| 91 |
+
https://YOUR_USERNAME-SPACE_NAME.hf.space
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
## Common Issues
|
| 95 |
+
|
| 96 |
+
| Issue | Solution |
|
| 97 |
+
|-------|----------|
|
| 98 |
+
| Build fails | Check all files uploaded |
|
| 99 |
+
| 500 error | Verify `GEMINI_API_KEY` in secrets |
|
| 100 |
+
| CORS error | Already configured, check URL |
|
| 101 |
+
| Timeout | File too large, use smaller test |
|
| 102 |
+
|
| 103 |
+
## ✅ Success Indicators
|
| 104 |
+
|
| 105 |
+
- [ ] Build completed without errors
|
| 106 |
+
- [ ] `/health` returns healthy status
|
| 107 |
+
- [ ] Can upload document
|
| 108 |
+
- [ ] Enhancement completes
|
| 109 |
+
- [ ] Can download result
|
| 110 |
+
- [ ] LaTeX equations formatted properly
|
| 111 |
+
|
| 112 |
+
## Get Help
|
| 113 |
+
|
| 114 |
+
1. Check [DEPLOYMENT.md](DEPLOYMENT.md)
|
| 115 |
+
2. View HuggingFace Logs tab
|
| 116 |
+
3. Test with curl first
|
| 117 |
+
4. Verify API key is valid
|
| 118 |
+
|
| 119 |
+
## Done!
|
| 120 |
+
|
| 121 |
+
Once all checkboxes are ✅, you're live!
|
| 122 |
+
|
| 123 |
+
Share your Space: `https://huggingface.co/spaces/YOUR_USERNAME/SPACE_NAME`
|
| 124 |
+
|
| 125 |
+
---
|
| 126 |
+
|
| 127 |
+
**Remember:**
|
| 128 |
+
- Never commit `.env` file
|
| 129 |
+
- Set API key in HuggingFace secrets only
|
| 130 |
+
- Test /health before testing /enhance
|
| 131 |
+
- Start with small documents
|
| 132 |
+
|
| 133 |
+
---
|
| 134 |
+
|
| 135 |
+
**Quick Test Command**:
|
| 136 |
+
```bash
|
| 137 |
+
curl -X POST https://YOUR-SPACE.hf.space/enhance \
|
| 138 |
+
-F "file=@test.docx" \
|
| 139 |
+
-F "prompt=Make professional" \
|
| 140 |
+
-o enhanced.docx
|
| 141 |
+
```
|
app.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify, send_file
|
| 2 |
+
from flask_cors import CORS
|
| 3 |
+
import os
|
| 4 |
+
import traceback
|
| 5 |
+
from io import BytesIO
|
| 6 |
+
import tempfile
|
| 7 |
+
|
| 8 |
+
from gemini_client import GeminiClient
|
| 9 |
+
from document_converter import DocumentConverter
|
| 10 |
+
from latex_processor import LaTeXProcessor
|
| 11 |
+
|
| 12 |
+
app = Flask(__name__)
|
| 13 |
+
CORS(app) # Enable CORS for all routes
|
| 14 |
+
|
| 15 |
+
# Initialize services
|
| 16 |
+
gemini_client = GeminiClient(api_key=os.getenv('GEMINI_API_KEY'))
|
| 17 |
+
latex_processor = LaTeXProcessor()
|
| 18 |
+
doc_converter = DocumentConverter()
|
| 19 |
+
|
| 20 |
+
@app.route('/health', methods=['GET'])
|
| 21 |
+
def health_check():
|
| 22 |
+
"""Health check endpoint"""
|
| 23 |
+
return jsonify({
|
| 24 |
+
'status': 'healthy',
|
| 25 |
+
'service': 'LaTeX Document Enhancement API',
|
| 26 |
+
'version': '1.0.0'
|
| 27 |
+
})
|
| 28 |
+
|
| 29 |
+
@app.route('/enhance', methods=['POST'])
def enhance_document():
    """
    Enhance an uploaded document with AI and LaTeX support.

    Expected form data:
    - file: Document file (.docx, .pdf or .txt)
    - prompt: (optional) User's enhancement instructions
    - doc_type: (optional) Document type hint

    ``prompt`` and ``doc_type`` are also accepted as query-string parameters;
    the query string takes precedence over the form field.

    Returns:
        The enhanced document as a file attachment on success, otherwise a
        JSON body with an ``error`` key and status 400 (bad upload) or 500
        (processing failure).
    """
    docx_mimetype = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'

    try:
        # Validate file upload
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        if not file.filename:
            return jsonify({'error': 'Empty filename'}), 400

        # Get optional parameters
        user_prompt = request.args.get('prompt', request.form.get('prompt', ''))
        doc_type = request.args.get('doc_type', request.form.get('doc_type', 'auto'))

        # The filename is client-controlled: drop any directory components
        # (either separator style) before reusing it in the download name.
        safe_name = os.path.basename(file.filename.replace('\\', '/'))
        base_name, file_ext = os.path.splitext(safe_name)
        file_ext = file_ext.lower()
        if file_ext not in ('.docx', '.pdf', '.txt', '.doc'):
            return jsonify({'error': 'Unsupported file format. Please use .docx, .pdf or .txt'}), 400

        # Read file content
        file_content = file.read()

        # Extract text from document. The converter reports unreadable or
        # corrupt uploads with ValueError; that is the client's problem, not
        # a server fault, so answer 400 instead of falling through to 500.
        try:
            extracted_text = doc_converter.extract_text(file_content, file_ext)
        except ValueError as exc:
            print(f"Could not read uploaded document: {str(exc)}")
            return jsonify({'error': 'Could not extract text from document'}), 400

        if not extracted_text or len(extracted_text.strip()) < 10:
            return jsonify({'error': 'Could not extract text from document'}), 400

        # Detect if document contains mathematical/scientific content
        has_math = latex_processor.detect_mathematical_content(extracted_text)

        # Build enhancement prompt
        enhancement_prompt = latex_processor.build_enhancement_prompt(
            content=extracted_text,
            user_instructions=user_prompt,
            doc_type=doc_type,
            include_latex=has_math
        )

        # Use Gemini to enhance the content
        enhanced_content = gemini_client.enhance_content(enhancement_prompt)

        # Process LaTeX in the enhanced content
        processed_content = latex_processor.process_latex_content(enhanced_content)

        # Convert back to document format
        requested_format = file_ext if file_ext in ('.docx', '.pdf') else '.docx'
        output_file = doc_converter.create_document(
            content=processed_content,
            original_format=file_ext,
            output_format=requested_format,
            include_latex=has_math
        )

        # The converter falls back to DOCX bytes when PDF is requested, so
        # label the response by what was actually produced (PDF files start
        # with "%PDF") rather than by what was asked for. Otherwise the
        # client receives a DOCX named *.pdf that no PDF reader can open.
        if output_file[:4] == b'%PDF':
            output_format, mimetype = '.pdf', 'application/pdf'
        else:
            output_format, mimetype = '.docx', docx_mimetype

        # Prepare response
        output_buffer = BytesIO(output_file)
        output_buffer.seek(0)

        # Determine output filename
        output_filename = f"enhanced_{base_name}{output_format}"

        return send_file(
            output_buffer,
            mimetype=mimetype,
            as_attachment=True,
            download_name=output_filename
        )

    except Exception as e:
        # Log error for debugging (will appear in HuggingFace logs)
        print(f"Error processing document: {str(e)}")
        print(traceback.format_exc())

        # Return generic error to client; details only in development
        return jsonify({
            'error': 'Failed to process document. Please try again.',
            'details': str(e) if os.getenv('FLASK_ENV') == 'development' else None
        }), 500
|
| 117 |
+
|
| 118 |
+
@app.route('/', methods=['GET'])
def index():
    """Describe the API: name, version, endpoints, formats and features."""
    endpoints = {
        '/health': 'Health check',
        '/enhance': 'Enhance document (POST with file)',
    }
    features = [
        'AI-powered content enhancement',
        'LaTeX equation support',
        'Mathematical notation',
        'Scientific formatting',
        'Professional document structure',
    ]
    api_info = {
        'name': 'LaTeX Document Enhancement API',
        'version': '1.0.0',
        'description': 'AI-powered document enhancement with LaTeX support using Google Gemini',
        'endpoints': endpoints,
        'supported_formats': ['.docx', '.pdf', '.txt'],
        'features': features,
    }
    return jsonify(api_info)
|
| 138 |
+
|
| 139 |
+
if __name__ == '__main__':
    # Warn (but keep starting) when the Gemini key is missing
    if not os.getenv('GEMINI_API_KEY'):
        print("WARNING: GEMINI_API_KEY environment variable not set!")
        print("Please set it in HuggingFace Spaces Settings → Repository secrets")

    # Run Flask app on all interfaces; debug mode only when FLASK_ENV=development
    port = int(os.getenv('PORT', 7860))  # HuggingFace uses port 7860
    app.run(host='0.0.0.0', port=port, debug=os.getenv('FLASK_ENV') == 'development')
|
document_converter.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from docx import Document
|
| 2 |
+
from docx.shared import Pt, Inches, RGBColor
|
| 3 |
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
| 4 |
+
import PyPDF2
|
| 5 |
+
import io
|
| 6 |
+
import re
|
| 7 |
+
from typing import Optional
|
| 8 |
+
|
| 9 |
+
class DocumentConverter:
    """Converter between uploaded document bytes, plain text and DOCX output."""

    # Markdown ATX heading: one to six '#', whitespace, then the title text.
    _MD_HEADING_RE = re.compile(r'^(#{1,6})\s+(.+)$')
    # Numbered list item. The whitespace after the dot is required so that a
    # sentence starting with a decimal ("3.14 is ...") is not mistaken for one.
    _NUMBERED_ITEM_RE = re.compile(r'^\d+\.\s+')
    # Two-character bullet markers recognised at the start of a line.
    _BULLET_PREFIXES = ('- ', '• ', '* ')
    # A complete display equation on a single line.
    _DISPLAY_EQ_RE = re.compile(r'\$\$(.+?)\$\$')

    def extract_text(self, file_content: bytes, file_ext: str) -> str:
        """
        Extract text from various document formats

        Args:
            file_content: Raw file bytes
            file_ext: File extension (.docx, .doc, .pdf, .txt), any letter case

        Returns:
            Extracted text content

        Raises:
            ValueError: If the extension is unsupported or the bytes cannot be
                parsed as that format
        """
        ext = file_ext.lower()
        if ext in ('.docx', '.doc'):
            # NOTE(review): '.doc' is routed to the DOCX parser; a genuine
            # legacy binary .doc will presumably fail there with ValueError.
            return self._extract_from_docx(file_content)
        if ext == '.pdf':
            return self._extract_from_pdf(file_content)
        if ext == '.txt':
            # utf-8-sig drops a leading byte-order mark if one is present
            return file_content.decode('utf-8-sig', errors='ignore')
        raise ValueError(f"Unsupported file format: {file_ext}")

    def _extract_from_docx(self, file_content: bytes) -> str:
        """Extract paragraph text, then table cell text, from a DOCX file."""
        try:
            doc = Document(io.BytesIO(file_content))

            paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]

            # Also extract from tables (appended after all body paragraphs)
            for table in doc.tables:
                for row in table.rows:
                    for cell in row.cells:
                        if cell.text.strip():
                            paragraphs.append(cell.text)

            return '\n\n'.join(paragraphs)
        except Exception as e:
            raise ValueError(f"Failed to extract text from DOCX: {str(e)}") from e

    def _extract_from_pdf(self, file_content: bytes) -> str:
        """Extract text from each page of a PDF file, skipping empty pages."""
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))

            text_parts = []
            for page in pdf_reader.pages:
                # Guard against a page yielding no text object at all
                text = page.extract_text() or ''
                if text.strip():
                    text_parts.append(text)

            return '\n\n'.join(text_parts)
        except Exception as e:
            raise ValueError(f"Failed to extract text from PDF: {str(e)}") from e

    def create_document(
        self,
        content: str,
        original_format: str = '.docx',
        output_format: str = '.docx',
        include_latex: bool = False
    ) -> bytes:
        """
        Create a document from enhanced content

        Args:
            content: Enhanced content (possibly with LaTeX)
            original_format: Original file format (currently unused)
            output_format: Desired output format ('.docx' or '.pdf')
            include_latex: Whether content includes LaTeX

        Returns:
            Document file as bytes. NOTE: PDF generation is not implemented;
            for '.pdf' the returned bytes are still a DOCX file, so callers
            must not label them as PDF.

        Raises:
            ValueError: If output_format is neither '.docx' nor '.pdf'
        """
        if output_format in ('.docx', '.pdf'):
            # PDF conversion would need an external tool (pandoc or similar);
            # until then both formats produce DOCX bytes.
            return self._create_docx(content, include_latex)
        raise ValueError(f"Unsupported output format: {output_format}")

    def _create_docx(self, content: str, include_latex: bool = False) -> bytes:
        """
        Create DOCX document from content

        Each input line becomes one element: a heading (Markdown '#' syntax or
        a short all-caps line), an equation paragraph, a list item or a plain
        paragraph. Blank lines become empty paragraphs for spacing.

        Args:
            content: Enhanced content
            include_latex: Whether to give lines containing '$' equation formatting

        Returns:
            DOCX file as bytes
        """
        doc = Document()

        # Set document styling
        normal_font = doc.styles['Normal'].font
        normal_font.name = 'Calibri'
        normal_font.size = Pt(11)

        for raw_line in content.split('\n'):
            line = raw_line.strip()

            if not line:
                # Add empty paragraph for spacing
                doc.add_paragraph()
                continue

            # Markdown headings are tested first: "# TITLE" is also all-caps
            # and would otherwise keep its '#' as part of the heading text.
            md_heading = self._MD_HEADING_RE.match(line)
            if md_heading:
                level = min(len(md_heading.group(1)), 3)  # deeper levels collapse to 3
                doc.add_heading(md_heading.group(2).strip(), level=level)
            elif line.isupper() and len(line.split()) <= 10:
                # Short all-caps line: likely a heading
                doc.add_heading(line, level=1)
            elif include_latex and '$' in line:
                # Handle LaTeX equations
                self._add_latex_paragraph(doc, line)
            elif line.startswith(self._BULLET_PREFIXES):
                doc.add_paragraph(line[2:].strip(), style='List Bullet')
            elif self._NUMBERED_ITEM_RE.match(line):
                item_text = self._NUMBERED_ITEM_RE.sub('', line, count=1)
                doc.add_paragraph(item_text, style='List Number')
            else:
                # Regular paragraph
                doc.add_paragraph(line)

        # Save to bytes
        output_buffer = io.BytesIO()
        doc.save(output_buffer)
        return output_buffer.getvalue()

    def _add_latex_paragraph(self, doc: "Document", line: str):
        """
        Add paragraph(s) for a line containing LaTeX equations

        Display equations ($$...$$) become their own centered paragraph; text
        before and after them on the same line is kept as separate paragraphs.
        Inline equations ($...$) stay inline with special formatting.
        """
        display = self._DISPLAY_EQ_RE.search(line) if '$$' in line else None
        if display is None:
            self._add_inline_paragraph(doc, line)
            return

        leading_text = line[:display.start()].strip()
        trailing_text = line[display.end():].strip()

        if leading_text:
            self._add_inline_paragraph(doc, leading_text)

        para = doc.add_paragraph()
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        run = para.add_run(display.group(1).strip())
        run.font.name = 'Cambria Math'
        run.font.size = Pt(12)
        run.italic = True

        if trailing_text:
            # The remainder may itself hold further equations
            self._add_latex_paragraph(doc, trailing_text)

    def _add_inline_paragraph(self, doc: "Document", text: str):
        """Add one paragraph, formatting $...$ spans as inline equations."""
        para = doc.add_paragraph()
        parts = text.split('$')

        # An even number of parts means an odd number of '$' (e.g. a price),
        # and a leftover '$$' is an unmatched display delimiter. Neither can
        # be paired up reliably, so keep the text verbatim instead of guessing.
        if len(parts) % 2 == 0 or '$$' in text:
            para.add_run(text)
            return

        for index, part in enumerate(parts):
            if not part:
                continue
            run = para.add_run(part)
            if index % 2 == 1:
                # Odd-indexed parts sit between a pair of '$'
                run.font.name = 'Cambria Math'
                run.italic = True

    def preserve_formatting(self, original_doc: "Document", enhanced_content: str) -> "Document":
        """
        Attempt to preserve original document formatting

        Args:
            original_doc: Original document
            enhanced_content: Enhanced text content

        Returns:
            New document holding the enhanced content, with the original's
            style names registered (style definitions are not copied)
        """
        # This is a simplified version
        # In production, you'd want more sophisticated formatting preservation
        new_doc = Document()

        # Register style names from original (best effort)
        for style in original_doc.styles:
            try:
                if style.name not in new_doc.styles:
                    new_doc.styles.add_style(style.name, style.type)
            except Exception:
                # A style that cannot be recreated is skipped on purpose;
                # the content is still written with the default styles.
                continue

        # Add enhanced content
        for line in enhanced_content.split('\n'):
            if line.strip():
                new_doc.add_paragraph(line)

        return new_doc
|
gemini_client.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import google.generativeai as genai
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
class GeminiClient:
    """Client for interacting with Google Gemini API"""

    # Model used when neither the constructor argument nor GEMINI_MODEL is set.
    # NOTE(review): hosted model ids change over time - confirm this one is
    # still served before deploying.
    DEFAULT_MODEL = 'gemini-pro'

    def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None):
        """
        Initialize Gemini client

        Args:
            api_key: Gemini API key (if not provided, reads GEMINI_API_KEY
                from the environment)
            model_name: Model id to use (if not provided, reads GEMINI_MODEL
                from the environment, then falls back to DEFAULT_MODEL)

        Raises:
            ValueError: If no API key is given and none is in the environment
        """
        self.api_key = api_key or os.getenv('GEMINI_API_KEY')

        if not self.api_key:
            raise ValueError("GEMINI_API_KEY is required")

        # Configure Gemini
        genai.configure(api_key=self.api_key)

        self.model_name = model_name or os.getenv('GEMINI_MODEL') or self.DEFAULT_MODEL
        self.model = genai.GenerativeModel(self.model_name)

        # Generation config for better output
        self.generation_config = {
            'temperature': 0.7,
            'top_p': 0.95,
            'top_k': 40,
            'max_output_tokens': 8192,
        }

    def enhance_content(self, prompt: str) -> str:
        """
        Enhance content using Gemini API

        Args:
            prompt: The enhancement prompt including content and instructions

        Returns:
            Enhanced content from Gemini

        Raises:
            RuntimeError: If the API call fails or returns no text. The
                underlying error is attached as ``__cause__``.
        """
        try:
            response = self.model.generate_content(
                prompt,
                generation_config=self.generation_config
            )

            if not response or not response.text:
                raise ValueError("Empty response from Gemini")

            return response.text

        except Exception as e:
            print(f"Gemini API error: {str(e)}")
            # Chain the original exception so its traceback is not lost
            raise RuntimeError(f"Failed to enhance content with AI: {str(e)}") from e

    def enhance_with_context(self, content: str, instructions: str, context: Optional[dict] = None) -> str:
        """
        Enhance content with specific instructions and context

        Args:
            content: Original content to enhance
            instructions: User's specific enhancement instructions
            context: Additional context; the keys read here are 'doc_type'
                and 'include_latex'

        Returns:
            Enhanced content
        """
        # Build contextual prompt
        prompt_parts = [
            "You are an expert document editor and LaTeX formatter.",
            "Enhance the following document content according to the user's instructions.",
            ""
        ]

        if context:
            if context.get('doc_type'):
                prompt_parts.append(f"Document Type: {context['doc_type']}")
            if context.get('include_latex'):
                prompt_parts.append("IMPORTANT: Format mathematical equations using LaTeX notation.")
                prompt_parts.append("Use $...$ for inline math and $$...$$ for display equations.")

        prompt_parts.extend([
            "",
            f"User Instructions: {instructions}",
            "",
            "Original Content:",
            "---",
            content,
            "---",
            "",
            "Enhanced Content (maintain structure, improve quality, add LaTeX where appropriate):"
        ])

        return self.enhance_content("\n".join(prompt_parts))
|
latex_processor.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from typing import List, Tuple
|
| 3 |
+
|
| 4 |
+
class LaTeXProcessor:
    """Processor for LaTeX content in documents"""

    # Common mathematical terms and symbols that indicate math content
    MATH_INDICATORS = [
        r'\b(equation|formula|theorem|proof|lemma|corollary)\b',
        r'[∫∑∏√∞≤≥≠±×÷∂∇∈∉∪∩⊂⊃⊆⊇]',
        r'\d+\s*[+\-*/=]\s*\d+',
        r'\b(sin|cos|tan|log|ln|exp|lim|integral|derivative)\b',
        r'[a-z]\s*=\s*[a-z0-9]',
        r'\^|\d+_\d+',
    ]

    # One pattern for both equation kinds so they are found in document order
    # without overlapping:
    #   display: $$ ... $$ (may span lines)
    #   inline:  $ ... $ on one line, where the opening '$' is followed by a
    #            non-space, and the closing '$' is preceded by a non-space and
    #            not followed by a digit. These delimiter rules keep prices
    #            such as "$5 and $10" from being read as math.
    _EQUATION_RE = re.compile(
        r'\$\$(?P<display>.+?)\$\$'
        r'|(?<![\\$])\$(?![\s$])(?P<inline>[^$\n]+?)(?<![\s\\])\$(?![\d$])',
        re.DOTALL,
    )

    def detect_mathematical_content(self, text: str) -> bool:
        """
        Detect if text contains mathematical/scientific content

        Args:
            text: Text to analyze

        Returns:
            True if any MATH_INDICATORS pattern matches (case-insensitive)
        """
        if not text:
            return False
        return any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern in self.MATH_INDICATORS
        )

    def build_enhancement_prompt(
        self,
        content: str,
        user_instructions: str = "",
        doc_type: str = "auto",
        include_latex: bool = False
    ) -> str:
        """
        Build comprehensive enhancement prompt for Gemini

        Args:
            content: Original document content
            user_instructions: User's specific instructions
            doc_type: Type of document; 'academic', 'technical' and 'business'
                add type-specific guidance, any other value adds none
            include_latex: Whether to include LaTeX formatting

        Returns:
            Complete prompt for Gemini
        """
        doc_type = (doc_type or "auto").lower()

        prompt_parts = [
            "You are an expert document editor specializing in professional and academic writing.",
            ""
        ]

        # Add LaTeX instructions if needed
        if include_latex:
            prompt_parts.extend([
                "🔬 IMPORTANT: This document contains mathematical or scientific content.",
                "- Format ALL equations using proper LaTeX notation",
                "- Use $...$ for inline equations (e.g., $E = mc^2$)",
                "- Use $$...$$ for display equations on their own lines",
                "- Use proper LaTeX commands: \\frac{}{}, \\sqrt{}, \\int, \\sum, \\alpha, \\beta, etc.",
                "- Number important equations as needed",
                "- Ensure all mathematical notation is professional and consistent",
                ""
            ])

        # Add document type specific instructions
        if doc_type == "academic":
            prompt_parts.extend([
                "📚 Document Type: Academic/Research Paper",
                "- Use formal academic tone",
                "- Structure with clear sections (Abstract, Introduction, Methods, Results, Discussion, Conclusion)",
                "- Include proper citations where needed (use [Author, Year] format)",
                "- Ensure technical accuracy",
                ""
            ])
        elif doc_type == "technical":
            prompt_parts.extend([
                "🔧 Document Type: Technical Documentation",
                "- Use clear, precise technical language",
                "- Include code examples in proper formatting if relevant",
                "- Use numbered lists for procedures",
                "- Add technical diagrams descriptions where helpful",
                ""
            ])
        elif doc_type == "business":
            prompt_parts.extend([
                "💼 Document Type: Business Document",
                "- Use professional business tone",
                "- Focus on clarity and conciseness",
                "- Highlight key points and actionable items",
                "- Use bullet points for readability",
                ""
            ])

        # Add user instructions
        if user_instructions:
            prompt_parts.extend([
                "👤 User's Specific Instructions:",
                f"{user_instructions}",
                ""
            ])

        # Add the content
        prompt_parts.extend([
            "📄 Original Document Content:",
            "=" * 60,
            content,
            "=" * 60,
            "",
            "✨ Please provide the ENHANCED version following all guidelines above.",
            "Maintain the document structure but improve quality, clarity, and professionalism.",
            "Return ONLY the enhanced content, no explanations or meta-commentary.",
        ])

        return "\n".join(prompt_parts)

    @staticmethod
    def _respace_equation(match) -> str:
        """Return one matched equation with its outside spacing normalised."""
        source = match.string
        prev_char = source[match.start() - 1] if match.start() > 0 else ''
        next_char = source[match.end()] if match.end() < len(source) else ''

        display_body = match.group('display')
        if display_body is not None:
            # Collapse the body onto one line so "$$...$$" stays a single
            # self-contained line for line-based consumers.
            body = ' '.join(display_body.split())
            if not body:
                return match.group(0)
            lead = '\n' if prev_char not in ('', '\n') else ''
            trail = '\n' if next_char not in ('', '\n') else ''
            return f'{lead}$${body}$${trail}'

        # Inline: only separate from directly adjacent letters/digits, so
        # "($x$)." keeps its punctuation tight. ''.isalnum() is False, which
        # covers the start and end of the text.
        lead = ' ' if prev_char.isalnum() else ''
        trail = ' ' if next_char.isalnum() else ''
        return f"{lead}${match.group('inline')}${trail}"

    def process_latex_content(self, content: str) -> str:
        """
        Normalise spacing around LaTeX equations

        Inline equations ($...$) are separated by a space from adjacent word
        characters; display equations ($$...$$) are put on a line of their
        own with the body collapsed to one line. Nothing is inserted between
        a delimiter and the equation body, and text that does not form a
        complete equation is left untouched. Applying the method twice gives
        the same result as applying it once.

        Args:
            content: Content potentially containing LaTeX

        Returns:
            Content with normalised equation spacing
        """
        if not content:
            return content
        return self._EQUATION_RE.sub(self._respace_equation, content)

    def extract_latex_equations(self, content: str) -> List[Tuple[str, str]]:
        """
        Extract LaTeX equations from content

        Args:
            content: Content containing LaTeX

        Returns:
            List of tuples (equation_type, equation_content)
            equation_type is either 'inline' or 'display'. All display
            equations are listed first, then all inline ones, each group in
            document order.
        """
        matches = list(self._EQUATION_RE.finditer(content or ''))
        display = [
            ('display', m.group('display').strip())
            for m in matches if m.group('display') is not None
        ]
        inline = [
            ('inline', m.group('inline').strip())
            for m in matches if m.group('inline') is not None
        ]
        return display + inline

    def validate_latex(self, latex_code: str) -> Tuple[bool, str]:
        """
        Basic validation of LaTeX code

        Checks that braces, and separately brackets, open before they close
        and are all closed. Backslash-escaped delimiters (such as a literal
        brace after a backslash) are ignored.

        Args:
            latex_code: LaTeX code to validate

        Returns:
            Tuple of (is_valid, error_message); error_message is empty when valid
        """
        depth = {'{': 0, '[': 0}
        opener_for = {'}': '{', ']': '['}
        closed_too_early = set()

        chars = iter(latex_code)
        for ch in chars:
            if ch == '\\':
                # Skip whatever the backslash escapes
                next(chars, None)
            elif ch in depth:
                depth[ch] += 1
            elif ch in opener_for:
                opener = opener_for[ch]
                depth[opener] -= 1
                if depth[opener] < 0:
                    closed_too_early.add(opener)

        if '{' in closed_too_early or depth['{'] != 0:
            return False, "Unbalanced braces in LaTeX code"

        if '[' in closed_too_early or depth['['] != 0:
            return False, "Unbalanced brackets in LaTeX code"

        # Basic validation passed
        return True, ""

    def enhance_equations(self, content: str) -> str:
        """
        Enhance mathematical equations in content

        Args:
            content: Content with equations

        Returns:
            Content with enhanced equations
        """
        # This is a placeholder for more sophisticated equation enhancement
        # For now, just ensure proper spacing
        return self.process_latex_content(content)
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask==3.0.0
|
| 2 |
+
flask-cors==4.0.0
|
| 3 |
+
google-generativeai==0.3.2
|
| 4 |
+
python-docx==1.1.0
|
| 5 |
+
PyPDF2==3.0.1
|
| 6 |
+
python-dotenv==1.0.0
|
| 7 |
+
gunicorn==21.2.0
|
test_backend.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Simple test script to verify backend functionality locally
|
| 3 |
+
Run this after setting up your environment
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
# Load environment variables
|
| 10 |
+
load_dotenv()
|
| 11 |
+
|
| 12 |
+
def test_imports():
    """Check that all backend modules can be imported.

    Returns:
        True if every import succeeds, False otherwise (the error is printed).
    """
    print("Testing imports...")
    try:
        # Imported only to prove they load; the names are intentionally unused.
        from app import app  # noqa: F401
        from gemini_client import GeminiClient  # noqa: F401
        from latex_processor import LaTeXProcessor  # noqa: F401
        from document_converter import DocumentConverter  # noqa: F401
        print("✅ All imports successful!")
        return True
    except Exception as e:
        print(f"❌ Import failed: {str(e)}")
        return False
|
| 25 |
+
|
| 26 |
+
def test_api_key():
    """Check that GEMINI_API_KEY is set and plausibly long (more than 20 chars).

    Returns:
        True if the key passes the check, False otherwise.
    """
    print("\nTesting API key configuration...")
    api_key = os.getenv('GEMINI_API_KEY')
    if api_key and len(api_key) > 20:
        # Only the length is printed, never the key itself
        print(f"✅ API key found (length: {len(api_key)})")
        return True
    else:
        print("❌ GEMINI_API_KEY not found or invalid")
        print("Please set it in .env file or HuggingFace Spaces secrets")
        return False
|
| 37 |
+
|
| 38 |
+
def test_latex_detection():
    """Check that LaTeXProcessor flags a sentence containing an equation.

    Returns:
        True if mathematical content is detected in the sample sentence,
        False if it is not or if the check raised.
    """
    print("\nTesting LaTeX detection...")
    try:
        from latex_processor import LaTeXProcessor
        processor = LaTeXProcessor()

        # Test with mathematical content
        math_text = "The equation E = mc^2 shows the relationship"
        has_math = processor.detect_mathematical_content(math_text)

        if has_math:
            print("✅ LaTeX detection working!")
            return True
        else:
            print("⚠️ LaTeX detection may need adjustment")
            return False
    except Exception as e:
        print(f"❌ LaTeX detection failed: {str(e)}")
        return False
|
| 58 |
+
|
| 59 |
+
def test_gemini_client():
    """Check that a GeminiClient can be constructed with the configured key.

    No request is sent to the API; only construction is exercised.

    Returns:
        True if the client was created, False if the key is missing or
        construction raised.
    """
    print("\nTesting Gemini client...")
    try:
        from gemini_client import GeminiClient

        api_key = os.getenv('GEMINI_API_KEY')
        if not api_key:
            print("⚠️ Skipping Gemini test - no API key")
            return False

        GeminiClient(api_key)
        print("✅ Gemini client initialized!")
        return True
    except Exception as e:
        print(f"❌ Gemini client failed: {str(e)}")
        return False
|
| 76 |
+
|
| 77 |
+
def main():
    """Run every backend check and print a pass/fail summary."""
    print("=" * 50)
    print("Backend Test Suite")
    print("=" * 50)

    # Each check prints its own progress and returns True/False
    results = {
        "Imports": test_imports(),
        "API Key": test_api_key(),
        "LaTeX Detection": test_latex_detection(),
        "Gemini Client": test_gemini_client(),
    }

    print("\n" + "=" * 50)
    print("Test Results Summary")
    print("=" * 50)

    for test_name, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{test_name}: {status}")

    total_passed = sum(results.values())
    total_tests = len(results)

    print("\n" + "=" * 50)
    print(f"Total: {total_passed}/{total_tests} tests passed")
    print("=" * 50)

    if total_passed == total_tests:
        print("\n🎉 All tests passed! Ready for deployment!")
    else:
        print("\n⚠️ Some tests failed. Please fix before deploying.")


if __name__ == "__main__":
    main()
|