File size: 6,454 Bytes
e5ec5b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ea1183
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import os
import sys
import tempfile
import shutil
from datetime import datetime

# CRITICAL: these cache locations must be exported BEFORE any HuggingFace
# library is imported, so the mapping is applied to the environment first.
_CACHE_ENV = {
    'TRANSFORMERS_CACHE': '/tmp/transformers',
    'SENTENCE_TRANSFORMERS_HOME': '/tmp/sentence_transformers',
    'HF_HOME': '/tmp/huggingface',
    'HF_DATASETS_CACHE': '/tmp/datasets',
}
os.environ.update(_CACHE_ENV)

# Make sure every cache directory exists; abort start-up with exit code 1
# if one of them cannot be created because of missing permissions.
cache_dirs = list(_CACHE_ENV.values())
for cache_dir in cache_dirs:
    try:
        os.makedirs(cache_dir, exist_ok=True)
        print(f"✓ Created cache directory: {cache_dir}")
    except PermissionError as err:
        print(f"✗ Failed to create {cache_dir}: {err}")
        sys.exit(1)

# Now import other modules
from flask import Flask, request, jsonify, render_template, redirect, url_for, flash
import asyncio
from retriever.document_store import DocumentStore
from retriever.rag_pipeline import RAGPipeline
from models.model_loader import load_llm
from config import Config

app = Flask(__name__)
app.config.from_object(Config)

# Flask signs the session cookie (which backs flash()) with the secret key,
# so a publicly known placeholder would let anyone forge session data.
# Resolution order: SECRET_KEY environment variable, then whatever Config
# supplied, and only then a random key generated for this process.
_secret_key = os.getenv('SECRET_KEY') or app.config.get('SECRET_KEY')
if not _secret_key:
    _secret_key = os.urandom(32).hex()
    print(
        "WARNING: SECRET_KEY is not set; using a random key for this process. "
        "Sessions will not survive a restart or be shared between workers."
    )
app.secret_key = _secret_key

# Initialize components.
# NOTE: everything below runs at import time, so importing this module
# builds the document store, loads the LLM and wires up the RAG pipeline.
print("===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
print("Initializing document store...")
# This only reports the VECTOR_DB_PATH environment variable (or its default);
# NOTE(review): DocumentStore() is called without arguments, so confirm it
# actually resolves its path the same way.
print(f"Using vector DB path: {os.getenv('VECTOR_DB_PATH', 'data/vector_store')}")

document_store = DocumentStore()
print("Document store initialized")

print("Loading LLM...")
# Raises KeyError at start-up if Config does not define LLAMA_API_KEY.
llm = load_llm(api_key=app.config["LLAMA_API_KEY"])
print("LLM loaded")

print("Initializing RAG pipeline...")
rag_pipeline = RAGPipeline(document_store, llm)
print("RAG pipeline initialized")

@app.route('/')
def index():
    """Render the landing page from the ``index.html`` template."""
    home_page = render_template('index.html')
    return home_page

@app.route('/add_data', methods=['GET', 'POST'])
def add_data():
    """Show the add-data form and, on POST, store the submitted text.

    On POST the ``content`` and ``title`` form fields are read (``title``
    defaults to ``'Untitled'``) and passed to ``document_store.add_text``.
    A successful insert redirects to the index page; a missing ``content``
    or any exception flashes an error and re-renders the form.
    """
    form_template = 'add_data.html'

    # Plain page view: nothing to process.
    if request.method != 'POST':
        return render_template(form_template)

    text = request.form.get('content')
    heading = request.form.get('title', 'Untitled')

    if not text:
        flash('Content is required', 'error')
        return render_template(form_template)

    try:
        document_store.add_text(content=text, title=heading)
        flash('Data added successfully!', 'success')
        return redirect(url_for('index'))
    except Exception as err:
        flash(f'Error adding data: {err!s}', 'error')

    return render_template(form_template)

@app.route('/upload_file', methods=['POST'])
def upload_file():
    """Store an uploaded file in the document store.

    The upload (form field ``file``) is written to a temporary file that
    keeps the original extension, and that path is handed to
    ``document_store.add_document``. Which file types are accepted is
    decided by ``add_document``, not by this view.

    Returns:
        A redirect to the ``add_data`` page in every case. The outcome is
        reported through a flashed ``success`` or ``error`` message.
    """
    file = request.files.get('file')
    if file is None or not file.filename:
        flash('No file selected', 'error')
        return redirect(url_for('add_data'))

    tmp_path = None
    try:
        suffix = os.path.splitext(file.filename)[1]
        # Only reserve a unique path here; the handle is closed before the
        # upload is written so the file is never opened twice at once.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_path = tmp_file.name
        file.save(tmp_path)

        # Process the file and add it to the document store
        document_store.add_document(tmp_path)

        flash('File uploaded and processed successfully!', 'success')
    except Exception as e:
        flash(f'Error processing file: {str(e)}', 'error')
    finally:
        # delete=False means nothing removes the file for us, so clean up
        # even when saving or processing failed.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError as cleanup_error:
                # Best effort: a leftover temp file must not fail the request.
                print(f"Could not remove temporary file {tmp_path}: {cleanup_error}")

    return redirect(url_for('add_data'))

@app.route('/api/generate', methods=['POST'])
async def api_generate():
    """JSON API endpoint that generates text via the RAG pipeline.

    Expects a JSON object with:
        query: the prompt text (required, must be non-empty).
        type:  the generation type, defaults to ``'bio'``
               (bio, cover_letter, general).

    Returns:
        ``{"response": ...}`` on success;
        ``{"error": ...}`` with status 400 when the body is not a JSON
        object or ``query`` is missing/empty;
        ``{"error": ...}`` with status 500 when generation raises.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every client error is answered with the same JSON shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Valid JSON such as null, a list or a string has no .get(); reject
        # it here rather than failing with an unhandled AttributeError.
        return jsonify({"error": "Request body must be a JSON object"}), 400

    query = data.get('query', '')
    gen_type = data.get('type', 'bio')  # bio, cover_letter, general

    if not query:
        return jsonify({"error": "Query is required"}), 400

    try:
        # Generate response using RAG pipeline
        response = await rag_pipeline.generate(query, gen_type)
        return jsonify({"response": response})
    except Exception as e:
        return jsonify({"error": f"Error generating response: {str(e)}"}), 500

@app.route('/generate', methods=['GET', 'POST'])
def generate():
    """Show the generation form and, on POST, render the generated text.

    Reads the ``query`` and ``type`` form fields (``type`` defaults to
    ``'bio'``), runs ``rag_pipeline.generate`` and renders
    ``generate.html`` with the result. An empty query or a failure is
    reported through a flashed error message.
    """
    page = 'generate.html'

    # Plain page view: just show the empty form.
    if request.method != 'POST':
        return render_template(page)

    user_query = request.form.get('query', '')
    kind = request.form.get('type', 'bio')

    if not user_query:
        flash('Query is required', 'error')
        return render_template(page)

    try:
        # generate() is a coroutine; this synchronous view drives it to
        # completion with asyncio.run.
        answer = asyncio.run(rag_pipeline.generate(user_query, kind))
        return render_template(page, query=user_query, response=answer, gen_type=kind)
    except Exception as err:
        flash(f'Error generating response: {err!s}', 'error')
        return render_template(page, query=user_query, error=str(err))

@app.route('/debug/documents', methods=['GET'])
def debug_documents():
    """Render an overview of the documents held in ``document_store``.

    Passes the document count, the total chunk count and a per-document
    summary (id, title, chunk count, first 100 characters of the first
    chunk) to ``debug.html``. Any exception is returned as a plain-text
    500 response.
    """
    def _summarize(doc_id, doc):
        # Build the summary row for a single stored document.
        title = doc.get("title", "Untitled")
        chunks = doc.get("chunks", [])
        chunk_total = len(chunks)
        preview = chunks[0][:100] + "..." if chunks else ""
        return {
            "id": doc_id,
            "title": title,
            "chunks": chunk_total,
            "first_chunk_preview": preview,
        }

    try:
        stored = document_store.documents
        total_docs = len(stored)
        total_chunks = sum(len(entry.get('chunks', [])) for entry in stored.values())
        summaries = [_summarize(doc_id, entry) for doc_id, entry in stored.items()]

        return render_template(
            'debug.html',
            doc_count=total_docs,
            chunk_count=total_chunks,
            docs=summaries,
        )
    except Exception as err:
        return f"Error in debug endpoint: {err!s}", 500

@app.route('/health')
def health_check():
    """Return a static JSON payload reporting that the app is running."""
    payload = {"status": "healthy", "message": "RAG application is running"}
    return jsonify(payload)

if __name__ == '__main__':
    # The data directory must exist before the server starts.
    os.makedirs("data", exist_ok=True)

    # Hugging Face Spaces provides the port via PORT; default to 7860.
    listen_port = int(os.getenv('PORT', 7860))

    print(f"Starting Flask app on port {listen_port}")
    app.run(host='0.0.0.0', port=listen_port, debug=False)