Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import tempfile | |
| import shutil | |
| from datetime import datetime | |
# CRITICAL: export cache locations BEFORE any HuggingFace library is imported,
# otherwise the libraries capture their default (read-only on Spaces) paths.
_CACHE_LOCATIONS = {
    'TRANSFORMERS_CACHE': '/tmp/transformers',
    'SENTENCE_TRANSFORMERS_HOME': '/tmp/sentence_transformers',
    'HF_HOME': '/tmp/huggingface',
    'HF_DATASETS_CACHE': '/tmp/datasets',
}
for _env_name, _cache_path in _CACHE_LOCATIONS.items():
    os.environ[_env_name] = _cache_path

# Materialize every cache directory up front; a permission failure here means
# the container filesystem is unusable, so abort immediately.
for _cache_path in _CACHE_LOCATIONS.values():
    try:
        os.makedirs(_cache_path, exist_ok=True)
        print(f"✓ Created cache directory: {_cache_path}")
    except PermissionError as e:
        print(f"✗ Failed to create {_cache_path}: {e}")
        sys.exit(1)
| # Now import other modules | |
| from flask import Flask, request, jsonify, render_template, redirect, url_for, flash | |
| import asyncio | |
| from retriever.document_store import DocumentStore | |
| from retriever.rag_pipeline import RAGPipeline | |
| from models.model_loader import load_llm | |
| from config import Config | |
# Flask application instance; settings are loaded from the Config class.
app = Flask(__name__)
app.config.from_object(Config)
# NOTE(review): insecure fallback secret key — this default is visible in the
# repo, so sessions/flash messages are forgeable unless SECRET_KEY is set in
# the environment. Confirm SECRET_KEY is configured in deployment.
app.secret_key = os.getenv('SECRET_KEY', 'your-secret-key-here')
# Initialize components at module import time, so the store, LLM, and pipeline
# are ready before the first request is served.
print("===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
print("Initializing document store...")
print(f"Using vector DB path: {os.getenv('VECTOR_DB_PATH', 'data/vector_store')}")
document_store = DocumentStore()
print("Document store initialized")
print("Loading LLM...")
# NOTE(review): app.config["LLAMA_API_KEY"] raises KeyError at startup if the
# key is absent from Config — confirm Config always defines it.
llm = load_llm(api_key=app.config["LLAMA_API_KEY"])
print("LLM loaded")
print("Initializing RAG pipeline...")
rag_pipeline = RAGPipeline(document_store, llm)
print("RAG pipeline initialized")
def index():
    """Render the application's home page.

    NOTE(review): no @app.route decorator is visible in this chunk — it may
    have been lost in extraction; confirm the route is registered.
    """
    home_template = 'index.html'
    return render_template(home_template)
def add_data():
    """Show the add-data form; on POST, store the submitted text.

    On success the user is redirected to the index page; otherwise the form
    is re-rendered with a flashed error message.
    """
    if request.method == 'POST':
        submitted_content = request.form.get('content')
        submitted_title = request.form.get('title', 'Untitled')
        if not submitted_content:
            flash('Content is required', 'error')
        else:
            try:
                document_store.add_text(content=submitted_content, title=submitted_title)
                flash('Data added successfully!', 'success')
                return redirect(url_for('index'))
            except Exception as e:
                # Surface the failure to the user instead of a 500 page.
                flash(f'Error adding data: {str(e)}', 'error')
    # GET request, missing content, or a failed add: show the form again.
    return render_template('add_data.html')
def upload_file():
    """Upload and process a file (PDF, TXT, etc.).

    Saves the uploaded file to a temporary path, hands it to the document
    store for ingestion, and always removes the temporary file afterwards
    (the original implementation leaked it when add_document raised).
    Redirects back to the add-data page in every case.
    """
    if 'file' not in request.files:
        flash('No file selected', 'error')
        return redirect(url_for('add_data'))
    file = request.files['file']
    if file.filename == '':
        flash('No file selected', 'error')
        return redirect(url_for('add_data'))
    try:
        # Keep the original extension so downstream parsers can dispatch on it.
        # NOTE(review): the suffix comes from a client-supplied filename —
        # tempfile still controls the directory, but confirm parsers only
        # branch on the extension.
        suffix = os.path.splitext(file.filename)[1]
        # delete=False: the path must outlive this `with` so add_document can
        # reopen it by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_path = tmp_file.name
            file.save(tmp_path)
        try:
            # Process the file and add to document store.
            document_store.add_document(tmp_path)
        finally:
            # Clean up the temp file whether or not ingestion succeeded.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        flash('File uploaded and processed successfully!', 'success')
    except Exception as e:
        flash(f'Error processing file: {str(e)}', 'error')
    return redirect(url_for('add_data'))
async def api_generate():
    """API endpoint to generate text based on stored data.

    Expects a JSON body of the form {"query": str, "type": str} where type is
    one of bio, cover_letter, general (defaults to "bio"). Returns the
    generated text as {"response": ...}, a 400 for a missing query, or a 500
    when generation fails.

    NOTE(review): async views require Flask to be installed with the async
    extra (flask[async]) — confirm the deployment includes it.
    """
    # request.json raises (415/400) when the body is missing or not JSON;
    # get_json(silent=True) returns None instead, so bad requests fall
    # through to the intended 400 below rather than an unhandled error.
    data = request.get_json(silent=True) or {}
    query = data.get('query', '')
    gen_type = data.get('type', 'bio')  # bio, cover_letter, general
    if not query:
        return jsonify({"error": "Query is required"}), 400
    try:
        # Generate response using RAG pipeline.
        response = await rag_pipeline.generate(query, gen_type)
        return jsonify({"response": response})
    except Exception as e:
        return jsonify({"error": f"Error generating response: {str(e)}"}), 500
def generate():
    """Render the generation page; on POST, run the RAG pipeline for a query.

    GET shows the empty form. POST with a query runs the pipeline and shows
    the result; failures are flashed and also passed to the template.
    """
    if request.method != 'POST':
        return render_template('generate.html')

    query = request.form.get('query', '')
    gen_type = request.form.get('type', 'bio')
    if not query:
        flash('Query is required', 'error')
        return render_template('generate.html')

    try:
        # The pipeline is async; drive it to completion from this sync view.
        response = asyncio.run(rag_pipeline.generate(query, gen_type))
    except Exception as e:
        flash(f'Error generating response: {str(e)}', 'error')
        return render_template('generate.html', query=query, error=str(e))
    return render_template('generate.html', query=query, response=response, gen_type=gen_type)
def debug_documents():
    """Debug endpoint: render a summary of every stored document.

    Shows the document count, total chunk count, and per-document title,
    chunk count, and a preview of the first chunk.
    """
    try:
        stored = document_store.documents
        chunk_total = 0
        docs_summary = []
        # Single pass: accumulate the chunk total while building summaries.
        for doc_id, doc in stored.items():
            chunks = doc.get("chunks", [])
            chunk_total += len(chunks)
            docs_summary.append({
                "id": doc_id,
                "title": doc.get("title", "Untitled"),
                "chunks": len(chunks),
                "first_chunk_preview": chunks[0][:100] + "..." if chunks else "",
            })
        return render_template(
            'debug.html',
            doc_count=len(stored),
            chunk_count=chunk_total,
            docs=docs_summary,
        )
    except Exception as e:
        return f"Error in debug endpoint: {str(e)}", 500
def health_check():
    """Liveness probe: report that the service is up and responding."""
    status_payload = {"status": "healthy", "message": "RAG application is running"}
    return jsonify(status_payload)
if __name__ == '__main__':
    # Make sure the on-disk data directory exists before serving requests.
    os.makedirs("data", exist_ok=True)
    # Hugging Face Spaces publishes the listening port through $PORT.
    port = int(os.getenv('PORT', '7860'))
    print(f"Starting Flask app on port {port}")
    # Bind to all interfaces so the container's port mapping can reach us.
    app.run(host='0.0.0.0', port=port, debug=False)