AAdevloper
Deploy Defence-Tech Investment Scanner v1.0 - Complete AI-powered tool with 42 scored opportunities
7e85722 | """ | |
| Gradio UI for Defence-Tech Investment Scanner. | |
| Deploys to Hugging Face Spaces. | |
| """ | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from pathlib import Path | |
| from datetime import datetime | |
| import logging | |
# Configure logging once at import time; the module-level logger is used by
# every method of the app class below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class InvestmentDossierApp:
    """Gradio app backend for semantic search and investment dossier generation.

    On construction the scored dataset is read from ``data_file``; when that
    file does not exist a single-row demo dataset is created instead so the
    UI can still be exercised.

    Attributes:
        data_file: Path of the parquet file holding the scored records.
        df: DataFrame of scored records (demo data if the file is missing).
        embeddings: 2-D array with one embedding row per record, or ``None``
            when the loaded file has no ``embedding`` column.
        model: Lazily loaded SentenceTransformer used to embed queries.
        last_updated: ``YYYY-MM-DD`` string shown in the UI.
    """

    def __init__(self, data_file='data/processed/scored_data.parquet'):
        """Initialize the app and immediately load (or synthesize) the data."""
        self.data_file = data_file
        self.df = None
        self.embeddings = None
        self.model = None
        self.last_updated = None
        self.load_data()

    def load_data(self):
        """Load processed data and embeddings, falling back to demo data."""
        logger.info("Loading data...")

        if not Path(self.data_file).exists():
            logger.warning("Data file not found: %s", self.data_file)
            # Create sample data for demo
            self.create_demo_data()
            return

        # Load scored data
        self.df = pd.read_parquet(self.data_file)
        logger.info("Loaded %d records", len(self.df))

        # Extract embeddings
        if 'embedding' in self.df.columns:
            self.embeddings = np.array(self.df['embedding'].tolist())
            logger.info("Loaded embeddings: %s", self.embeddings.shape)

        # Get last update time. str() keeps the slice valid when the column
        # holds timestamps rather than strings, and the length check avoids
        # an IndexError on an empty file.
        if 'scrape_date' in self.df.columns and len(self.df) > 0:
            self.last_updated = str(self.df['scrape_date'].iloc[0])[:10]
        else:
            self.last_updated = datetime.now().strftime('%Y-%m-%d')

    def create_demo_data(self):
        """Create demo data if processed data not available.

        The demo row carries every column the other methods index
        (including ``rank`` and ``cap_intensity``) so the dossier and the
        top-opportunities table work without the real pipeline output.
        """
        logger.info("Creating demo data...")
        self.df = pd.DataFrame({
            'rank': [1],
            'title': ['Sample Defense Technology'],
            'summary': ['This is a demo entry. Run run_pipeline.py to process real data.'],
            'source': ['DEMO'],
            'date': ['2024-01-01'],
            'investment_score': [50.0],
            'trl': [5],
            'dual_use_score': [50],
            'gov_momentum': [50],
            'ip_moat': [50],
            'cap_intensity': [2],
            'theme': ['general'],
            'score_explanation': ['Demo entry'],
            'recommendation': ['Run pipeline to see real recommendations'],
            'url': ['https://example.com'],
        })
        # Random placeholder vector; 384 matches the width used for the demo
        # embedding in the original code.
        self.embeddings = np.random.rand(1, 384)
        self.last_updated = datetime.now().strftime('%Y-%m-%d')

    def load_model(self):
        """Load the sentence transformer model on first use."""
        if self.model is None:
            logger.info("Loading sentence transformer model...")
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
            logger.info("Model loaded")

    def semantic_search(self, query, top_k=10, source_filter=None):
        """Perform semantic search on the dataset.

        Args:
            query: Free-text search query.
            top_k: Maximum number of rows to return (ints or whole floats).
            source_filter: Source name to restrict results to; ``None``,
                empty or ``"All"`` disables filtering.

        Returns:
            ``(results_df, None)`` on success, where ``results_df`` has an
            added ``similarity`` column and is ordered best match first, or
            ``(None, message)`` when no data or embeddings are available.
        """
        if self.df is None or len(self.df) == 0:
            return None, "No data available. Please run the pipeline first."
        if self.embeddings is None:
            # The data file was present but had no 'embedding' column.
            return None, "No embeddings available. Please run the pipeline first."

        self.load_model()

        # Generate query embedding
        query_embedding = self.model.encode([query], convert_to_numpy=True)

        # Calculate similarities
        similarities = cosine_similarity(query_embedding, self.embeddings)[0]

        return self._rank_matches(similarities, top_k, source_filter), None

    def _rank_matches(self, similarities, top_k=10, source_filter=None):
        """Return the ``top_k`` best rows of ``self.df`` for the given scores.

        The source filter is applied to the full ranking *before* truncating
        so that matches from a sparsely represented source are not lost.
        """
        similarities = np.asarray(similarities)
        order = np.argsort(similarities)[::-1]

        if source_filter and source_filter != "All":
            keep = (self.df['source'] == source_filter).to_numpy()
            order = order[keep[order]]

        # UI widgets may deliver the count as a float; slicing needs an int.
        order = order[:max(int(top_k), 0)]

        results_df = self.df.iloc[order].copy()
        results_df['similarity'] = similarities[order]
        return results_df

    @staticmethod
    def _capital_intensity_label(value):
        """Map a capital-intensity code to its label (1 Low, 2 Medium, else High)."""
        if value == 1:
            return "Low"
        if value == 2:
            return "Medium"
        return "High"

    def generate_dossier(self, query, top_k=10, source_filter="All"):
        """Generate investment dossier for query.

        Returns:
            ``(markdown_text, results_table)``. When the search fails or
            yields nothing the first element is a message and the second is
            ``None``.
        """
        results_df, error = self.semantic_search(query, top_k, source_filter)
        if error:
            return error, None
        if results_df is None or len(results_df) == 0:
            return "No results found.", None

        # Get top result for detailed dossier
        top_result = results_df.iloc[0]
        cap_label = self._capital_intensity_label(top_result['cap_intensity'])

        # Create dossier text. Blank lines separate the Markdown sections so
        # that text preceding a '---' rule is not rendered as a heading.
        dossier_lines = [
            "",
            "# 📊 INVESTMENT DOSSIER",
            "",
            f"## {top_result['title']}",
            "",
            f"**Source**: {top_result['source']} | **Date**: {top_result['date']} | **Theme**: {top_result['theme']}",
            "",
            "---",
            "",
            "### 📝 Summary",
            "",
            # str() guards against non-string summaries (e.g. NaN).
            str(top_result['summary'])[:500],
            "",
            "---",
            "",
            f"### 💯 Investment Score: {top_result['investment_score']}/100",
            "",
            f"**Ranking**: #{top_result['rank']} out of {len(self.df)} opportunities",
            "",
            "**Score Breakdown:**",
            "",
            f"- 🎯 **Dual-Use Potential**: {top_result['dual_use_score']}/100",
            f"- 🔬 **Technology Readiness (TRL)**: {top_result['trl']}/9",
            f"- 🛡️ **IP Moat**: {top_result['ip_moat']}/100",
            f"- 📈 **Government Momentum**: {top_result['gov_momentum']}/100",
            f"- 💰 **Capital Intensity**: {cap_label}",
            "",
            "---",
            "",
            "### 🔍 Why This Score?",
            "",
            f"{top_result['score_explanation']}",
            "",
            "---",
            "",
            "### 💡 Recommendation",
            "",
            f"**{top_result['recommendation']}**",
            "",
            "---",
            "",
            "### 🔗 Source",
            "",
            f"{top_result['url']}",
            "",
            "---",
            "",
            f"*Similarity to query: {top_result['similarity']:.2%}*",
            "",
        ]
        dossier = "\n".join(dossier_lines)

        # Create results table
        results_table = results_df[[
            'rank', 'title', 'source', 'investment_score',
            'trl', 'dual_use_score', 'theme', 'similarity'
        ]].copy()
        results_table['similarity'] = results_table['similarity'].apply(lambda x: f"{x:.1%}")
        results_table.columns = ['Rank', 'Title', 'Source', 'Score', 'TRL', 'Dual-Use', 'Theme', 'Match']

        return dossier, results_table

    def get_top_opportunities(self, n=20):
        """Get top N investment opportunities.

        Takes the first ``n`` rows of the dataset as-is; this presumably
        relies on the pipeline having stored the rows in rank order —
        TODO confirm. ``n`` may arrive as a float from the UI and is
        converted to an int before slicing.

        Returns:
            A display-ready DataFrame, or ``None`` when there is no data.
        """
        if self.df is None or len(self.df) == 0:
            return None

        top_df = self.df.head(int(n))[[
            'rank', 'title', 'source', 'date', 'investment_score',
            'trl', 'dual_use_score', 'theme', 'recommendation'
        ]].copy()
        top_df.columns = ['Rank', 'Title', 'Source', 'Date', 'Score', 'TRL', 'Dual-Use', 'Theme', 'Recommendation']
        return top_df

    def get_source_stats(self):
        """Get statistics by source.

        Returns:
            One row per source with record count, mean/max investment score,
            mean dual-use score and mean TRL (rounded to one decimal), or
            ``None`` when there is no data.
        """
        if self.df is None or len(self.df) == 0:
            return None

        stats = self.df.groupby('source').agg({
            'investment_score': ['count', 'mean', 'max'],
            'dual_use_score': 'mean',
            'trl': 'mean'
        }).round(1)
        # Flatten the (column, aggregate) MultiIndex into display names.
        stats.columns = ['Count', 'Avg Score', 'Max Score', 'Avg Dual-Use', 'Avg TRL']
        return stats.reset_index().rename(columns={'source': 'Source'})
def create_gradio_interface():
    """Create the Gradio interface.

    Builds an ``InvestmentDossierApp`` (which loads the dataset) and wires
    its methods to a four-tab ``gr.Blocks`` layout: semantic search, top
    opportunities, analytics and an about page.

    Returns:
        The constructed ``gr.Blocks`` object, not yet launched.
    """
    app = InvestmentDossierApp()

    # Custom CSS
    custom_css = """
    .gradio-container {
        font-family: 'Arial', sans-serif;
    }
    .header {
        text-align: center;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    """

    with gr.Blocks(css=custom_css, title="Defence-Tech Investment Scanner") as demo:
        # Header
        gr.Markdown(f"""
        <div class="header">
        <h1>🇮🇳 Bharat Defence-Tech Investment Scanner</h1>
        <p>AI-powered semantic search for dual-use technology investments</p>
        <p><em>Dataset last updated: {app.last_updated}</em></p>
        </div>
        """)

        # Main search interface
        with gr.Tab("🔍 Semantic Search"):
            gr.Markdown("""
            Search for technologies, companies, or defense applications using natural language.
            The AI will find the most relevant opportunities and generate an investment dossier.
            """)

            with gr.Row():
                with gr.Column(scale=3):
                    search_input = gr.Textbox(
                        label="Search Query",
                        placeholder="e.g., 'satellite communication systems' or 'drone technology for surveillance'",
                        lines=2
                    )
                with gr.Column(scale=1):
                    source_filter = gr.Dropdown(
                        choices=["All", "ISRO", "DRDO", "iDEX", "BEL", "HAL", "IN-SPACe", "PIB"],
                        value="All",
                        label="Filter by Source"
                    )
                    top_k_slider = gr.Slider(
                        minimum=5, maximum=30, value=10, step=5,
                        label="Number of results"
                    )

            search_button = gr.Button("🔍 Search", variant="primary", size="lg")

            with gr.Row():
                with gr.Column():
                    dossier_output = gr.Markdown(label="Investment Dossier")
                with gr.Column():
                    results_output = gr.Dataframe(label="Top Matches")

            search_button.click(
                fn=app.generate_dossier,
                inputs=[search_input, top_k_slider, source_filter],
                outputs=[dossier_output, results_output]
            )

            # Example queries
            gr.Markdown("### Example Queries")
            gr.Examples(
                examples=[
                    ["satellite communication technology", 10, "All"],
                    ["autonomous drone systems", 10, "All"],
                    ["quantum encryption defense", 10, "All"],
                    ["radar and surveillance systems", 10, "DRDO"],
                    ["space launch technology", 10, "ISRO"],
                ],
                inputs=[search_input, top_k_slider, source_filter]
            )

        # Top opportunities tab
        with gr.Tab("🏆 Top Opportunities"):
            gr.Markdown("""
            View the highest-scoring investment opportunities ranked by our AI scoring algorithm.
            """)

            n_opportunities = gr.Slider(
                minimum=10, maximum=50, value=20, step=10,
                label="Number of opportunities to display"
            )
            refresh_button = gr.Button("🔄 Refresh Top Opportunities", variant="secondary")
            top_opps_output = gr.Dataframe(label="Top Investment Opportunities")

            refresh_button.click(
                fn=app.get_top_opportunities,
                inputs=[n_opportunities],
                outputs=[top_opps_output]
            )

            # Auto-load on startup. The visible slider is reused as the
            # input: a hidden gr.Number would hand the callback a float
            # (20.0), which DataFrame.head() rejects.
            demo.load(
                fn=app.get_top_opportunities,
                inputs=[n_opportunities],
                outputs=[top_opps_output]
            )

        # Analytics tab
        with gr.Tab("📊 Analytics"):
            gr.Markdown("""
            Dataset statistics and source-wise breakdowns.
            """)

            stats_button = gr.Button("📈 Generate Statistics", variant="secondary")
            stats_output = gr.Dataframe(label="Source Statistics")

            stats_button.click(
                fn=app.get_source_stats,
                inputs=[],
                outputs=[stats_output]
            )

            # Dataset info (computed once, when the interface is built)
            has_data = app.df is not None
            total_records = len(app.df) if has_data else 0
            sources_covered = app.df['source'].nunique() if has_data else 0
            first_date = app.df['date'].min() if has_data else 'N/A'
            last_date = app.df['date'].max() if has_data else 'N/A'
            gr.Markdown(f"""
            ### Dataset Information

            - **Total Records**: {total_records}
            - **Sources Covered**: {sources_covered}
            - **Date Range**: {first_date} to {last_date}
            - **Last Updated**: {app.last_updated}
            """)

        # About tab
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
            ## About This Tool

            The **Bharat Defence-Tech Investment Scanner** uses AI to identify and score dual-use technology
            investment opportunities from India's defence and space ecosystem.

            ### 📊 Scoring Methodology

            Investment scores (0-100) are calculated using:

            - **Dual-Use Potential** (30%): Commercial applicability beyond defence
            - **Technology Readiness** (25%): TRL level (1-9 scale)
            - **Government Momentum** (20%): Tender activity, procurement signals
            - **IP Moat** (15%): Patent strength and technology transfer potential
            - **Capital Intensity** (10%): Lower capex = higher score

            ### 🎯 Data Sources

            - **ISRO**: Space missions, satellite technology
            - **DRDO**: Defence R&D, missile systems
            - **iDEX**: Defence innovation challenges
            - **BEL**: Defence electronics, radar systems
            - **HAL**: Aerospace development
            - **IN-SPACe**: Private space sector
            - **PIB**: Government press releases

            ### 🔍 Semantic Search

            Powered by sentence-transformers (all-MiniLM-L6-v2) for intelligent matching
            beyond keyword search.

            ### ⚖️ Limitations & Disclaimers

            - Data sourced from public government websites only
            - Scores are algorithmic estimates, not investment advice
            - Always conduct thorough due diligence
            - Update frequency: Weekly (manual curation) + Monthly (automated scraping)

            ### 📝 License & Attribution

            - All data from public government sources
            - Respects robots.txt and rate limits
            - Source attribution maintained for all records

            ---

            **Built with**: Python, Pandas, Sentence-Transformers, Gradio
            **Deployed on**: Hugging Face Spaces
            **Version**: 1.0
            """)

    return demo
def main():
    """Build the Gradio interface and serve it.

    The server binds to all network interfaces on port 7860 and does not
    request a public share link.
    """
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    create_gradio_interface().launch(**launch_options)


# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    main()