AAdevloper
Deploy Defence-Tech Investment Scanner v1.0 - Complete AI-powered tool with 42 scored opportunities
7e85722
"""
Gradio UI for Defence-Tech Investment Scanner.
Deploys to Hugging Face Spaces.
"""
import gradio as gr
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from pathlib import Path
from datetime import datetime
import logging
# Configure the root logger at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by the code below.
logger = logging.getLogger(__name__)
class InvestmentDossierApp:
    """Backend for semantic search and investment dossier generation.

    On construction the scored dataset is read from ``data_file``.  When that
    file does not exist a single placeholder record is created instead so the
    UI can still start.  The sentence-transformer model is created lazily on
    the first search.

    Attributes:
        data_file: Path of the parquet file holding the scored records.
        df: DataFrame of records, or ``None`` before loading.
        embeddings: Array with one embedding row per record of ``df``, or
            ``None`` when the dataset has no ``embedding`` column.
        model: ``SentenceTransformer`` instance, created on first use.
        last_updated: Date string displayed by the UI.
    """

    _NO_DATA_MESSAGE = "No data available. Please run the pipeline first."

    # (dataset column, display header) pairs for the "Top Matches" table.
    _MATCH_COLUMNS = (
        ('rank', 'Rank'),
        ('title', 'Title'),
        ('source', 'Source'),
        ('investment_score', 'Score'),
        ('trl', 'TRL'),
        ('dual_use_score', 'Dual-Use'),
        ('theme', 'Theme'),
        ('similarity', 'Match'),
    )

    # (dataset column, display header) pairs for the "Top Opportunities" table.
    _TOP_COLUMNS = (
        ('rank', 'Rank'),
        ('title', 'Title'),
        ('source', 'Source'),
        ('date', 'Date'),
        ('investment_score', 'Score'),
        ('trl', 'TRL'),
        ('dual_use_score', 'Dual-Use'),
        ('theme', 'Theme'),
        ('recommendation', 'Recommendation'),
    )

    def __init__(self, data_file='data/processed/scored_data.parquet'):
        """Initialize the app and immediately load the dataset.

        Args:
            data_file: Path to the parquet file produced by the pipeline.
        """
        self.data_file = data_file
        self.df = None
        self.embeddings = None
        self.model = None
        self.last_updated = None
        self.load_data()

    def load_data(self):
        """Load processed data and embeddings from ``self.data_file``.

        Falls back to :meth:`create_demo_data` when the file does not exist.
        """
        logger.info("Loading data...")
        data_path = Path(self.data_file)
        if not data_path.exists():
            logger.warning(f"Data file not found: {self.data_file}")
            # Create sample data for demo
            self.create_demo_data()
            return

        self.df = pd.read_parquet(data_path)
        logger.info(f"Loaded {len(self.df)} records")
        has_rows = len(self.df) > 0

        if has_rows and 'embedding' in self.df.columns:
            self.embeddings = np.array(self.df['embedding'].tolist())
            logger.info(f"Loaded embeddings: {self.embeddings.shape}")
        else:
            # Searches report a message instead of crashing in this state.
            self.embeddings = None
            logger.warning("Dataset has no embeddings; semantic search is unavailable")

        if has_rows and 'scrape_date' in self.df.columns:
            # str() lets datetime-typed values work as well as plain strings.
            self.last_updated = str(self.df['scrape_date'].iloc[0])[:10]
        else:
            self.last_updated = datetime.now().strftime('%Y-%m-%d')

    def create_demo_data(self):
        """Create a one-row placeholder dataset when processed data is missing.

        The row carries every column the other methods read (including
        ``rank`` and ``cap_intensity``) so all tabs work in demo mode.
        """
        logger.info("Creating demo data...")
        self.df = pd.DataFrame({
            'rank': [1],
            'title': ['Sample Defense Technology'],
            'summary': ['This is a demo entry. Run run_pipeline.py to process real data.'],
            'source': ['DEMO'],
            'date': ['2024-01-01'],
            'investment_score': [50.0],
            'trl': [5],
            'dual_use_score': [50],
            'gov_momentum': [50],
            'ip_moat': [50],
            'cap_intensity': [2],
            'theme': ['general'],
            'score_explanation': ['Demo entry'],
            'recommendation': ['Run pipeline to see real recommendations'],
            'url': ['https://example.com'],
        })
        # Random placeholder vector; 384 columns, presumably to match the
        # output width of the model named in load_model -- TODO confirm.
        self.embeddings = np.random.rand(1, 384)
        self.last_updated = datetime.now().strftime('%Y-%m-%d')

    def load_model(self):
        """Create the sentence transformer model on first use."""
        if self.model is None:
            logger.info("Loading sentence transformer model...")
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
            logger.info("Model loaded")

    def semantic_search(self, query, top_k=10, source_filter=None):
        """Rank records by cosine similarity between *query* and their embeddings.

        Args:
            query: Free-text search string.
            top_k: Maximum number of rows to return (coerced to ``int``).
            source_filter: When truthy and not ``"All"``, only records whose
                ``source`` equals this value are considered.

        Returns:
            ``(results_df, None)`` on success, where ``results_df`` holds the
            best matches (most similar first) with an added ``similarity``
            column, or ``(None, message)`` when no data/embeddings exist.
        """
        if self.df is None or len(self.df) == 0:
            return None, self._NO_DATA_MESSAGE
        if self.embeddings is None or len(self.embeddings) != len(self.df):
            return None, "No embeddings available. Please run the pipeline first."

        # UI numeric widgets may hand over floats; slicing needs an int.
        top_k = int(top_k)
        self.load_model()

        query_embedding = self.model.encode([query], convert_to_numpy=True)
        similarities = cosine_similarity(query_embedding, self.embeddings)[0]

        # Restrict the candidates BEFORE taking the top k, so a source filter
        # cannot starve the result list when that source ranks low overall.
        if source_filter and source_filter != "All":
            candidates = np.flatnonzero((self.df['source'] == source_filter).to_numpy())
        else:
            candidates = np.arange(len(self.df))

        best_first = np.argsort(similarities[candidates])[::-1][:top_k]
        chosen = candidates[best_first]

        results_df = self.df.iloc[chosen].copy()
        results_df['similarity'] = similarities[chosen]
        return results_df, None

    def generate_dossier(self, query, top_k=10, source_filter="All"):
        """Generate an investment dossier for *query*.

        Args:
            query: Free-text search string.
            top_k: Number of matches to list in the results table.
            source_filter: Source name to restrict to, or ``"All"``.

        Returns:
            ``(dossier_markdown, results_table)``.  When nothing can be shown
            the first element is a message and the second is ``None``.
        """
        if query is None or not str(query).strip():
            return "Please enter a search query.", None

        results_df, error = self.semantic_search(query, top_k, source_filter)
        if error:
            return error, None
        if results_df is None or len(results_df) == 0:
            return "No results found.", None

        # The best match gets the detailed write-up.
        dossier = self._render_dossier(results_df.iloc[0])

        results_table = self._select_columns(results_df, self._MATCH_COLUMNS)
        results_table['Match'] = results_table['Match'].map("{:.1%}".format)
        return dossier, results_table

    def _render_dossier(self, record):
        """Format one search result row (with ``similarity``) as Markdown."""
        def field(name, default="N/A"):
            return self._display(record.get(name), default)

        summary = str(field('summary', ''))[:500]

        cap_intensity = field('cap_intensity', None)
        if cap_intensity is None:
            cap_label = "N/A"
        elif cap_intensity == 1:
            cap_label = "Low"
        elif cap_intensity == 2:
            cap_label = "Medium"
        else:
            cap_label = "High"

        # Blank lines around headings and rules are deliberate: in Markdown a
        # text line directly above '---' is rendered as a heading.
        sections = [
            "",
            "# 📊 INVESTMENT DOSSIER",
            "",
            f"## {field('title')}",
            "",
            f"**Source**: {field('source')} | **Date**: {field('date')} | **Theme**: {field('theme')}",
            "",
            "---",
            "",
            "### 📝 Summary",
            "",
            summary,
            "",
            "---",
            "",
            f"### 💯 Investment Score: {field('investment_score')}/100",
            "",
            f"**Ranking**: #{field('rank')} out of {len(self.df)} opportunities",
            "",
            "**Score Breakdown:**",
            "",
            f"- 🎯 **Dual-Use Potential**: {field('dual_use_score')}/100",
            f"- 🔬 **Technology Readiness (TRL)**: {field('trl')}/9",
            f"- 🛡️ **IP Moat**: {field('ip_moat')}/100",
            f"- 📈 **Government Momentum**: {field('gov_momentum')}/100",
            f"- 💰 **Capital Intensity**: {cap_label}",
            "",
            "---",
            "",
            "### 🔍 Why This Score?",
            "",
            f"{field('score_explanation')}",
            "",
            "---",
            "",
            "### 💡 Recommendation",
            "",
            f"**{field('recommendation')}**",
            "",
            "---",
            "",
            "### 🔗 Source",
            "",
            f"{field('url')}",
            "",
            "---",
            "",
            f"*Similarity to query: {record['similarity']:.2%}*",
            "",
        ]
        return "\n".join(sections)

    @staticmethod
    def _display(value, default="N/A"):
        """Return *value*, or *default* when it is a missing scalar (None/NaN)."""
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return default
        return value

    @staticmethod
    def _select_columns(frame, column_map):
        """Return a new frame with the mapped columns, renamed for display.

        Columns absent from *frame* come back empty rather than raising.
        """
        table = frame.reindex(columns=[column for column, _ in column_map])
        table.columns = [header for _, header in column_map]
        return table

    def get_top_opportunities(self, n=20):
        """Return the first *n* records as a display table.

        Args:
            n: Number of rows to show (coerced to ``int``).

        Returns:
            A DataFrame with display headers, or ``None`` when no data is loaded.
        """
        if self.df is None or len(self.df) == 0:
            return None
        # NOTE(review): takes the first rows as-is, so this assumes the
        # dataset is already ordered best-first -- confirm against the pipeline.
        return self._select_columns(self.df.head(int(n)), self._TOP_COLUMNS)

    def get_source_stats(self):
        """Return per-source record counts and score averages.

        Returns:
            A DataFrame with one row per ``source`` (values rounded to one
            decimal), or ``None`` when no data is loaded.
        """
        if self.df is None or len(self.df) == 0:
            return None
        stats = self.df.groupby('source').agg({
            'investment_score': ['count', 'mean', 'max'],
            'dual_use_score': 'mean',
            'trl': 'mean',
        }).round(1)
        # Flatten the two-level column index produced by the multi-aggregation.
        stats.columns = ['Count', 'Avg Score', 'Max Score', 'Avg Dual-Use', 'Avg TRL']
        return stats.reset_index().rename(columns={'source': 'Source'})
# Stylesheet passed to gr.Blocks; styles the page font and the header banner.
_CUSTOM_CSS = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
"""

# Choices offered by the "Filter by Source" dropdown.
_SOURCE_CHOICES = ["All", "ISRO", "DRDO", "iDEX", "BEL", "HAL", "IN-SPACe", "PIB"]

# Rows for gr.Examples: (query, number of results, source filter).
_EXAMPLE_QUERIES = [
    ["satellite communication technology", 10, "All"],
    ["autonomous drone systems", 10, "All"],
    ["quantum encryption defense", 10, "All"],
    ["radar and surveillance systems", 10, "DRDO"],
    ["space launch technology", 10, "ISRO"],
]

_SEARCH_INTRO_MD = """
Search for technologies, companies, or defense applications using natural language.
The AI will find the most relevant opportunities and generate an investment dossier.
"""

# Static text of the About tab.  Blank lines separate the blocks so the
# footer lines render as separate paragraphs instead of one run-on line.
_ABOUT_MD = """
## About This Tool

The **Bharat Defence-Tech Investment Scanner** uses AI to identify and score dual-use technology
investment opportunities from India's defence and space ecosystem.

### 📊 Scoring Methodology

Investment scores (0-100) are calculated using:

- **Dual-Use Potential** (30%): Commercial applicability beyond defence
- **Technology Readiness** (25%): TRL level (1-9 scale)
- **Government Momentum** (20%): Tender activity, procurement signals
- **IP Moat** (15%): Patent strength and technology transfer potential
- **Capital Intensity** (10%): Lower capex = higher score

### 🎯 Data Sources

- **ISRO**: Space missions, satellite technology
- **DRDO**: Defence R&D, missile systems
- **iDEX**: Defence innovation challenges
- **BEL**: Defence electronics, radar systems
- **HAL**: Aerospace development
- **IN-SPACe**: Private space sector
- **PIB**: Government press releases

### 🔍 Semantic Search

Powered by sentence-transformers (all-MiniLM-L6-v2) for intelligent matching
beyond keyword search.

### ⚖️ Limitations & Disclaimers

- Data sourced from public government websites only
- Scores are algorithmic estimates, not investment advice
- Always conduct thorough due diligence
- Update frequency: Weekly (manual curation) + Monthly (automated scraping)

### 📝 License & Attribution

- All data from public government sources
- Respects robots.txt and rate limits
- Source attribution maintained for all records

---

**Built with**: Python, Pandas, Sentence-Transformers, Gradio

**Deployed on**: Hugging Face Spaces

**Version**: 1.0
"""


def _header_markdown(last_updated):
    """Return the HTML banner shown at the top of the page."""
    return "\n".join([
        '<div class="header">',
        '<h1>🇮🇳 Bharat Defence-Tech Investment Scanner</h1>',
        '<p>AI-powered semantic search for dual-use technology investments</p>',
        f'<p><em>Dataset last updated: {last_updated}</em></p>',
        '</div>',
    ])


def _dataset_info_markdown(app):
    """Return the Markdown summary of the dataset held by *app*.

    Missing ``source``/``date`` columns or all-null dates yield ``0``/``N/A``
    instead of raising while the UI is being built.
    """
    df = app.df
    total = 0 if df is None else len(df)

    source_count = 0
    first_date = last_date = 'N/A'
    if total:
        if 'source' in df.columns:
            source_count = df['source'].nunique()
        if 'date' in df.columns:
            dates = df['date'].dropna()
            if len(dates):
                first_date, last_date = dates.min(), dates.max()

    return "\n".join([
        "### Dataset Information",
        "",
        f"- **Total Records**: {total}",
        f"- **Sources Covered**: {source_count}",
        f"- **Date Range**: {first_date} to {last_date}",
        f"- **Last Updated**: {app.last_updated}",
    ])


def create_gradio_interface():
    """Create the Gradio interface.

    Builds an ``InvestmentDossierApp`` and wires its methods to four tabs:
    semantic search, top opportunities, analytics and a static About page.

    Returns:
        The ``gr.Blocks`` object, not yet launched.
    """
    app = InvestmentDossierApp()

    with gr.Blocks(css=_CUSTOM_CSS, title="Defence-Tech Investment Scanner") as demo:
        # Header
        gr.Markdown(_header_markdown(app.last_updated))

        # Main search interface
        with gr.Tab("🔍 Semantic Search"):
            gr.Markdown(_SEARCH_INTRO_MD)
            with gr.Row():
                with gr.Column(scale=3):
                    search_input = gr.Textbox(
                        label="Search Query",
                        placeholder="e.g., 'satellite communication systems' or 'drone technology for surveillance'",
                        lines=2,
                    )
                with gr.Column(scale=1):
                    source_filter = gr.Dropdown(
                        choices=_SOURCE_CHOICES,
                        value="All",
                        label="Filter by Source",
                    )
                    top_k_slider = gr.Slider(
                        minimum=5, maximum=30, value=10, step=5,
                        label="Number of results",
                    )
            search_button = gr.Button("🔍 Search", variant="primary", size="lg")
            with gr.Row():
                with gr.Column():
                    dossier_output = gr.Markdown(label="Investment Dossier")
                with gr.Column():
                    results_output = gr.Dataframe(label="Top Matches")
            search_button.click(
                fn=app.generate_dossier,
                inputs=[search_input, top_k_slider, source_filter],
                outputs=[dossier_output, results_output],
            )

            # Example queries (clicking one fills in the three inputs)
            gr.Markdown("### Example Queries")
            gr.Examples(
                examples=_EXAMPLE_QUERIES,
                inputs=[search_input, top_k_slider, source_filter],
            )

        # Top opportunities tab
        with gr.Tab("🏆 Top Opportunities"):
            gr.Markdown("""
View the highest-scoring investment opportunities ranked by our AI scoring algorithm.
""")
            n_opportunities = gr.Slider(
                minimum=10, maximum=50, value=20, step=10,
                label="Number of opportunities to display",
            )
            refresh_button = gr.Button("🔄 Refresh Top Opportunities", variant="secondary")
            top_opps_output = gr.Dataframe(label="Top Investment Opportunities")
            refresh_button.click(
                fn=app.get_top_opportunities,
                inputs=[n_opportunities],
                outputs=[top_opps_output],
            )
            # Auto-load on startup, driven by the slider (default 20) rather
            # than a separate hidden number component.
            demo.load(
                fn=app.get_top_opportunities,
                inputs=[n_opportunities],
                outputs=[top_opps_output],
            )

        # Analytics tab
        with gr.Tab("📊 Analytics"):
            gr.Markdown("""
Dataset statistics and source-wise breakdowns.
""")
            stats_button = gr.Button("📈 Generate Statistics", variant="secondary")
            stats_output = gr.Dataframe(label="Source Statistics")
            stats_button.click(
                fn=app.get_source_stats,
                inputs=[],
                outputs=[stats_output],
            )
            # Dataset info (computed once, when the UI is built)
            gr.Markdown(_dataset_info_markdown(app))

        # About tab
        with gr.Tab("ℹ️ About"):
            gr.Markdown(_ABOUT_MD)

    return demo
def main():
    """Build the Gradio interface and start serving it.

    The server is launched on host ``0.0.0.0``, port ``7860``, with
    ``share`` turned off.
    """
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    interface = create_gradio_interface()
    interface.launch(**launch_options)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()