Spaces:

AAdevloper
/

BharatDefenseTechScannerv1

Sleeping

AAdevloper

Deploy Defence-Tech Investment Scanner v1.0 - Complete AI-powered tool with 42 scored opportunities

7e85722 4 months ago

14.9 kB

	"""
	Gradio UI for Defence-Tech Investment Scanner.
	Deploys to Hugging Face Spaces.
	"""

	import gradio as gr
	import pandas as pd
	import numpy as np
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	from pathlib import Path
	from datetime import datetime
	import logging

	# Configure the root logger so INFO-level (and higher) records are emitted.
	logging.basicConfig(level=logging.INFO)
	# Module-level logger used by every function and method in this file.
	logger = logging.getLogger(__name__)


	class InvestmentDossierApp:
	    """Backend for semantic search and investment dossier generation.

	    Attributes are plain instance state:
	      * ``df`` - DataFrame of scored records (or a one-row demo frame).
	      * ``embeddings`` - 2-D array with one embedding row per ``df`` row,
	        or ``None`` when the data file carries no ``embedding`` column.
	      * ``model`` - sentence-transformer model, loaded on first search.
	      * ``last_updated`` - ``YYYY-MM-DD`` string shown in the UI.
	    """

	    def __init__(self, data_file='data/processed/scored_data.parquet'):
	        """Store the data path and load the dataset immediately.

	        Args:
	            data_file: Path of the parquet file holding the scored records.
	        """
	        self.data_file = data_file
	        self.df = None
	        self.embeddings = None
	        self.model = None
	        self.last_updated = None
	        self.load_data()

	    def load_data(self):
	        """Load scored records and embeddings, or fall back to demo data."""
	        logger.info("Loading data...")

	        if not Path(self.data_file).exists():
	            logger.warning(f"Data file not found: {self.data_file}")
	            # Create sample data for demo
	            self.create_demo_data()
	            return

	        # Load scored data
	        self.df = pd.read_parquet(self.data_file)
	        logger.info(f"Loaded {len(self.df)} records")

	        # Extract embeddings
	        if 'embedding' in self.df.columns:
	            self.embeddings = np.array(self.df['embedding'].tolist())
	            logger.info(f"Loaded embeddings: {self.embeddings.shape}")
	        else:
	            logger.warning("No 'embedding' column in data; semantic search is unavailable")

	        # Get last update time
	        if 'scrape_date' in self.df.columns and len(self.df) > 0:
	            # str() first so non-string values (e.g. timestamps) can be sliced too.
	            self.last_updated = str(self.df['scrape_date'].iloc[0])[:10]
	        else:
	            self.last_updated = datetime.now().strftime('%Y-%m-%d')

	    def create_demo_data(self):
	        """Create a one-row demo dataset when processed data is not available.

	        The frame carries every column the other methods index, including
	        ``rank`` and ``cap_intensity``, so the UI works in demo mode.
	        """
	        logger.info("Creating demo data...")
	        self.df = pd.DataFrame({
	            'rank': [1],
	            'title': ['Sample Defense Technology'],
	            'summary': ['This is a demo entry. Run run_pipeline.py to process real data.'],
	            'source': ['DEMO'],
	            'date': ['2024-01-01'],
	            'investment_score': [50.0],
	            'trl': [5],
	            'dual_use_score': [50],
	            'gov_momentum': [50],
	            'ip_moat': [50],
	            'cap_intensity': [2],
	            'theme': ['general'],
	            'score_explanation': ['Demo entry'],
	            'recommendation': ['Run pipeline to see real recommendations'],
	            'url': ['https://example.com'],
	        })
	        # Random placeholder vector; 384 columns as in the original demo data.
	        self.embeddings = np.random.rand(1, 384)
	        self.last_updated = datetime.now().strftime('%Y-%m-%d')

	    def load_model(self):
	        """Load the sentence-transformer model on first use."""
	        if self.model is None:
	            logger.info("Loading sentence transformer model...")
	            self.model = SentenceTransformer('all-MiniLM-L6-v2')
	            logger.info("Model loaded")

	    def semantic_search(self, query, top_k=10, source_filter=None):
	        """Rank records by cosine similarity to ``query``.

	        Args:
	            query: Free-text search string.
	            top_k: Maximum number of rows to return (coerced to ``int``).
	            source_filter: Exact ``source`` value to keep; ``None``, empty
	                or ``"All"`` disables filtering.

	        Returns:
	            ``(results_df, None)`` on success, where ``results_df`` holds the
	            best-matching rows plus a ``similarity`` column, ordered from
	            most to least similar; ``(None, message)`` when the search
	            cannot run.
	        """
	        if self.df is None or len(self.df) == 0:
	            return None, "No data available. Please run the pipeline first."
	        if self.embeddings is None or len(self.embeddings) != len(self.df):
	            return None, "No embeddings available. Please run the pipeline first."
	        if not query or not query.strip():
	            return None, "Please enter a search query."

	        # UI widgets may deliver the count as a float; slicing needs an int.
	        top_k = max(int(top_k), 0)

	        self.load_model()

	        # Generate query embedding
	        query_embedding = self.model.encode([query], convert_to_numpy=True)

	        # Calculate similarities
	        similarities = cosine_similarity(query_embedding, self.embeddings)[0]

	        # Filter by source BEFORE ranking, so a filtered search still returns
	        # up to top_k rows whenever that many matching records exist.
	        if source_filter and source_filter != "All":
	            source_mask = (self.df['source'] == source_filter).to_numpy()
	            candidates = np.flatnonzero(source_mask)
	        else:
	            candidates = np.arange(len(self.df))

	        # Positions of the top_k most similar candidates, best first
	        best_first = np.argsort(similarities[candidates])[::-1][:top_k]
	        top_indices = candidates[best_first]

	        results_df = self.df.iloc[top_indices].copy()
	        results_df['similarity'] = similarities[top_indices]

	        return results_df, None

	    def generate_dossier(self, query, top_k=10, source_filter="All"):
	        """Generate a Markdown investment dossier for the best match.

	        Args:
	            query: Free-text search string.
	            top_k: Maximum number of matches to list in the results table.
	            source_filter: Source name to restrict to, or ``"All"``.

	        Returns:
	            ``(dossier_markdown, results_table)`` on success, or
	            ``(message, None)`` when the search fails or finds nothing.
	        """
	        results_df, error = self.semantic_search(query, top_k, source_filter)

	        if error:
	            return error, None

	        if results_df is None or len(results_df) == 0:
	            return "No results found.", None

	        # Get top result for detailed dossier
	        top_result = results_df.iloc[0]

	        # Missing summaries (non-string values) are shown as empty text.
	        summary = top_result['summary']
	        summary_text = summary[:500] if isinstance(summary, str) else ""

	        # 1 -> Low, 2 -> Medium, anything else -> High
	        cap_intensity = top_result['cap_intensity']
	        if cap_intensity == 1:
	            cap_label = "Low"
	        elif cap_intensity == 2:
	            cap_label = "Medium"
	        else:
	            cap_label = "High"

	        # Create dossier text ("\\|" emits a literal backslash-pipe)
	        dossier = f"""
	# 📊 INVESTMENT DOSSIER

	## {top_result['title']}

	Source: {top_result['source']} \\| Date: {top_result['date']} \\| Theme: {top_result['theme']}

	---

	### 📝 Summary
	{summary_text}

	---

	### 💯 Investment Score: {top_result['investment_score']}/100

	Ranking: #{top_result['rank']} out of {len(self.df)} opportunities

	Score Breakdown:
	- 🎯 Dual-Use Potential: {top_result['dual_use_score']}/100
	- 🔬 Technology Readiness (TRL): {top_result['trl']}/9
	- 🛡️ IP Moat: {top_result['ip_moat']}/100
	- 📈 Government Momentum: {top_result['gov_momentum']}/100
	- 💰 Capital Intensity: {cap_label}

	---

	### 🔍 Why This Score?

	{top_result['score_explanation']}

	---

	### 💡 Recommendation

	{top_result['recommendation']}

	---

	### 🔗 Source
	{top_result['url']}

	---

	Similarity to query: {top_result['similarity']:.2%}
	"""

	        # Create results table
	        results_table = results_df[[
	            'rank', 'title', 'source', 'investment_score',
	            'trl', 'dual_use_score', 'theme', 'similarity'
	        ]].copy()

	        results_table['similarity'] = results_table['similarity'].apply(lambda x: f"{x:.1%}")
	        results_table.columns = ['Rank', 'Title', 'Source', 'Score', 'TRL', 'Dual-Use', 'Theme', 'Match']

	        return dossier, results_table

	    def get_top_opportunities(self, n=20):
	        """Return the first ``n`` rows of the dataset as a display table.

	        Args:
	            n: Number of rows to return (coerced to ``int``).

	        Returns:
	            DataFrame with user-facing column names, or ``None`` when no
	            data is loaded.
	        """
	        if self.df is None or len(self.df) == 0:
	            return None

	        # head() rejects floats, which UI number widgets can deliver.
	        top_df = self.df.head(int(n))[[
	            'rank', 'title', 'source', 'date', 'investment_score',
	            'trl', 'dual_use_score', 'theme', 'recommendation'
	        ]].copy()

	        top_df.columns = ['Rank', 'Title', 'Source', 'Date', 'Score', 'TRL', 'Dual-Use', 'Theme', 'Recommendation']

	        return top_df

	    def get_source_stats(self):
	        """Aggregate score statistics per source.

	        Returns:
	            DataFrame with one row per source and the columns ``Source``,
	            ``Count``, ``Avg Score``, ``Max Score``, ``Avg Dual-Use`` and
	            ``Avg TRL`` (rounded to one decimal), or ``None`` when no data
	            is loaded.
	        """
	        if self.df is None or len(self.df) == 0:
	            return None

	        stats = self.df.groupby('source').agg({
	            'investment_score': ['count', 'mean', 'max'],
	            'dual_use_score': 'mean',
	            'trl': 'mean'
	        }).round(1)

	        # Flatten the two-level aggregate header, then expose the group key.
	        stats.columns = ['Count', 'Avg Score', 'Max Score', 'Avg Dual-Use', 'Avg TRL']
	        return stats.reset_index().rename(columns={'source': 'Source'})


	def create_gradio_interface():
	    """Create the Gradio interface.

	    Builds an ``InvestmentDossierApp`` and wires its methods to four tabs:
	    semantic search, top opportunities, analytics and an about page.

	    Returns:
	        The assembled ``gr.Blocks`` object, not yet launched.
	    """

	    app = InvestmentDossierApp()

	    # Values for the dataset summary; an absent or empty frame shows
	    # placeholders instead of NaN.
	    has_data = app.df is not None and len(app.df) > 0
	    total_records = len(app.df) if app.df is not None else 0
	    sources_covered = app.df['source'].nunique() if app.df is not None else 0
	    date_min = app.df['date'].min() if has_data else 'N/A'
	    date_max = app.df['date'].max() if has_data else 'N/A'

	    # Custom CSS
	    custom_css = """
	.gradio-container {
	font-family: 'Arial', sans-serif;
	}
	.header {
	text-align: center;
	padding: 20px;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	border-radius: 10px;
	margin-bottom: 20px;
	}
	"""

	    with gr.Blocks(css=custom_css, title="Defence-Tech Investment Scanner") as demo:

	        # Header
	        gr.Markdown(f"""
	<div class="header">
	<h1>🇮🇳 Bharat Defence-Tech Investment Scanner</h1>
	<p>AI-powered semantic search for dual-use technology investments</p>
	<p><em>Dataset last updated: {app.last_updated}</em></p>
	</div>
	""")

	        # Main search interface
	        with gr.Tab("🔍 Semantic Search"):
	            gr.Markdown("""
	Search for technologies, companies, or defense applications using natural language.
	The AI will find the most relevant opportunities and generate an investment dossier.
	""")

	            with gr.Row():
	                with gr.Column(scale=3):
	                    search_input = gr.Textbox(
	                        label="Search Query",
	                        placeholder="e.g., 'satellite communication systems' or 'drone technology for surveillance'",
	                        lines=2
	                    )
	                with gr.Column(scale=1):
	                    source_filter = gr.Dropdown(
	                        choices=["All", "ISRO", "DRDO", "iDEX", "BEL", "HAL", "IN-SPACe", "PIB"],
	                        value="All",
	                        label="Filter by Source"
	                    )

	            top_k_slider = gr.Slider(
	                minimum=5, maximum=30, value=10, step=5,
	                label="Number of results"
	            )

	            search_button = gr.Button("🔍 Search", variant="primary", size="lg")

	            with gr.Row():
	                with gr.Column():
	                    dossier_output = gr.Markdown(label="Investment Dossier")
	                with gr.Column():
	                    results_output = gr.Dataframe(label="Top Matches")

	            search_button.click(
	                fn=app.generate_dossier,
	                inputs=[search_input, top_k_slider, source_filter],
	                outputs=[dossier_output, results_output]
	            )

	            # Example queries (each row fills query, result count and source)
	            gr.Markdown("### Example Queries")
	            gr.Examples(
	                examples=[
	                    ["satellite communication technology", 10, "All"],
	                    ["autonomous drone systems", 10, "All"],
	                    ["quantum encryption defense", 10, "All"],
	                    ["radar and surveillance systems", 10, "DRDO"],
	                    ["space launch technology", 10, "ISRO"],
	                ],
	                inputs=[search_input, top_k_slider, source_filter]
	            )

	        # Top opportunities tab
	        with gr.Tab("🏆 Top Opportunities"):
	            gr.Markdown("""
	View the highest-scoring investment opportunities ranked by our AI scoring algorithm.
	""")

	            n_opportunities = gr.Slider(
	                minimum=10, maximum=50, value=20, step=10,
	                label="Number of opportunities to display"
	            )

	            refresh_button = gr.Button("🔄 Refresh Top Opportunities", variant="secondary")

	            top_opps_output = gr.Dataframe(label="Top Investment Opportunities")

	            refresh_button.click(
	                fn=app.get_top_opportunities,
	                inputs=[n_opportunities],
	                outputs=[top_opps_output]
	            )

	            # Auto-load on startup. Reads the slider (initial value 20)
	            # rather than creating a hidden component to carry a constant.
	            demo.load(
	                fn=app.get_top_opportunities,
	                inputs=[n_opportunities],
	                outputs=[top_opps_output]
	            )

	        # Analytics tab
	        with gr.Tab("📊 Analytics"):
	            gr.Markdown("""
	Dataset statistics and source-wise breakdowns.
	""")

	            stats_button = gr.Button("📈 Generate Statistics", variant="secondary")
	            stats_output = gr.Dataframe(label="Source Statistics")

	            stats_button.click(
	                fn=app.get_source_stats,
	                inputs=[],
	                outputs=[stats_output]
	            )

	            # Dataset info (rendered once, when the interface is built)
	            gr.Markdown(f"""
	### Dataset Information
	- Total Records: {total_records}
	- Sources Covered: {sources_covered}
	- Date Range: {date_min} to {date_max}
	- Last Updated: {app.last_updated}
	""")

	        # About tab
	        with gr.Tab("ℹ️ About"):
	            gr.Markdown("""
	## About This Tool

	The Bharat Defence-Tech Investment Scanner uses AI to identify and score dual-use technology
	investment opportunities from India's defence and space ecosystem.

	### 📊 Scoring Methodology

	Investment scores (0-100) are calculated using:

	- Dual-Use Potential (30%): Commercial applicability beyond defence
	- Technology Readiness (25%): TRL level (1-9 scale)
	- Government Momentum (20%): Tender activity, procurement signals
	- IP Moat (15%): Patent strength and technology transfer potential
	- Capital Intensity (10%): Lower capex = higher score

	### 🎯 Data Sources

	- ISRO: Space missions, satellite technology
	- DRDO: Defence R&D, missile systems
	- iDEX: Defence innovation challenges
	- BEL: Defence electronics, radar systems
	- HAL: Aerospace development
	- IN-SPACe: Private space sector
	- PIB: Government press releases

	### 🔍 Semantic Search

	Powered by sentence-transformers (all-MiniLM-L6-v2) for intelligent matching
	beyond keyword search.

	### ⚖️ Limitations & Disclaimers

	- Data sourced from public government websites only
	- Scores are algorithmic estimates, not investment advice
	- Always conduct thorough due diligence
	- Update frequency: Weekly (manual curation) + Monthly (automated scraping)

	### 📝 License & Attribution

	- All data from public government sources
	- Respects robots.txt and rate limits
	- Source attribution maintained for all records

	---

	Built with: Python, Pandas, Sentence-Transformers, Gradio
	Deployed on: Hugging Face Spaces
	Version: 1.0
	""")

	    return demo


	def main():
	    """Build the interface and serve it on every network interface, port 7860."""
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	    }
	    interface = create_gradio_interface()
	    interface.launch(**launch_options)


	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    main()