# Source: app.py, uploaded by Anupam202224 ("Create app.py", commit 335040e, verified)
import spacy
from transformers import pipeline
from collections import defaultdict
import pandas as pd
import gradio as gr
import os
# Load spaCy model for prompt parsing. generate_project_document runs prompts
# through this pipeline and reads the named entities (doc.ents) it produces.
# The model is loaded once at import time and shared by every call.
nlp = spacy.load("en_core_web_sm")
# Load text generation model (GPT-2) used to draft the body of each document
# section. Also created once at import time and reused across calls.
text_generator = pipeline('text-generation', model='gpt2')
# Load dataset
dataset_path = "public_service_data.csv"  # Ensure this file is uploaded to your Hugging Face Space


def load_dataset(path):
    """Return the dataset stored at *path*, or a one-row default frame.

    The default frame has a single ``growth_rate`` column containing ``0``.
    It is returned when *path* is not a regular file (silently, as a missing
    file is an expected deployment state), and also -- with a warning -- when
    the file cannot be parsed as CSV or lacks the ``growth_rate`` column that
    the document generator reads.

    Args:
        path: Filesystem path of the CSV file to load.

    Returns:
        A ``pandas.DataFrame`` that always has a ``growth_rate`` column.
    """
    import warnings  # local import: only needed on the fallback paths

    fallback = pd.DataFrame({"growth_rate": [0]})  # Default structure if the file isn't usable

    # isfile (not exists) so that a directory with this name is not handed to read_csv.
    if not os.path.isfile(path):
        return fallback

    try:
        frame = pd.read_csv(path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        warnings.warn(f"Could not parse {path!r} ({err}); using default growth data.")
        return fallback

    # Downstream code indexes data_df['growth_rate']; without the column it
    # would fail with KeyError on the first 'Project Proposal' request.
    if "growth_rate" not in frame.columns:
        warnings.warn(f"{path!r} has no 'growth_rate' column; using default growth data.")
        return fallback

    return frame


data_df = load_dataset(dataset_path)
def generate_project_document(prompt, document_type):
    """Generate a multi-section document from a free-text prompt.

    The prompt is parsed with the module-level spaCy pipeline; the first
    ``ORG`` entity is used as the industry and the first ``PRODUCT`` entity as
    the product, with generic placeholders when none is found. Each section of
    the chosen document type is then drafted by the module-level
    text-generation pipeline. For a 'Project Proposal', the Introduction is
    extended with a sentence quoting the mean of ``data_df['growth_rate']``.

    Args:
        prompt: Free-text description of the document to generate. ``None``
            is treated as an empty prompt.
        document_type: One of 'Project Proposal', 'Report' or 'Summary'.

    Returns:
        The document as a single string, sections separated by blank lines and
        each introduced by its title on its own line. If ``document_type`` is
        not recognised, the string "Invalid document type" is returned, so the
        result is always a string suitable for a single text output.
    """
    sections_by_type = {
        'Project Proposal': ['Introduction', 'Objectives', 'Methodology', 'Expected Outcomes'],
        'Report': ['Executive Summary', 'Findings', 'Analysis', 'Conclusion'],
        'Summary': ['Overview', 'Key Points', 'Conclusion'],
    }
    # Validate first so an unsupported type costs no model inference.
    sections = sections_by_type.get(document_type)
    if sections is None:
        return "Invalid document type"

    # Parse prompt using spaCy
    doc = nlp(prompt or "")
    # Identify key elements: first matching entity wins, else a placeholder.
    industry = next((ent.text for ent in doc.ents if ent.label_ == 'ORG'), "the relevant industry")
    product = next((ent.text for ent in doc.ents if ent.label_ == 'PRODUCT'), "the specified product")

    # Generate content for each section
    # NOTE(review): the pipeline's 'generated_text' appears to include the
    # instruction prompt itself by default -- confirm whether
    # return_full_text=False is wanted here.
    content = {}
    for section in sections:
        prompt_text = f"Generate {section.lower()} for a {industry} project on {product}."
        content[section] = text_generator(prompt_text, max_length=200, num_return_sequences=1)[0]['generated_text']

    # Enrich content with data
    if document_type == 'Project Proposal':
        growth_rate = 0
        # Guard against a dataset without the column, with no rows, or with
        # only missing values, any of which would otherwise raise or print "nan%".
        if 'growth_rate' in data_df.columns and not data_df.empty:
            mean_rate = data_df['growth_rate'].mean()
            if not pd.isna(mean_rate):
                growth_rate = mean_rate
        content['Introduction'] += f"\nThe {industry} industry has witnessed significant advancements in {product} technology. According to the latest industry data, the market for {product} is expected to grow by {growth_rate:.2f}% annually."

    # Compile document as string to return
    return "\n\n".join(f"{section}\n{text}" for section, text in content.items())
# Gradio interface
def app():
    """Build the two-column Gradio UI and launch it.

    The left column holds the prompt box, the document-type dropdown and the
    submit button; the right column holds the output box. Clicking the button
    calls generate_project_document with the prompt and the selected type and
    shows its return value in the output box.
    """
    type_choices = ["Project Proposal", "Report", "Summary"]

    with gr.Blocks() as demo:
        with gr.Row():
            # Input controls.
            with gr.Column():
                prompt_box = gr.Textbox(label="Document Prompt")
                type_dropdown = gr.Dropdown(label="Document Type", choices=type_choices)
                generate_button = gr.Button("Generate Document")
            # Result display.
            with gr.Column():
                result_box = gr.Textbox(label="Generated Document Content", lines=15)

        generate_button.click(
            generate_project_document,
            inputs=[prompt_box, type_dropdown],
            outputs=result_box,
        )

    demo.launch()
# Start the Gradio UI only when this file is run as a script, so importing the
# module (e.g. to reuse generate_project_document) does not launch a server.
if __name__ == "__main__":
    app()