"""Gradio app that drafts sectioned project documents from a free-text prompt.

The prompt is parsed with spaCy to pick out an organisation and a product
entity, each section of the chosen document type is generated with a GPT-2
text-generation pipeline, and project proposals are enriched with the mean
growth rate read from an optional CSV dataset.
"""
import os
from collections import defaultdict

import gradio as gr
import pandas as pd
import spacy
from transformers import pipeline

# Load spaCy model for prompt parsing
nlp = spacy.load("en_core_web_sm")

# Load text generation model
text_generator = pipeline('text-generation', model='gpt2')

# Load dataset
dataset_path = "public_service_data.csv"  # Ensure this file is uploaded to your Hugging Face Space
if os.path.exists(dataset_path):
    data_df = pd.read_csv(dataset_path)
else:
    # Default structure if the file isn't present
    data_df = pd.DataFrame({"growth_rate": [0]})

# Message returned (as the document text) when the document type is unknown.
_INVALID_DOCUMENT_TYPE_MESSAGE = "Invalid document type"

# Sections generated for each supported document type, in output order.
# The dropdown choices in app() are derived from these keys so that the UI
# and the generator cannot drift apart.
_SECTIONS_BY_DOCUMENT_TYPE = {
    'Project Proposal': ['Introduction', 'Objectives', 'Methodology', 'Expected Outcomes'],
    'Report': ['Executive Summary', 'Findings', 'Analysis', 'Conclusion'],
    'Summary': ['Overview', 'Key Points', 'Conclusion'],
}


def _mean_growth_rate(df):
    """Return the mean of ``df['growth_rate']``, or 0.0 when it is unavailable.

    "Unavailable" means the frame is empty, the column is missing (e.g. the
    uploaded CSV has a different schema), or the mean is NaN because the
    column holds no usable values.
    """
    if df.empty or 'growth_rate' not in df.columns:
        return 0.0
    mean_rate = df['growth_rate'].mean()
    return 0.0 if pd.isna(mean_rate) else float(mean_rate)


def generate_project_document(prompt, document_type):
    """Generate the text of a document of ``document_type`` from ``prompt``.

    Args:
        prompt: Free-text description of the project. The first ``ORG``
            entity spaCy finds is used as the industry and the first
            ``PRODUCT`` entity as the product; generic placeholders are used
            when none is found. ``None`` is treated as an empty prompt.
        document_type: One of ``'Project Proposal'``, ``'Report'`` or
            ``'Summary'``.

    Returns:
        A single string with one ``"<section>\\n<text>"`` entry per section,
        separated by blank lines. For an unsupported ``document_type``
        (including ``None`` when nothing is selected in the UI) the string
        ``"Invalid document type"`` is returned, so the single output textbox
        always receives exactly one value.
    """
    # Validate first: avoids running the NLP and generation models for a
    # request that cannot succeed.
    sections = _SECTIONS_BY_DOCUMENT_TYPE.get(document_type)
    if sections is None:
        return _INVALID_DOCUMENT_TYPE_MESSAGE

    # Parse prompt using spaCy
    doc = nlp(prompt or "")

    # Identify key elements
    industry = next(
        (ent.text for ent in doc.ents if ent.label_ == 'ORG'),
        "the relevant industry",
    )
    product = next(
        (ent.text for ent in doc.ents if ent.label_ == 'PRODUCT'),
        "the specified product",
    )

    # Generate content for each section (dict preserves section order)
    content = {}
    for section in sections:
        prompt_text = f"Generate {section.lower()} for a {industry} project on {product}."
        generated = text_generator(prompt_text, max_length=200, num_return_sequences=1)
        content[section] = generated[0]['generated_text']

    # Enrich content with data
    if document_type == 'Project Proposal':
        growth_rate = _mean_growth_rate(data_df)
        content['Introduction'] += f"\nThe {industry} industry has witnessed significant advancements in {product} technology. According to the latest industry data, the market for {product} is expected to grow by {growth_rate:.2f}% annually."

    # Compile document as string to return
    return "\n\n".join(f"{section}\n{text}" for section, text in content.items())


# Gradio interface
def app():
    """Build the Gradio UI and launch it."""
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Document Prompt")
                document_type = gr.Dropdown(
                    label="Document Type",
                    choices=list(_SECTIONS_BY_DOCUMENT_TYPE),
                )
                submit = gr.Button("Generate Document")
            with gr.Column():
                output = gr.Textbox(label="Generated Document Content", lines=15)
        submit.click(generate_project_document, inputs=[prompt, document_type], outputs=output)
    # Launch once the layout context is closed and the UI is fully defined.
    demo.launch()


if __name__ == "__main__":
    app()