# Hugging Face Space: Anupam202224 / SmartDocGenerator
# Status: Build error
# File size: 2,870 Bytes
# 335040e

import spacy
from transformers import pipeline
from collections import defaultdict
import pandas as pd
import gradio as gr
import os

# spaCy pipeline used to extract named entities from the user's prompt
nlp = spacy.load("en_core_web_sm")

# GPT-2 text-generation pipeline that writes each document section
text_generator = pipeline('text-generation', model='gpt2')

# Optional dataset; the CSV has to be uploaded to the Hugging Face Space
# alongside this script for it to be picked up.
dataset_path = "public_service_data.csv"
# When the CSV is missing, fall back to a one-row frame with a zero
# growth_rate so later lookups of that column still work.
data_df = (
    pd.read_csv(dataset_path)
    if os.path.exists(dataset_path)
    else pd.DataFrame({"growth_rate": [0]})
)

# Section headings generated for each supported document type.
_SECTIONS_BY_DOCUMENT_TYPE = {
    'Project Proposal': ('Introduction', 'Objectives', 'Methodology', 'Expected Outcomes'),
    'Report': ('Executive Summary', 'Findings', 'Analysis', 'Conclusion'),
    'Summary': ('Overview', 'Key Points', 'Conclusion'),
}


def generate_project_document(prompt, document_type):
    """Generate a multi-section document from a free-text prompt.

    Args:
        prompt: Free-text description of the project. The first ORG and
            PRODUCT entities found by spaCy are used as the industry and
            product names; placeholders are used when none are found.
        document_type: One of 'Project Proposal', 'Report' or 'Summary'.

    Returns:
        The document as a single string, each section rendered as its
        heading followed by the generated text, sections separated by a
        blank line. For an unsupported (or missing) document type the
        string "Invalid document type" is returned, so the caller always
        receives a string it can display directly.
    """
    # Validate first: an unsupported type should not pay for NLP parsing or
    # text generation, and must yield a plain string like every other path.
    sections = (
        _SECTIONS_BY_DOCUMENT_TYPE.get(document_type)
        if isinstance(document_type, str)
        else None
    )
    if sections is None:
        return "Invalid document type"

    # Parse prompt using spaCy (treat a missing prompt as empty text)
    doc = nlp(prompt or "")

    # Identify key elements: first matching entity, else a generic placeholder
    industry = next((ent.text for ent in doc.ents if ent.label_ == 'ORG'), "the relevant industry")
    product = next((ent.text for ent in doc.ents if ent.label_ == 'PRODUCT'), "the specified product")

    # Generate content for each section; dict preserves the section order
    content = {}
    for section in sections:
        prompt_text = f"Generate {section.lower()} for a {industry} project on {product}."
        content[section] = text_generator(prompt_text, max_length=200, num_return_sequences=1)[0]['generated_text']

    # Enrich content with data
    if document_type == 'Project Proposal':
        growth_rate = 0
        # The uploaded CSV may lack the column or hold only missing values;
        # fall back to 0 instead of raising KeyError or printing "nan%".
        if 'growth_rate' in data_df.columns:
            rates = data_df['growth_rate'].dropna()
            if not rates.empty:
                growth_rate = rates.mean()
        content['Introduction'] += f"\nThe {industry} industry has witnessed significant advancements in {product} technology. According to the latest industry data, the market for {product} is expected to grow by {growth_rate:.2f}% annually."

    # Compile document as string to return
    return "\n\n".join(f"{section}\n{text}" for section, text in content.items())

# Gradio interface
def app():
    """Build the two-column Gradio UI and launch it."""
    with gr.Blocks() as ui:
        with gr.Row():
            # Left column: the inputs and the button that triggers generation.
            with gr.Column():
                prompt_box = gr.Textbox(label="Document Prompt")
                type_dropdown = gr.Dropdown(
                    label="Document Type",
                    choices=["Project Proposal", "Report", "Summary"],
                )
                generate_button = gr.Button("Generate Document")
            # Right column: where the generated document text is shown.
            with gr.Column():
                result_box = gr.Textbox(label="Generated Document Content", lines=15)

        # Wire the button to the generator: prompt and type in, text out.
        generate_button.click(
            generate_project_document,
            inputs=[prompt_box, type_dropdown],
            outputs=result_box,
        )

    ui.launch()

# Launch the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app()