afkdark committed on
Commit
6279673
·
verified ·
1 Parent(s): 56d93b6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ import re
3
+ import random
4
+ from nltk.tokenize import sent_tokenize
5
+ from nltk.tag import pos_tag
6
+ from nltk.chunk import ne_chunk
7
+ from nltk.tree import Tree
8
+ import gradio as gr
9
+
10
# Download the NLTK data packages used below: 'punkt' (sentence/word
# tokenizer models), the averaged-perceptron POS tagger, the named-entity
# chunker, and the word list the chunker depends on.
# NOTE(review): runs at import time and may hit the network on first launch.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
15
+
16
def get_named_entities(text):
    """Return a list of (entity_text, label) pairs found in *text*.

    Runs NLTK's word tokenizer, POS tagger and NE chunker; contiguous
    tokens belonging to one entity are joined with spaces. Labels are
    the chunker's (e.g. 'PERSON', 'GPE', 'ORGANIZATION').
    """
    chunk_tree = ne_chunk(pos_tag(nltk.word_tokenize(text)))
    # Only Tree nodes in the chunk output are named entities; plain
    # (word, tag) tuples are non-entity tokens and are skipped.
    return [
        (' '.join(token for token, _tag in subtree.leaves()), subtree.label())
        for subtree in chunk_tree
        if isinstance(subtree, Tree)
    ]
27
+
28
def generate_question_from_sentence(sentence):
    """Generate a single question from a declarative sentence.

    Applies a cascade of heuristics, returning at the first that fires:
      1. yes/no question for statements containing "is"/"was"/"were"/"are";
      2. "When did ...?" for sentences mentioning a year or a month name;
      3. who/where/what question about the first named entity found;
      4. "Why ...?" for causal sentences ("because", "due to", "as a result");
      5. "What did <subject> do?" when a verb and a noun/pronoun subject exist;
      6. a generic fallback built from a randomly chosen starter phrase.

    Paths 3 and 5 require the NLTK tagger/chunker data to be downloaded;
    path 6 is non-deterministic (uses random.choice).
    """
    # Remove punctuation at the end
    sentence = re.sub(r'[.!?]$', '', sentence)

    # Convert statements with "is", "was", "were", "are" into yes/no questions.
    # FIX: the original guarded this with a *case-sensitive* pre-check
    # (re.search(r'\bis\s|\bwas\s|\bwere\s|\bare\s', sentence)) while the
    # capture below is IGNORECASE, so e.g. "Paris Is beautiful" skipped this
    # branch entirely. A single case-insensitive search handles both.
    match = re.search(r'^(.*?)\s(is|was|were|are)\s(.*?)$', sentence, re.IGNORECASE)
    if match:
        return f"{match.group(2).capitalize()} {match.group(1)} {match.group(3)}?"

    # Check for sentences with dates or years
    if re.search(r'\b(in|on|during)\s\d{4}\b|\b(January|February|March|April|May|June|July|August|September|October|November|December)\b', sentence, re.IGNORECASE):
        return f"When did {sentence.lower()}?"

    # Get named entities
    entities = get_named_entities(sentence)

    # If there are named entities, ask about the first one found
    if entities:
        entity, entity_type = entities[0]
        if entity_type == 'PERSON':
            return f"Who is {entity}?"
        elif entity_type in ['GPE', 'LOCATION']:
            return f"Where is {entity}?"
        elif entity_type == 'ORGANIZATION':
            return f"What is {entity}?"

    # Check for sentences with "because", "due to", "as a result"
    if re.search(r'\bbecause\b|\bdue to\b|\bas a result\b', sentence, re.IGNORECASE):
        return f"Why {sentence.lower()}?"

    # Default questions based on sentence structure
    words = nltk.word_tokenize(sentence)
    pos_tags = pos_tag(words)

    # Check if sentence has a verb
    has_verb = any(tag.startswith('VB') for _, tag in pos_tags)

    if has_verb:
        # Extract subject (simplistic approach: first noun or pronoun wins)
        subject = ""
        for word, tag in pos_tags:
            if tag.startswith('NN') or tag.startswith('PRP'):
                subject = word
                break

        if subject:
            if subject.lower() in ['i', 'you', 'we', 'they']:
                return f"What did {subject.lower()} do?"
            else:
                return f"What did {subject} do?"
        else:
            # Fallback to "what" question
            return f"What {sentence.lower()}?"

    # Very generic fallback
    question_starters = [
        "What is described in",
        "What is mentioned about",
        "Can you explain",
        "Could you elaborate on"
    ]

    return f"{random.choice(question_starters)} the statement: '{sentence}'?"
94
+
95
def paragraph_to_questions(paragraph):
    """Split *paragraph* into sentences and build one question per sentence.

    Sentences shorter than four words are skipped — they rarely carry
    enough content to yield a sensible question.
    """
    return [
        generate_question_from_sentence(candidate)
        for candidate in sent_tokenize(paragraph)
        if len(candidate.split()) >= 4
    ]
109
+
110
# Adapter used by the Gradio interface: renders the question list as text.
def generate_questions(paragraph):
    """Return the questions for *paragraph* as a numbered, newline-joined string."""
    numbered = (
        f"{position}. {question}"
        for position, question in enumerate(paragraph_to_questions(paragraph), start=1)
    )
    return "\n".join(numbered)
114
+
115
# Gradio UI definition: one multi-line textbox in, the numbered questions out.
demo = gr.Interface(
    title="Paragraph to Questions Generator",
    description="Enter a paragraph and the model will generate relevant questions based on the content.",
    fn=generate_questions,
    inputs=gr.Textbox(lines=10, placeholder="Enter a paragraph to generate questions..."),
    outputs=gr.Textbox(label="Generated Questions"),
    examples=[
        ["Artificial intelligence has revolutionized many industries. Companies like Google and OpenAI are investing billions in research. The field continues to grow rapidly, with new breakthroughs announced every month. Concerns about ethics and regulation remain important topics of discussion."],
        ["The Great Barrier Reef is the world's largest coral reef system. It is located off the coast of Queensland, Australia. The reef is home to thousands of species of marine life. Climate change poses a significant threat to its survival."],
    ],
)
127
+
128
# Plain-Python entry point for reuse from other Hugging Face applications
# (returns the raw question list instead of Gradio-formatted text).
def generate_questions_from_text(text):
    """Return the list of questions generated from *text*."""
    return paragraph_to_questions(text)
131
+
132
# Launch the app if running directly
if __name__ == "__main__":
    # Starts the local Gradio server; this call blocks until shutdown.
    demo.launch()