bahakizil commited on
Commit
dbd9712
·
verified ·
1 Parent(s): 5379a19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -147
app.py CHANGED
@@ -27,7 +27,9 @@ client = OpenAI(api_key="sk-proj-ALzSolLWgz2iSnP3jwT0kZSfRmLXn1cywJrCNwAq7Ys0cRr
27
 
28
  def call_openai_chat(messages, max_tokens=10000, temperature=0.8):
29
  """
30
- GPT-4o-mini modeline istek: max_tokens=10000 => uzun metinler
 
 
31
  """
32
  response = client.chat.completions.create(
33
  model="gpt-4o-mini",
@@ -38,102 +40,103 @@ def call_openai_chat(messages, max_tokens=10000, temperature=0.8):
38
  )
39
  return response.choices[0].message.content
40
 
41
- ### 2) Heading 1 (chunk #1 + chunk #2) => API Call #1
42
- def heading1_part1_and_part2_api(input_text):
 
 
43
  """
44
- Ilk cagirida Heading1 icin 2 chunk (part1, part2) uretilir.
45
- 1. chunk => partial text
46
- 2. chunk => finalize
47
  """
48
- # Chunk #1
49
  prompt1 = f"""
50
- We want Heading 1 in two parts.
51
- PART 1: ~1000+ words introduction.
52
- DO NOT finalize.
53
  Input:
54
  {input_text}
55
  """
56
- msg1 = [
57
  {"role": "system", "content": "You are an AI assistant creating heading1 part1."},
58
  {"role": "user", "content": prompt1}
59
  ]
60
- part1_text = call_openai_chat(msg1)
61
 
62
- # Chunk #2
63
  prompt2 = f"""
64
- We have partial heading1:
65
- {part1_text}
66
 
67
- Now finalize heading1. Make sure total ~2000+ words.
68
- Return final heading1 text only.
69
  """
70
- msg2 = [
71
  {"role": "system", "content": "You are finalizing heading #1."},
72
  {"role": "user", "content": prompt2}
73
  ]
74
- heading1_text = call_openai_chat(msg2)
 
75
 
76
- return heading1_text
77
 
78
- ### 3) Heading 2 + Heading 3 => API Call #2 (chunk #3 + chunk #4)
79
  def heading2_and_3_api(heading1_text):
80
  """
81
- Tek cagirida 2 chunk:
82
- - chunk#3 => heading2
83
- - chunk#4 => heading3
84
  """
85
- # Heading2
86
  prompt_h2 = f"""
87
  We have heading1 for context.
88
- Produce 'Heading 2: Detailed explanation of common risks.'
89
- ~1000+ words. Return only heading2 text.
90
- Context:
91
- {heading1_text[:1500]}...
92
  """
93
- msg_h2 = [
94
- {"role": "system", "content": "You are AI assistant creating heading2."},
95
  {"role": "user", "content": prompt_h2}
96
  ]
97
- heading2_text = call_openai_chat(msg_h2)
98
 
99
- # Heading3
100
  prompt_h3 = f"""
101
  We have heading1 for context.
102
- Produce 'Heading 3: Practical examples and solutions.'
103
- ~1000+ words. Return only heading3 text.
104
- Context:
105
- {heading1_text[:1500]}...
106
  """
107
- msg_h3 = [
108
- {"role": "system", "content": "You are AI assistant creating heading3."},
109
  {"role": "user", "content": prompt_h3}
110
  ]
111
- heading3_text = call_openai_chat(msg_h3)
112
 
113
  return heading2_text, heading3_text
114
 
115
- ### 4) Heading4 + expansions => API Call #3 (chunk #5 + chunk #6)
116
  def heading4_and_expansion_api(h1_text, h2_text, h3_text, original_input):
117
  """
118
- - chunk#5 => heading4
119
- - chunk#6 => expansions if <4000 words or shorten if >10000
 
120
  """
121
- # Chunk #5 => heading4
122
  prompt_h4 = f"""
123
- We have heading1,2,3. Now produce heading4: 'Summary and next steps'
124
- At least ~1000 words. Return only heading4 text.
125
- Context:
126
- {h1_text[:1200]}...
 
127
  """
128
- msg_h4 = [
129
- {"role": "system", "content": "You are AI assistant creating heading4."},
130
  {"role": "user", "content": prompt_h4}
131
  ]
132
- heading4_text = call_openai_chat(msg_h4)
133
 
134
- # Chunk #6 => expansions/shorten
135
- prompt_final = f"""
136
  We have 4 headings now:
 
137
  [Heading1]
138
  {h1_text}
139
 
@@ -146,123 +149,87 @@ We have 4 headings now:
146
  [Heading4]
147
  {heading4_text}
148
 
149
- Combine them into ONE final text.
150
  If total < 4000 words => expand.
151
  If > 10000 => shorten.
152
- Return final text only.
153
-
154
- Original input for references:
155
  {original_input}
156
  """
157
- msg_final = [
158
- {"role": "system", "content": "You are ensuring final text is 4000-10000 words."},
159
- {"role": "user", "content": prompt_final}
160
  ]
161
- final_text = call_openai_chat(msg_final)
162
  return final_text
163
 
164
- ### 5) Dosya Okuma
165
- def read_pdf(path:str) -> str:
166
- txt = ""
167
- with open(path,"rb") as f:
168
- pdf = PdfReader(f)
169
- for page in pdf.pages:
170
- p_txt = page.extract_text()
171
- if p_txt:
172
- txt += p_txt
173
- return txt
174
-
175
- def read_docx(path:str) -> str:
176
- doc = Document(path)
177
- result = []
178
- for para in doc.paragraphs:
179
- result.append(para.text)
180
- return "\n".join(result)
181
-
182
- def read_txt(path:str) -> str:
183
- with open(path,"r",encoding="utf-8",errors="ignore") as f:
184
- return f.read()
185
-
186
- def read_input_file_or_text(file_obj, text_str):
187
- """
188
- Gradio 'File' bileşeni => dictionary, .name, .data yoksa
189
- HF versiyonuna göre .get('data') vs.
190
- """
191
- if file_obj is not None:
192
- file_name = file_obj.name
193
- file_data = file_obj.get("data",None)
194
- if not file_data:
195
- # Bazı Gradio versiyonlarında file_obj kendisi string olabilir
196
- # or "NamedString"
197
- return file_obj.name or ""
198
-
199
- with open(file_name, "wb") as tmp:
200
- tmp.write(file_data)
201
-
202
- ext = file_name.lower().split(".")[-1]
203
- if ext=="pdf":
204
- return read_pdf(file_name)
205
- elif ext=="docx":
206
- return read_docx(file_name)
207
- elif ext=="txt":
208
- return read_txt(file_name)
209
- else:
210
- # fallback decode
211
- return file_data.decode("utf-8", errors="ignore")
212
- else:
213
- return text_str.strip()
214
-
215
- ### 6) pipeline
216
- def main_pipeline(input_content):
217
  """
218
- 3 api call => 6 chunk
 
 
 
219
  """
220
- # API Call #1 => heading1 (part1+part2)
221
- heading1_text = heading1_part1_and_part2_api(input_content)
222
 
223
- # API Call #2 => heading2, heading3
224
  heading2_text, heading3_text = heading2_and_3_api(heading1_text)
225
 
226
- # API Call #3 => heading4 + expansions
227
  final_text = heading4_and_expansion_api(
228
- heading1_text, heading2_text, heading3_text, input_content
 
 
 
229
  )
230
- # Son
231
- word_count = len(re.sub(r"<.*?>","", final_text).split())
232
- return final_text, word_count
233
 
234
- ### 7) Gradio Fonksiyon
235
- def run_app(user_text):
236
- # Dosya veya metin
237
- content = read_input_file_or_text(user_text)
238
- if not content:
239
- return ("⚠️ Please provide text", "")
 
240
 
241
  # pipeline
242
- final_text, wcount = main_pipeline(content)
243
- final_html = final_text.replace("\n", "<br>")
244
- info = f"✅ Done. The final text has approx {wcount} words."
 
 
 
 
 
245
  return (final_html, info)
246
 
247
- ### 8) Gradio Arayüz
248
- def build_gradio_interface():
249
- txt_box = gr.Textbox(
250
- lines=5, label="Transcript Input",
251
- )
252
  )
253
- out_html = gr.HTML(label="Final Output")
254
- out_info = gr.Label(label="Process Info")
255
 
256
  demo = gr.Interface(
257
- fn=run_app,
258
- inputs=[txt_box, file_comp],
259
- outputs=[out_html, out_info],
260
- title="GPT-4o-mini 3-API-Calls, 6-chunk approach",
261
- description="3 separate API calls => 6 chunks total. Ensures 4k-10k words."
 
 
 
 
262
  )
263
  return demo
264
 
265
- if __name__=="__main__":
266
- # App
267
- app = build_gradio_interface()
268
  app.launch()
 
27
 
28
  def call_openai_chat(messages, max_tokens=10000, temperature=0.8):
29
  """
30
+ gpt-4o-mini modeline istek atar.
31
+ - max_tokens=10000 => uzun metin
32
+ - temperature=0.8 => daha yaratıcı
33
  """
34
  response = client.chat.completions.create(
35
  model="gpt-4o-mini",
 
40
  )
41
  return response.choices[0].message.content
42
 
43
+
44
+ # ============== 2) Chunk Fonksiyonları ===============
45
+
46
+ def heading1_part1_and_part2(input_text):
47
  """
48
+ API Çağrısı #1 => 2 chunk (Heading 1 Part1 + Part2)
49
+ Part1 ~1000 kelime, Part2 => final ~2000 kelime
 
50
  """
51
+ # chunk #1 => part1
52
  prompt1 = f"""
53
+ We want Heading 1 (introductory overview) in two parts.
54
+ PART 1 => around 1000+ words. NOT final.
 
55
  Input:
56
  {input_text}
57
  """
58
+ msgs1 = [
59
  {"role": "system", "content": "You are an AI assistant creating heading1 part1."},
60
  {"role": "user", "content": prompt1}
61
  ]
62
+ h1_part1 = call_openai_chat(msgs1)
63
 
64
+ # chunk #2 => part2 => finalize
65
  prompt2 = f"""
66
+ Partial heading1:
67
+ {h1_part1}
68
 
69
+ Now finalize heading1 with part2.
70
+ Ensure total ~2000+ words. Return final heading1 only.
71
  """
72
+ msgs2 = [
73
  {"role": "system", "content": "You are finalizing heading #1."},
74
  {"role": "user", "content": prompt2}
75
  ]
76
+ heading1_final = call_openai_chat(msgs2)
77
+ return heading1_final
78
 
 
79
 
 
80
  def heading2_and_3_api(heading1_text):
81
  """
82
+ API Çağrısı #2 => 2 chunk (Heading2, Heading3)
83
+ chunk #3 => heading2
84
+ chunk #4 => heading3
85
  """
86
+ # heading2
87
  prompt_h2 = f"""
88
  We have heading1 for context.
89
+ Produce 'Heading 2: Detailed explanation of common risks.' ~1000+ words.
90
+ Return heading2 text only.
91
+ Context sample:
92
+ {heading1_text[:1500]}
93
  """
94
+ msgs_h2 = [
95
+ {"role": "system", "content": "You are creating heading2."},
96
  {"role": "user", "content": prompt_h2}
97
  ]
98
+ heading2_text = call_openai_chat(msgs_h2)
99
 
100
+ # heading3
101
  prompt_h3 = f"""
102
  We have heading1 for context.
103
+ Produce 'Heading 3: Practical examples and solutions.' ~1000+ words.
104
+ Return heading3 text only.
105
+ Context sample:
106
+ {heading1_text[:1500]}
107
  """
108
+ msgs_h3 = [
109
+ {"role": "system", "content": "You are creating heading3."},
110
  {"role": "user", "content": prompt_h3}
111
  ]
112
+ heading3_text = call_openai_chat(msgs_h3)
113
 
114
  return heading2_text, heading3_text
115
 
 
116
  def heading4_and_expansion_api(h1_text, h2_text, h3_text, original_input):
117
  """
118
+ API Çağrısı #3 => 2 chunk (Heading4, expansions/shorten)
119
+ chunk #5 => heading4
120
+ chunk #6 => expansions if <4000 words, or shorten if >10000
121
  """
122
+ # chunk #5 => heading4
123
  prompt_h4 = f"""
124
+ We have heading1,2,3.
125
+ Produce 'Heading 4: Summary and next steps for students.' ~1000 words at least.
126
+ Return heading4 only.
127
+ Context sample:
128
+ {h1_text[:1200]}
129
  """
130
+ msgs_h4 = [
131
+ {"role": "system", "content": "You are creating heading4."},
132
  {"role": "user", "content": prompt_h4}
133
  ]
134
+ heading4_text = call_openai_chat(msgs_h4)
135
 
136
+ # chunk #6 => expansions or shorten
137
+ prompt_expand = f"""
138
  We have 4 headings now:
139
+
140
  [Heading1]
141
  {h1_text}
142
 
 
149
  [Heading4]
150
  {heading4_text}
151
 
152
+ Combine them into one final text.
153
  If total < 4000 words => expand.
154
  If > 10000 => shorten.
155
+ Return final text only, merged.
156
+ Original input:
 
157
  {original_input}
158
  """
159
+ msgs_expand = [
160
+ {"role": "system", "content": "You ensure final word count 4000-10000."},
161
+ {"role": "user", "content": prompt_expand}
162
  ]
163
+ final_text = call_openai_chat(msgs_expand)
164
  return final_text
165
 
166
+ # ============== 3) Pipeline (6 chunk, 3 API çağrısı) ==============
167
+
168
+ def main_pipeline(input_txt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  """
170
+ 3 API Çağrısı:
171
+ 1) heading1_part1_and_part2 => chunk #1 + #2
172
+ 2) heading2_and_3_api => chunk #3 + #4
173
+ 3) heading4_and_expansion_api => chunk #5 + #6
174
  """
175
+ # API #1 => Heading1
176
+ heading1_text = heading1_part1_and_part2(input_txt)
177
 
178
+ # API #2 => Heading2, Heading3
179
  heading2_text, heading3_text = heading2_and_3_api(heading1_text)
180
 
181
+ # API #3 => Heading4 + expansions
182
  final_text = heading4_and_expansion_api(
183
+ h1_text=heading1_text,
184
+ h2_text=heading2_text,
185
+ h3_text=heading3_text,
186
+ original_input=input_txt
187
  )
188
+ return final_text
189
+
190
+ # ============== 4) Gradio Arayüz Fonksiyonları ==============
191
 
192
+ def run_pipeline(user_input_text):
193
+ """
194
+ Tek girdi: user_input_text (string).
195
+ Dönüş: final_html, info_label
196
+ """
197
+ if not user_input_text.strip():
198
+ return ("⚠️ Please provide some text!", "")
199
 
200
  # pipeline
201
+ final_text = main_pipeline(user_input_text)
202
+ # HTML
203
+ final_html = final_text.replace("\n","<br>")
204
+ # Word count
205
+ plain_text = re.sub(r"<.*?>","", final_text)
206
+ wcount = len(plain_text.split())
207
+
208
+ info = f"✅ Done. Final text ~{wcount} words (target 4000-10000)."
209
  return (final_html, info)
210
 
211
+ def build_app():
212
+ text_input = gr.Textbox(
213
+ lines=5,
214
+ label="Input Text (Minimum 4000 words, maximum 10000 words in final result)",
215
+ placeholder="Paste or type your input text here..."
216
  )
217
+ output_html = gr.HTML(label="Final Output")
218
+ output_info = gr.Label(label="Information (Word Count)")
219
 
220
  demo = gr.Interface(
221
+ fn=run_pipeline,
222
+ inputs=text_input,
223
+ outputs=[output_html, output_info],
224
+ title="6 Chunks with 3 API Calls (gpt-4o-mini)",
225
+ description=(
226
+ "Human-coded example. 3 API calls, each producing 2 chunks => 6 total.\n"
227
+ "Heading1 in 2 parts, then heading2+3, then heading4+expansions.\n"
228
+ "Ensures at least 4000 words, max 10000 words.\n"
229
+ )
230
  )
231
  return demo
232
 
233
+ if __name__ == "__main__":
234
+ app = build_app()
 
235
  app.launch()