bahakizil committed on
Commit
2e6c205
·
verified ·
1 Parent(s): cc0c2a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -163
app.py CHANGED
@@ -24,11 +24,10 @@ except ImportError:
24
# ============== 1) OpenAI API client ================
import os

# The API key must never be hard-coded in source: it is read from the
# OPENAI_API_KEY environment variable (e.g. a Space/CI secret) instead.
# NOTE(review): the key that was previously committed here is exposed in the
# repository history and has to be revoked and rotated.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
26
 
 
27
  def call_openai_chat(messages, max_tokens=10000, temperature=0.8):
28
  """
29
- Tek seferde gpt-4o-mini modeline istek atar.
30
- - max_tokens=10000 => uzun çıktı
31
- - temperature=0.8 => daha yaratıcı/uzun
32
  """
33
  response = client.chat.completions.create(
34
  model="gpt-4o-mini",
@@ -39,249 +38,235 @@ def call_openai_chat(messages, max_tokens=10000, temperature=0.8):
39
  )
40
  return response.choices[0].message.content
41
 
42
- # ============== 2) CHUNK #1 ve #2 => (API Çağrısı #1) ================
43
def heading1_part1_api_call(input_text):
    """Produce the text of heading 1 with two chained chat requests.

    The first request asks for a partial draft (part 1) based on
    ``input_text``. The second request embeds that draft in its prompt and
    asks the model to finalize the heading. Only the second response is
    returned.
    """
    # Request 1: partial draft of heading 1.
    draft_prompt = "\n".join([
        "",
        "We have some input. We want 'Heading 1: Introductory overview of input' in 2 parts.",
        "Now produce PART 1 of heading 1 (~1000+ words). Return partial text, do NOT finalize.",
        "Input:",
        f"{input_text}",
        "",
    ])
    draft = call_openai_chat([
        {"role": "system", "content": "You are an AI assistant creating heading 1 (part 1)."},
        {"role": "user", "content": draft_prompt},
    ])

    # Request 2: finalize heading 1 from the draft.
    finalize_prompt = "\n".join([
        "",
        "We have partial heading 1:",
        f"{draft}",
        "",
        "Now finalize heading 1 (PART 2).",
        "Ensure total heading1 is at least 2000 words.",
        "Add expansions or clarifications. Return the final heading1 text only.",
        "",
    ])
    return call_openai_chat([
        {"role": "system", "content": "You are finalizing heading #1."},
        {"role": "user", "content": finalize_prompt},
    ])
79
 
80
- # ============== 3) CHUNK #3 ve #4 => (API Çağrısı #2) ================
81
def heading2_3_api_call(heading1_text):
    """Produce heading 2 and heading 3, each with its own chat request.

    Both prompts embed the full ``heading1_text`` as context. Returns the
    tuple ``(heading2_text, heading3_text)``.
    """
    # Request for heading 2.
    second_prompt = "\n".join([
        "",
        "We have heading1 for context. Now produce 'Heading 2: Detailed explanation of common risks.'",
        "Aim ~1000+ words if possible. Return final heading2 text only.",
        "",
        "Context:",
        f"{heading1_text}",
        "",
    ])
    second = call_openai_chat([
        {"role": "system", "content": "You are an AI assistant creating heading2."},
        {"role": "user", "content": second_prompt},
    ])

    # Request for heading 3.
    third_prompt = "\n".join([
        "",
        "We have heading1 for context. Now produce 'Heading 3: Practical examples and solutions.'",
        "Aim ~1000+ words if possible. Return final heading3 text only.",
        "",
        "Context:",
        f"{heading1_text}",
        "",
    ])
    third = call_openai_chat([
        {"role": "system", "content": "You are an AI assistant creating heading3."},
        {"role": "user", "content": third_prompt},
    ])

    return second, third
116
 
117
- # ============== 4) CHUNK #5 ve #6 => (API Çağrısı #3) ================
118
def heading4_expansion_api_call(heading1_text, heading2_text, heading3_text, input_text):
    """Produce heading 4, then ask the model to merge all four headings.

    The first request generates heading 4; its prompt contains only the first
    2000 characters of ``heading1_text`` (headings 2 and 3 appear as literal
    ``...`` placeholders). The second request sends all four headings plus the
    original input and asks for one combined text in the 4000-10000 word
    range. The merged text from the second request is returned.
    """
    # Request for heading 4.
    fourth_prompt = "\n".join([
        "",
        "We have heading1,2,3. Now produce 'Heading 4: Summary and next steps for students.'",
        "At least 1000 words if possible. Return heading4 text only.",
        "Context:",
        f"Heading1 = {heading1_text[:2000]}...",
        "Heading2 = ...",
        "Heading3 = ...",
        "",
    ])
    heading4_text = call_openai_chat([
        {"role": "system", "content": "You are an AI assistant creating heading4."},
        {"role": "user", "content": fourth_prompt},
    ])

    # Request that merges the headings and expands or shortens the result.
    merge_prompt = "\n".join([
        "",
        "We have 4 headings:",
        "",
        "[Heading1]",
        f"{heading1_text}",
        "",
        "[Heading2]",
        f"{heading2_text}",
        "",
        "[Heading3]",
        f"{heading3_text}",
        "",
        "[Heading4]",
        f"{heading4_text}",
        "",
        "Now combine them into ONE final text.",
        "If total < 4000 words => expand.",
        "If > 10000 words => shorten.",
        "Return final text only, merged.",
        "Add expansions to any heading if short, or remove details if too long.",
        "",
        "Original input:",
        f"{input_text}",
        "",
    ])
    return call_openai_chat([
        {"role": "system", "content": "You are an AI assistant ensuring total word count 4000-10000."},
        {"role": "user", "content": merge_prompt},
    ])
169
 
170
- # ============== Dosya Okuma Fonksiyonları ================
171
def read_pdf(file_path: str) -> str:
    """Return the concatenated text of every page of the PDF at ``file_path``.

    Pages for which ``extract_text()`` yields nothing are skipped.
    """
    with open(file_path, "rb") as stream:
        # Extraction happens while the file is still open.
        extracted = (page.extract_text() for page in PdfReader(stream).pages)
        return "".join(piece for piece in extracted if piece)
180
-
181
def read_docx(file_path: str) -> str:
    """Return the text of all paragraphs of the .docx file, joined by newlines."""
    return "\n".join(para.text for para in Document(file_path).paragraphs)
187
 
188
def read_txt(file_path: str) -> str:
    """Return the contents of a text file decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped rather than raising.
    """
    with open(file_path, "r", encoding="utf-8", errors="ignore") as stream:
        contents = stream.read()
    return contents
191
 
192
- # ============== Gradio Fonksiyon ================
193
def process_input_text(file_obj, txt):
    """Return the text to process: the uploaded file's content, else the text box.

    Args:
        file_obj: ``None`` when nothing was uploaded; otherwise either a
            filesystem path string or an object exposing the path as
            ``.name`` (the shapes Gradio's ``File`` component may pass,
            depending on version -- see the Gradio docs).
        txt: Content of the text box; may be ``None`` or empty.

    Returns:
        The extracted text. ``.pdf``, ``.docx`` and ``.txt`` files go through
        the dedicated readers; any other extension is decoded as UTF-8 with
        undecodable bytes ignored. An empty string is returned when no usable
        input is available.
    """
    if file_obj is None:
        # The text box may deliver None instead of an empty string.
        return (txt or "").strip()

    # Resolve the upload to a path. The file is read straight from disk
    # instead of being re-read through the handle and written back over
    # itself, which required a binary-mode ``read()`` and failed otherwise.
    if isinstance(file_obj, str):
        filename = file_obj
    else:
        filename = getattr(file_obj, "name", None)
    if not filename:
        return ""

    ext = filename.lower().rsplit(".", 1)[-1]
    if ext == "pdf":
        return read_pdf(filename)
    if ext == "docx":
        return read_docx(filename)
    if ext == "txt":
        return read_txt(filename)

    # Unknown extension: best-effort UTF-8 decode of the raw bytes.
    with open(filename, "rb") as raw:
        return raw.read().decode("utf-8", errors="ignore")
216
 
217
def main_pipeline(input_text):
    """Run the three generation stages and format the result for display.

    Stage 1 produces heading 1, stage 2 produces headings 2 and 3, and
    stage 3 produces heading 4 and merges everything. Returns a tuple
    ``(final_html, info)``: the merged text with newlines turned into
    ``<br>``, and a status message with an approximate word count taken
    after stripping anything that looks like an HTML tag.
    """
    h1 = heading1_part1_api_call(input_text)
    h2, h3 = heading2_3_api_call(h1)
    merged = heading4_expansion_api_call(
        heading1_text=h1,
        heading2_text=h2,
        heading3_text=h3,
        input_text=input_text,
    )

    # Count words on the tag-free text.
    tag_free = re.sub(r"<.*?>", "", merged)
    status = f"✅ Done. Final text has ~{len(tag_free.split())} words."
    return merged.replace("\n", "<br>"), status
246
 
247
def run_gradio_app(user_text, file):
    """Gradio callback: resolve the input, run the pipeline, return outputs.

    Returns ``(html, info)``. When neither the file nor the text box yields
    content, a warning message and an empty info string are returned.
    """
    source_text = process_input_text(file, user_text)
    if source_text:
        html_out, status = main_pipeline(source_text)
        return (html_out, status)
    return ("⚠️ Please provide text or file input!", "")
254
 
255
-
256
- # ============== Gradio Arayüz ================
257
def build_app():
    """Create the Gradio ``Interface`` wired to ``run_gradio_app``.

    Inputs are a text box and a file upload restricted to .pdf/.docx/.txt;
    outputs are an HTML panel and an information label.
    """
    input_widgets = [
        gr.Textbox(
            lines=5,
            label="Text Input (Optional)",
            placeholder="Write or paste text here...",
        ),
        gr.File(
            label="Upload PDF/DOCX/TXT",
            file_types=[".pdf", ".docx", ".txt"],
        ),
    ]
    output_widgets = [
        gr.HTML(label="Final Output (4 headings, 4000-10000 words)"),
        gr.Label(label="Information"),
    ]
    overview = "\n".join([
        "Human-coded example with 3 separate API calls to produce 6 chunks:",
        "API #1 => chunk#1-2 (Heading1 in 2 parts)",
        "API #2 => chunk#3-4 (Heading2 & Heading3)",
        "API #3 => chunk#5-6 (Heading4 & expansions for 4000-10000 words range)",
        "(No AI used in writing this code. It's purely developer-coded!)",
    ])
    return gr.Interface(
        fn=run_gradio_app,
        inputs=input_widgets,
        outputs=output_widgets,
        title="6-Chunks in 3 API Calls (gpt-4o-mini)",
        description=overview,
    )
284
 
285
if __name__ == "__main__":
    # Build the interface and start the Gradio server when run as a script.
    build_app().launch()
 
 
24
# ============== 1) OpenAI API client ================
import os

# The API key must never be hard-coded in source: it is read from the
# OPENAI_API_KEY environment variable (e.g. a Space/CI secret) instead.
# NOTE(review): the key that was previously committed here is exposed in the
# repository history and has to be revoked and rotated.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
26
 
27
+
28
  def call_openai_chat(messages, max_tokens=10000, temperature=0.8):
29
  """
30
+ GPT-4o-mini modeline istek: max_tokens=10000 => uzun metinler
 
 
31
  """
32
  response = client.chat.completions.create(
33
  model="gpt-4o-mini",
 
38
  )
39
  return response.choices[0].message.content
40
 
41
+ ### 2) Heading 1 (chunk #1 + chunk #2) => API Call #1
42
def heading1_part1_and_part2_api(input_text):
    """Produce the text of heading 1 with two chained chat requests.

    The first request asks for a partial introduction based on
    ``input_text``. The second request embeds that partial text and asks the
    model to finalize heading 1. Only the second response is returned.
    """
    # Request 1: partial text.
    draft_prompt = "\n".join([
        "",
        "We want Heading 1 in two parts.",
        "PART 1: ~1000+ words introduction.",
        "DO NOT finalize.",
        "Input:",
        f"{input_text}",
        "",
    ])
    draft = call_openai_chat([
        {"role": "system", "content": "You are an AI assistant creating heading1 part1."},
        {"role": "user", "content": draft_prompt},
    ])

    # Request 2: finalize.
    finalize_prompt = "\n".join([
        "",
        "We have partial heading1:",
        f"{draft}",
        "",
        "Now finalize heading1. Make sure total ~2000+ words.",
        "Return final heading1 text only.",
        "",
    ])
    return call_openai_chat([
        {"role": "system", "content": "You are finalizing heading #1."},
        {"role": "user", "content": finalize_prompt},
    ])
77
 
78
+ ### 3) Heading 2 + Heading 3 => API Call #2 (chunk #3 + chunk #4)
79
def heading2_and_3_api(heading1_text):
    """Produce heading 2 and heading 3, each with its own chat request.

    Each prompt carries only the first 1500 characters of ``heading1_text``
    (followed by ``...``) as context. Returns the tuple
    ``(heading2_text, heading3_text)``.
    """
    # Request for heading 2.
    second_prompt = "\n".join([
        "",
        "We have heading1 for context.",
        "Produce 'Heading 2: Detailed explanation of common risks.'",
        "~1000+ words. Return only heading2 text.",
        "Context:",
        f"{heading1_text[:1500]}...",
        "",
    ])
    second = call_openai_chat([
        {"role": "system", "content": "You are AI assistant creating heading2."},
        {"role": "user", "content": second_prompt},
    ])

    # Request for heading 3.
    third_prompt = "\n".join([
        "",
        "We have heading1 for context.",
        "Produce 'Heading 3: Practical examples and solutions.'",
        "~1000+ words. Return only heading3 text.",
        "Context:",
        f"{heading1_text[:1500]}...",
        "",
    ])
    third = call_openai_chat([
        {"role": "system", "content": "You are AI assistant creating heading3."},
        {"role": "user", "content": third_prompt},
    ])

    return second, third
114
 
115
+ ### 4) Heading4 + expansions => API Call #3 (chunk #5 + chunk #6)
116
def heading4_and_expansion_api(h1_text, h2_text, h3_text, original_input):
    """Produce heading 4, then ask the model to merge all four headings.

    The first request generates heading 4 with the first 1200 characters of
    ``h1_text`` as context. The second request sends all four headings plus
    ``original_input`` and asks for a single text in the 4000-10000 word
    range. The merged text from the second request is returned.
    """
    # Request for heading 4.
    fourth_prompt = "\n".join([
        "",
        "We have heading1,2,3. Now produce heading4: 'Summary and next steps'",
        "At least ~1000 words. Return only heading4 text.",
        "Context:",
        f"{h1_text[:1200]}...",
        "",
    ])
    heading4_text = call_openai_chat([
        {"role": "system", "content": "You are AI assistant creating heading4."},
        {"role": "user", "content": fourth_prompt},
    ])

    # Request that merges the headings and expands or shortens the result.
    merge_prompt = "\n".join([
        "",
        "We have 4 headings now:",
        "[Heading1]",
        f"{h1_text}",
        "",
        "[Heading2]",
        f"{h2_text}",
        "",
        "[Heading3]",
        f"{h3_text}",
        "",
        "[Heading4]",
        f"{heading4_text}",
        "",
        "Combine them into ONE final text.",
        "If total < 4000 words => expand.",
        "If > 10000 => shorten.",
        "Return final text only.",
        "",
        "Original input for references:",
        f"{original_input}",
        "",
    ])
    return call_openai_chat([
        {"role": "system", "content": "You are ensuring final text is 4000-10000 words."},
        {"role": "user", "content": merge_prompt},
    ])
163
 
164
+ ### 5) Dosya Okuma
165
def read_pdf(path: str) -> str:
    """Return the concatenated text of every page of the PDF at ``path``.

    Pages for which ``extract_text()`` yields nothing are skipped.
    """
    with open(path, "rb") as stream:
        # Extraction happens while the file is still open.
        extracted = (page.extract_text() for page in PdfReader(stream).pages)
        return "".join(piece for piece in extracted if piece)
174
+
175
def read_docx(path: str) -> str:
    """Return the text of all paragraphs of the .docx file, joined by newlines."""
    return "\n".join(para.text for para in Document(path).paragraphs)
181
 
182
def read_txt(path: str) -> str:
    """Return the contents of a text file decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped rather than raising.
    """
    with open(path, "r", encoding="utf-8", errors="ignore") as stream:
        contents = stream.read()
    return contents
185
 
186
def read_input_file_or_text(file_obj, text_str):
    """Return the text to process: the uploaded file's content, else the text box.

    Args:
        file_obj: ``None`` when nothing was uploaded; otherwise a filesystem
            path string, an object exposing the path as ``.name``, or a dict
            with a ``"name"``/``"path"`` entry and optional raw bytes under
            ``"data"``. Which shape arrives depends on the Gradio version --
            see the Gradio ``File`` component docs.
        text_str: Content of the text box; may be ``None`` or empty.

    Returns:
        The extracted text. ``.pdf``, ``.docx`` and ``.txt`` files go through
        the dedicated readers; any other extension is decoded as UTF-8 with
        undecodable bytes ignored. An empty string is returned when no usable
        input is available.
    """
    if file_obj is None:
        # The text box may deliver None instead of an empty string.
        return (text_str or "").strip()

    # Resolve the upload to a path without mixing attribute access and dict
    # access on the same object (the previous code did, and fell back to
    # returning the file *name* as if it were the file's content).
    file_data = None
    if isinstance(file_obj, str):
        file_name = file_obj
    elif isinstance(file_obj, dict):
        file_name = file_obj.get("name") or file_obj.get("path")
        file_data = file_obj.get("data")
    else:
        file_name = getattr(file_obj, "name", None)
    if not file_name:
        return ""

    if isinstance(file_data, (bytes, bytearray)):
        # Raw bytes were supplied: write them out so the readers can open the path.
        with open(file_name, "wb") as tmp:
            tmp.write(file_data)

    ext = file_name.lower().rsplit(".", 1)[-1]
    if ext == "pdf":
        return read_pdf(file_name)
    if ext == "docx":
        return read_docx(file_name)
    if ext == "txt":
        return read_txt(file_name)

    # Unknown extension: best-effort UTF-8 decode of the raw bytes.
    with open(file_name, "rb") as raw:
        return raw.read().decode("utf-8", errors="ignore")
214
 
215
+ ### 6) pipeline
216
def main_pipeline(input_content):
    """Run the three generation stages and return the merged text with its size.

    Stage 1 produces heading 1, stage 2 produces headings 2 and 3, and
    stage 3 produces heading 4 and merges everything. Returns
    ``(final_text, word_count)``, where the count is taken after stripping
    anything that looks like an HTML tag.
    """
    h1 = heading1_part1_and_part2_api(input_content)
    h2, h3 = heading2_and_3_api(h1)
    merged = heading4_and_expansion_api(h1, h2, h3, input_content)

    # Count whitespace-separated words on the tag-free text.
    tag_free = re.sub(r"<.*?>", "", merged)
    return merged, len(tag_free.split())
233
+
234
+ ### 7) Gradio Fonksiyon
235
def run_app(user_text, user_file):
    """Gradio callback: resolve the input, run the pipeline, format the outputs.

    Returns ``(html, info)``. When neither the file nor the text box yields
    content, a warning message and an empty info string are returned.
    """
    source_text = read_input_file_or_text(user_file, user_text)
    if source_text:
        merged, total_words = main_pipeline(source_text)
        status = f"✅ Done. The final text has approx {total_words} words."
        # Newlines become <br> so the HTML output keeps its line breaks.
        return (merged.replace("\n", "<br>"), status)
    return ("⚠️ Please provide text or file input!", "")
246
 
247
+ ### 8) Gradio Arayüz
248
def build_gradio_interface():
    """Create the Gradio ``Interface`` wired to ``run_app``.

    Inputs are a text box and a file upload restricted to .pdf/.docx/.txt;
    outputs are an HTML panel and a process-info label.
    """
    input_widgets = [
        gr.Textbox(
            lines=5,
            label="Text Input (optional)",
            placeholder="Write/paste text here or upload .pdf/.docx/.txt",
        ),
        gr.File(
            label="Upload PDF/DOCX/TXT",
            file_types=[".pdf", ".docx", ".txt"],
        ),
    ]
    output_widgets = [
        gr.HTML(label="Final Output"),
        gr.Label(label="Process Info"),
    ]
    return gr.Interface(
        fn=run_app,
        inputs=input_widgets,
        outputs=output_widgets,
        title="GPT-4o-mini 3-API-Calls, 6-chunk approach",
        description="Human-coded example, 3 separate API calls => 6 chunks total. Ensures 4k-10k words.",
    )
268
 
269
if __name__ == "__main__":
    # Build the interface and start the Gradio server when run as a script.
    build_gradio_interface().launch()