Spaces:
Sleeping
Sleeping
| # app.py | |
| # -------------------------------------------------------------------------------- | |
| # Bu kod, tamamen geliştirici (insan) tarafından, öğretici ve eğitim amacıyla | |
| # yazılmıştır. GPT-4o-mini modelini kullanarak 4 başlık + 1 kontrol chunk (5 chunk) | |
| # şeklinde metin oluşturma akışını gösterir. Minimum 4000, maksimum 10000 kelime | |
| # üretilmesi hedeflenir. Kod, Gradio ile görsel bir arayüz sunar. | |
| # | |
| # NOT: Lütfen 'YOUR_API_KEY_HERE' kısmına kendi OpenAI API anahtarınızı ekleyin. | |
| # Bu kodda max_tokens 10,000, temperature 0.8 kullanarak uzun ve yaratıcı çıktılar | |
| # elde etmeyi amaçlıyoruz. | |
| # | |
| # Bu proje tamamen insan emeğiyle yazılmıştır, geliştirici tarafından tasarlanmıştır. | |
| # -------------------------------------------------------------------------------- | |
| import os | |
| import re | |
| import gradio as gr | |
| # Ek kütüphaneler | |
| try: | |
| from openai import OpenAI | |
| import tiktoken | |
| from PyPDF2 import PdfReader | |
| from docx import Document | |
| except ImportError: | |
| raise ImportError("Lütfen 'openai', 'tiktoken', 'gradio', 'PyPDF2', 'python-docx' paketlerini kurun.") | |
# -------------------------- OpenAI settings --------------------------
# Shared API client used for every GPT-4o-mini request in this module.
# The key is taken from the OPENAI_API_KEY environment variable so a real
# secret never has to be written into the source file; the original
# placeholder is kept as the fallback when the variable is unset or empty.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY") or "YOUR_API_KEY_HERE")
def call_openai_chat(messages, max_tokens=10000, temperature=0.8):
    """Send a chat-completion request to GPT-4o-mini and return the reply text.

    Args:
        messages: Chat messages (dicts with "role" and "content" keys),
            forwarded unchanged to the API.
        max_tokens: Upper bound on generated tokens; the high default is
            intended to allow long outputs.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text content of the first choice. An empty string is returned
        when the API reports no text content (``message.content`` is
        ``None``), so callers can always apply string methods to the result.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        stop=None,  # no custom stop sequences, so output is not cut early
    )
    content = response.choices[0].message.content
    # content is nullable in the API response; normalise it to a string.
    return content if content is not None else ""
| # ------------------------- Chunk Mantığı ------------------------- | |
def heading1_part1(input_text):
    """Generate chunk #1: the first part of Heading 1.

    Wraps the user's input in a prompt asking for a partial introductory
    overview and returns the model's reply.
    """
    prompt = f"""
We have some input text. We want the first part of 'Heading 1: Introductory overview of input'.
Please produce a partial text focusing on an introduction (about 1000+ words).
Do NOT finalize heading 1 yet, just a partial introduction.
Input text:
{input_text}
"""
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant generating partial text for heading #1.",
    }
    user_message = {"role": "user", "content": prompt}
    return call_openai_chat([system_message, user_message])
def heading1_part2(h1_part1_text):
    """Generate chunk #2: the finalized Heading 1.

    Sends the partial Heading 1 text back to the model with a request to
    expand it into the final version, and returns the model's reply.
    """
    prompt = f"""
Below is the partial text for heading 1:
{h1_part1_text}
Now finalize heading 1 by merging expansions or clarifications.
Ensure heading 1 is at least 2000 words in total. Add depth and examples.
Return only the final text for heading 1.
"""
    system_message = {"role": "system", "content": "You are finalizing heading #1."}
    user_message = {"role": "user", "content": prompt}
    return call_openai_chat([system_message, user_message])
def single_heading_chunk(existing_text, heading_title):
    """Generate chunk #3 or #4: one complete heading in a single request.

    Args:
        existing_text: Previously generated text supplied to the model as
            context (for example the final Heading 1 text).
        heading_title: Title of the heading the model should write.

    Returns:
        The model's reply for the requested heading.
    """
    prompt = f"""
We have some text for context (heading1 or previous content).
Please produce a new heading: '{heading_title}' with around 1000+ words if possible.
Do not produce final expansions for other headings.
Context:
{existing_text}
"""
    system_message = {
        "role": "system",
        "content": "You are generating a single-chunk heading text.",
    }
    user_message = {"role": "user", "content": prompt}
    return call_openai_chat([system_message, user_message])
def heading4_and_expansions(heading1_text, heading2_text, heading3_text, input_text):
    """Generate chunk #5: Heading 4 plus the merged, length-adjusted final text.

    The prompt hands the model all three existing headings and the original
    input, asks it to write Heading 4, merge the four headings, and then
    expand the result if it is under 4000 words or shorten it if it is over
    10000 words.

    Args:
        heading1_text: Final text of Heading 1.
        heading2_text: Final text of Heading 2.
        heading3_text: Final text of Heading 3.
        input_text: The original user input, offered as extra context.

    Returns:
        The model's reply, expected to be the complete merged text.
    """
    # Heading 3 must be included verbatim: the model is asked to merge
    # headings 1-4 and can only merge text that is actually in the prompt.
    user_prompt = f"""
We have 3 headings so far:
[Heading 1]
{heading1_text}
[Heading 2]
{heading2_text}
[Heading 3]
{heading3_text}
Now produce Heading 4: 'Summary and next steps for students.'
Then combine headings 1,2,3,4 into one final text.
If the entire text (4 headings) is under 4000 words, expand or add content
to any heading until we reach 4000+ words.
If above 10000 words, shorten while keeping crucial details.
Return the final text with headings 1,2,3,4 merged.
No separate block, but unify expansions or edits.
You can also use original input context:
{input_text}
"""
    messages = [
        {"role": "system", "content": "You are finalizing heading #4 and ensuring total word count 4000-10000."},
        {"role": "user", "content": user_prompt},
    ]
    return call_openai_chat(messages)
| # -------------------- Dosya Okuma Yardımcı Fonksiyonlar -------------------- | |
def read_pdf(file_path: str) -> str:
    """Extract the text of every page of a PDF file.

    Pages for which no text is extracted are skipped. Page texts are joined
    with a newline so the last word of one page is not fused with the first
    word of the next.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The concatenated page texts, or an empty string if nothing could be
        extracted.
    """
    with open(file_path, "rb") as f:
        reader = PdfReader(f)
        # Extract while the file is still open; the reader works on this handle.
        page_texts = [page.extract_text() for page in reader.pages]
    return "\n".join(page_txt for page_txt in page_texts if page_txt)
def read_docx(file_path: str) -> str:
    """Return the text of a DOCX file, one paragraph per line."""
    document = Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
def read_txt(file_path: str) -> str:
    """Return the contents of a text file decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped rather than raising.
    """
    with open(file_path, mode="r", encoding="utf-8", errors="ignore") as handle:
        contents = handle.read()
    return contents
| # --------------- Gradio Arayüz Fonksiyonları --------------- | |
def process_input_text_or_file(txt_input, file_obj):
    """Return the text to process, preferring an uploaded file over the textbox.

    Args:
        txt_input: Text typed into the textbox; may be ``None`` or empty.
        file_obj: The upload handed over by Gradio, or ``None``. Accepted
            either as a filesystem path (``str`` / ``os.PathLike``) or as an
            object whose ``name`` attribute holds the path of the upload.

    Returns:
        When a file is given: its extracted text (PDF, DOCX and TXT use their
        dedicated readers; any other extension is decoded as UTF-8 with
        undecodable bytes dropped). Otherwise the stripped textbox text, or
        an empty string when the textbox value is ``None``.
    """
    if file_obj is None:
        # No upload: fall back to the textbox, tolerating a missing value.
        return (txt_input or "").strip()

    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = file_obj.name

    # The upload already exists on disk at file_path, so it is read from
    # there directly; writing the bytes back over the same path is
    # unnecessary and could truncate the file.
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        return read_pdf(file_path)
    if ext == ".docx":
        return read_docx(file_path)
    if ext == ".txt":
        return read_txt(file_path)

    # Unknown extension: best-effort decode of the raw bytes.
    with open(file_path, "rb") as fh:
        return fh.read().decode("utf-8", errors="ignore")
def generate_5_chunks(input_txt):
    """Run the five-request generation pipeline on the input text.

    Steps:
        1. Heading 1, part 1 (chunk #1)
        2. Heading 1, part 2 -> final Heading 1 (chunk #2)
        3. Heading 2 (chunk #3)
        4. Heading 3 (chunk #4)
        5. Heading 4 plus merge/expansion -> final text (chunk #5)

    Returns:
        A ``(final_html, info)`` tuple: the final text with newlines turned
        into ``<br>`` tags, and a status message with its word count.
    """
    intro_draft = heading1_part1(input_txt)
    h1 = heading1_part2(intro_draft)
    # Headings 2 and 3 are both generated with the final Heading 1 as context.
    h2 = single_heading_chunk(h1, "Heading 2: Detailed explanation of common risks.")
    h3 = single_heading_chunk(h1, "Heading 3: Practical examples and solutions.")
    merged = heading4_and_expansions(h1, h2, h3, input_txt)

    # Count words on the text with anything tag-like removed.
    word_total = len(re.sub(r"<.*?>", "", merged).split())
    return (
        merged.replace("\n", "<br>"),
        f"✅ Done. The final text is approx {word_total} words.",
    )
def gradio_interface(txt_input, file_upload):
    """Gradio callback: resolve the input, then run the 5-chunk workflow.

    Returns a ``(html, info)`` pair; when no usable input is provided the
    first element is a warning message and the second is empty.
    """
    source_text = process_input_text_or_file(txt_input, file_upload)
    if source_text:
        return generate_5_chunks(source_text)
    return "⚠️ Please provide text or file input.", ""
| # --------------- Gradio Demo --------------- | |
def build_gradio_app():
    """Build and return the Gradio interface for the 5-chunk generator.

    The interface takes an optional text box and an optional file upload as
    inputs, and shows the generated HTML together with a status label.

    Returns:
        The configured ``gr.Interface`` instance (not yet launched).
    """
    # Inputs: free text and/or an uploaded file.
    text_input = gr.Textbox(
        lines=5,
        label="Text Input (Optional)",
        placeholder="Enter some text or upload a file...",
    )
    # The deprecated `optional=True` keyword is intentionally not passed:
    # inputs are optional by default and newer Gradio releases reject it.
    file_input = gr.File(
        label="Upload File (PDF/DOCX/TXT)",
        file_types=[".pdf", ".docx", ".txt"],
    )
    # Outputs: final HTML plus process info.
    output_html = gr.HTML(label="Generated Output (Min 4000 words, Max 10000 words)")
    info_label = gr.Label(label="Process Info (Word Count etc.)")
    demo = gr.Interface(
        fn=gradio_interface,
        inputs=[text_input, file_input],
        outputs=[output_html, info_label],
        title="5-Chunks GPT-4o-mini (4000-10000 words) Example",
        description=(
            "A demonstration of chunk-based approach with GPT-4o-mini model. "
            "We produce 4 headings: "
            "Heading1(part1+part2), Heading2, Heading3, and then Heading4 & expansions "
            "if total words < 4000 or shorten if > 10000."
            "\n(Coded by a human developer, not AI. For educational purposes.)"
        ),
    )
    return demo
# Script entry point for app.py.
if __name__ == "__main__":
    # Build the Gradio app, then serve it
    # (when run locally this is typically 127.0.0.1:7860).
    demo_app = build_gradio_app()
    demo_app.launch()