prasannahf commited on
Commit
032fa61
Β·
verified Β·
1 Parent(s): 07eaef5

Upload gradio_hf_Blog_gen.py

Browse files
Files changed (1) hide show
  1. gradio_hf_Blog_gen.py +179 -0
gradio_hf_Blog_gen.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import traceback
from typing import TypedDict

import gradio as gr
import torch
from langchain.schema import HumanMessage
from langchain_groq import ChatGroq
from langgraph.graph import StateGraph, START, END
from langsmith import traceable
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

try:
    # google.colab exists only inside a Colab runtime; on Hugging Face Spaces
    # (the deployment target per the secrets comment below) it is unavailable
    # and this import would crash the app, so guard it.
    from google.colab import userdata  # noqa: F401
except ImportError:
    userdata = None

# βœ… Read API keys from the environment (Hugging Face secrets).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")

if not GROQ_API_KEY:
    # Fail fast with a clear message instead of the opaque TypeError that
    # os.environ[...] = None would raise.
    raise RuntimeError("GROQ_API_KEY is not set; add it to the environment/secrets.")

# βœ… Set environment variables for downstream libraries
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
os.environ["LANGCHAIN_TRACING_V2"] = "true"
if LANGSMITH_API_KEY:  # tracing is optional; skip rather than crash when absent
    os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY

# βœ… Initialize Groq LLM (for content generation)
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="mixtral-8x7b-32768")
26
+
27
# βœ… Define State for LangGraph: the shared dict passed between workflow nodes.
# Each node reads the keys it needs and returns only the keys it updates.
class State(TypedDict):
    topic: str               # user-supplied blog topic
    titles: list             # candidate titles generated from the topic
    selected_title: str      # the title used for content generation
    content: str             # full generated blog post
    summary: str             # short summary derived from `content`
    translated_content: str  # `content` translated into `language`
    tone: str                # writing tone, e.g. "Neutral", "Formal"
    language: str            # target translation language name, e.g. "Hindi"
37
+
38
# βœ… Function to generate multiple blog titles using Groq
@traceable(name="Generate Titles")
def generate_titles(data):
    """Generate three candidate blog titles for ``data["topic"]``.

    Returns {"titles": [...], "selected_title": <first title>}.
    Blank lines are filtered from the model output, and an empty response
    falls back to the raw topic so ``titles[0]`` can never IndexError.
    """
    topic = data.get("topic", "")
    prompt = (
        f"Generate **three short and catchy blog titles** for the topic: {topic}. "
        "Each title should be under 10 words. Separate them with new lines."
    )

    response = llm([HumanMessage(content=prompt)])
    # The model sometimes emits blank separator lines or trailing whitespace;
    # keep only meaningful lines.
    titles = [line.strip() for line in response.content.strip().split("\n") if line.strip()]
    if not titles:
        titles = [topic]  # degenerate fallback: use the topic itself as title

    return {"titles": titles, "selected_title": titles[0]}  # Default to first title
48
+
49
# βœ… Function to generate blog content with tone using Groq
@traceable(name="Generate Content")
def generate_content(data):
    """Ask the Groq LLM for a full blog post and return it under "content"."""
    chosen_title = data.get("selected_title", "")
    chosen_tone = data.get("tone", "Neutral")

    request = HumanMessage(
        content=f"Write a detailed and engaging blog post in a {chosen_tone} tone based on the title: {chosen_title}"
    )
    reply = llm([request])
    return {"content": reply.content.strip()}
58
+
59
# βœ… Function to generate summary using Groq
@traceable(name="Generate Summary")
def generate_summary(data):
    """Condense the generated blog post into a short summary."""
    blog_text = data.get("content", "")

    request = HumanMessage(
        content=f"Summarize this blog post in a short and engaging way: {blog_text}"
    )
    reply = llm([request])
    return {"summary": reply.content.strip()}
67
+
68
# βœ… Load translation model (NLLB-200)
def load_translation_model():
    """Return a (tokenizer, model) pair for facebook/nllb-200-distilled-600M."""
    checkpoint = "facebook/nllb-200-distilled-600M"  # Efficient model for 200+ languages
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )
74
# Load the translation model once at import time so every request reuses the
# same tokenizer/model pair. NOTE(review): this downloads weights on first run
# and blocks startup — acceptable for a Space, but slow.
tokenizer, model = load_translation_model()

# βœ… Language codes for NLLB-200 (FLORES-200 identifiers expected by the model)
language_codes = {
    "English": "eng_Latn",
    "Hindi": "hin_Deva",
    "Telugu": "tel_Telu",
    "Spanish": "spa_Latn",
    "French": "fra_Latn"
}
85
+
86
# βœ… Function to translate blog content using NLLB-200
def _chunk_sentences(text, max_length=512):
    """Split *text* at sentence boundaries into chunks under *max_length* chars.

    Character length is a cheap proxy for token count; it keeps each model
    input below the token limit. Returns a list of non-empty chunk strings.
    """
    chunks = []
    current_chunk = ""
    for sentence in text.split(". "):  # split at sentence level
        if len(current_chunk) + len(sentence) < max_length:
            current_chunk += sentence + ". "
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sentence + ". "
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks


@traceable(name="Translate Content")
def translate_content(data):
    """Translate ``data["content"]`` into ``data["language"]`` with NLLB-200.

    Returns {"translated_content": <text>}. English input is passed through
    untouched; an unknown language name falls back to the English code
    (effectively a no-op translation).
    """
    content = data.get("content", "")
    language = data.get("language", "English")

    if language == "English":
        return {"translated_content": content}  # No translation needed

    tgt_lang = language_codes.get(language, "eng_Latn")  # Default to English if not found

    # βœ… Translate each chunk separately (avoids token-limit issues), then combine.
    translated_chunks = []
    for chunk in _chunk_sentences(content, max_length=512):
        inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():  # inference only — skip autograd bookkeeping/memory
            translated_tokens = model.generate(
                **inputs,
                forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
            )
        translated_chunks.append(
            tokenizer.decode(translated_tokens[0], skip_special_tokens=True).strip()
        )

    return {"translated_content": " ".join(translated_chunks)}
125
+
126
# βœ… Create LangGraph Workflow
def make_blog_generation_graph():
    """Create a LangGraph workflow for Blog Generation"""
    workflow = StateGraph(State)

    # Register processing nodes.
    for node_name, node_fn in (
        ("title_generation", generate_titles),
        ("content_generation", generate_content),
        ("summary_generation", generate_summary),
        ("translation", translate_content),  # Ensures only blog content is translated
    ):
        workflow.add_node(node_name, node_fn)

    # Wire execution order: titles -> content, then content fans out to both
    # summary and translation, each terminating the graph.
    for src, dst in (
        (START, "title_generation"),
        ("title_generation", "content_generation"),
        ("content_generation", "summary_generation"),
        ("content_generation", "translation"),
        ("summary_generation", END),
        ("translation", END),
    ):
        workflow.add_edge(src, dst)

    return workflow.compile()
146
+
147
# βœ… Glue function the Gradio Interface calls. This was previously missing
# entirely, so constructing the Interface raised NameError at startup.
def generate_blog(topic, tone, language):
    """Run the blog-generation graph and return the five UI output strings.

    Returns (titles, selected_title, content, summary, translated_content).
    Any failure is rendered into the first output box instead of crashing
    the UI.
    """
    try:
        graph = make_blog_generation_graph()
        result = graph.invoke({"topic": topic, "tone": tone, "language": language})
        return (
            "\n".join(result.get("titles", [])),
            result.get("selected_title", ""),
            result.get("content", ""),
            result.get("summary", ""),
            result.get("translated_content", ""),
        )
    except Exception:
        return (traceback.format_exc(), "", "", "", "")


# βœ… Gradio Interface with "Why Translate?" Section
with gr.Blocks() as app:
    gr.Markdown(
        """
        ### 🌍 Why Translate?
        We provide translation to make the blog content **accessible to a global audience**.
        - πŸ—£οΈ **Multilingual Support** – Read blogs in your preferred language.
        - 🌎 **Expand Reach** – Reach international readers.
        - βœ… **Better Understanding** – Enjoy content in a language you're comfortable with.
        - πŸ€– **AI-Powered Accuracy** – Uses advanced AI models for precise translation.
        """
    )

    gr.Interface(
        fn=generate_blog,
        inputs=[
            gr.Textbox(label="Enter a topic for your blog"),
            gr.Dropdown(["Neutral", "Formal", "Casual", "Persuasive", "Humorous"], label="Select Blog Tone", value="Neutral"),
            gr.Dropdown(["English", "Hindi", "Telugu", "Spanish", "French"], label="Translate Blog To", value="English"),
        ],
        outputs=[
            gr.Textbox(label="Suggested Blog Titles (Choose One)"),  # Displays multiple title suggestions
            gr.Textbox(label="Selected Blog Title"),
            gr.Textbox(label="Generated Blog Content"),
            gr.Textbox(label="Blog Summary"),
            gr.Textbox(label="Translated Blog Content"),
        ],
        title="πŸš€ AI-Powered Blog Generator with Multi-Title Suggestions",
        description="Generate high-quality blogs using Groq AI, customize tone, translate using NLLB-200, and get interactive summaries. Select from multiple title suggestions!",
    )

# βœ… Launch the Gradio App
app.launch(share=True)