prasannahf committed on
Commit
c540a5b
Β·
verified Β·
1 Parent(s): 5b88e2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -179
app.py CHANGED
@@ -1,179 +1,178 @@
1
- import os
2
- import gradio as gr
3
- import traceback
4
- import torch
5
- from langgraph.graph import StateGraph, START, END
6
- from langchain.schema import HumanMessage
7
- from langchain_groq import ChatGroq
8
- from langsmith import traceable
9
- from typing import TypedDict
10
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
11
- from google.colab import userdata # Only needed in Google Colab
12
-
13
- import os
14
-
15
- GROQ_API_KEY = os.getenv("GROQ_API_KEY") # Get from Hugging Face secrets
16
- LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
17
-
18
-
19
- # βœ… Set environment variables
20
- os.environ["GROQ_API_KEY"] = GROQ_API_KEY
21
- os.environ["LANGCHAIN_TRACING_V2"] = "true"
22
- os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY
23
-
24
- # βœ… Initialize Groq LLM (for content generation)
25
- llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="mixtral-8x7b-32768")
26
-
27
- # βœ… Define State for LangGraph
28
- class State(TypedDict):
29
- topic: str
30
- titles: list
31
- selected_title: str
32
- content: str
33
- summary: str
34
- translated_content: str
35
- tone: str
36
- language: str
37
-
38
- # βœ… Function to generate multiple blog titles using Groq
39
- @traceable(name="Generate Titles")
40
- def generate_titles(data):
41
- topic = data.get("topic", "")
42
- prompt = f"Generate **three short and catchy blog titles** for the topic: {topic}. Each title should be under 10 words. Separate them with new lines."
43
-
44
- response = llm([HumanMessage(content=prompt)])
45
- titles = response.content.strip().split("\n") # Get three titles as a list
46
-
47
- return {"titles": titles, "selected_title": titles[0]} # Default to first title
48
-
49
- # βœ… Function to generate blog content with tone using Groq
50
- @traceable(name="Generate Content")
51
- def generate_content(data):
52
- title = data.get("selected_title", "")
53
- tone = data.get("tone", "Neutral")
54
- prompt = f"Write a detailed and engaging blog post in a {tone} tone based on the title: {title}"
55
-
56
- response = llm([HumanMessage(content=prompt)])
57
- return {"content": response.content.strip()}
58
-
59
- # βœ… Function to generate summary using Groq
60
- @traceable(name="Generate Summary")
61
- def generate_summary(data):
62
- content = data.get("content", "")
63
- prompt = f"Summarize this blog post in a short and engaging way: {content}"
64
-
65
- response = llm([HumanMessage(content=prompt)])
66
- return {"summary": response.content.strip()}
67
-
68
- # βœ… Load translation model (NLLB-200)
69
- def load_translation_model():
70
- model_name = "facebook/nllb-200-distilled-600M" # Efficient model for 200+ languages
71
- tokenizer = AutoTokenizer.from_pretrained(model_name)
72
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
73
- return tokenizer, model
74
-
75
- tokenizer, model = load_translation_model()
76
-
77
- # βœ… Language codes for NLLB-200
78
- language_codes = {
79
- "English": "eng_Latn",
80
- "Hindi": "hin_Deva",
81
- "Telugu": "tel_Telu",
82
- "Spanish": "spa_Latn",
83
- "French": "fra_Latn"
84
- }
85
-
86
- # βœ… Function to translate blog content using NLLB-200
87
- @traceable(name="Translate Content")
88
- def translate_content(data):
89
- content = data.get("content", "")
90
- language = data.get("language", "English")
91
-
92
- if language == "English":
93
- return {"translated_content": content} # No translation needed
94
-
95
- tgt_lang = language_codes.get(language, "eng_Latn") # Default to English if not found
96
-
97
- # βœ… Split content into smaller chunks (Avoids token limit issues)
98
- max_length = 512 # Adjust based on model limitations
99
- sentences = content.split(". ") # Split at sentence level
100
- chunks = []
101
- current_chunk = ""
102
-
103
- for sentence in sentences:
104
- if len(current_chunk) + len(sentence) < max_length:
105
- current_chunk += sentence + ". "
106
- else:
107
- chunks.append(current_chunk.strip())
108
- current_chunk = sentence + ". "
109
-
110
- if current_chunk:
111
- chunks.append(current_chunk.strip())
112
-
113
- # βœ… Translate each chunk separately and combine results
114
- translated_chunks = []
115
- for chunk in chunks:
116
- inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True)
117
- translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang))
118
- translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
119
- translated_chunks.append(translated_text.strip())
120
-
121
- # βœ… Combine all translated chunks into final text
122
- full_translation = " ".join(translated_chunks)
123
-
124
- return {"translated_content": full_translation}
125
-
126
- # βœ… Create LangGraph Workflow
127
- def make_blog_generation_graph():
128
- """Create a LangGraph workflow for Blog Generation"""
129
- graph_workflow = StateGraph(State)
130
-
131
- # Define Nodes
132
- graph_workflow.add_node("title_generation", generate_titles)
133
- graph_workflow.add_node("content_generation", generate_content)
134
- graph_workflow.add_node("summary_generation", generate_summary)
135
- graph_workflow.add_node("translation", translate_content) # Ensures only blog content is translated
136
-
137
- # Define Execution Order
138
- graph_workflow.add_edge(START, "title_generation")
139
- graph_workflow.add_edge("title_generation", "content_generation")
140
- graph_workflow.add_edge("content_generation", "summary_generation") # Summary only generated from content
141
- graph_workflow.add_edge("content_generation", "translation") # Translation happens for content only
142
- graph_workflow.add_edge("summary_generation", END)
143
- graph_workflow.add_edge("translation", END)
144
-
145
- return graph_workflow.compile()
146
-
147
- # βœ… Gradio Interface with "Why Translate?" Section
148
- with gr.Blocks() as app:
149
- gr.Markdown(
150
- """
151
- ### 🌍 Why Translate?
152
- We provide translation to make the blog content **accessible to a global audience**.
153
- - πŸ—£οΈ **Multilingual Support** – Read blogs in your preferred language.
154
- - 🌎 **Expand Reach** – Reach international readers.
155
- - βœ… **Better Understanding** – Enjoy content in a language you're comfortable with.
156
- - πŸ€– **AI-Powered Accuracy** – Uses advanced AI models for precise translation.
157
- """
158
- )
159
-
160
- gr.Interface(
161
- fn=generate_blog,
162
- inputs=[
163
- gr.Textbox(label="Enter a topic for your blog"),
164
- gr.Dropdown(["Neutral", "Formal", "Casual", "Persuasive", "Humorous"], label="Select Blog Tone", value="Neutral"),
165
- gr.Dropdown(["English", "Hindi", "Telugu", "Spanish", "French"], label="Translate Blog To", value="English"),
166
- ],
167
- outputs=[
168
- gr.Textbox(label="Suggested Blog Titles (Choose One)"), # Displays multiple title suggestions
169
- gr.Textbox(label="Selected Blog Title"),
170
- gr.Textbox(label="Generated Blog Content"),
171
- gr.Textbox(label="Blog Summary"),
172
- gr.Textbox(label="Translated Blog Content"),
173
- ],
174
- title="πŸš€ AI-Powered Blog Generator with Multi-Title Suggestions",
175
- description="Generate high-quality blogs using Groq AI, customize tone, translate using NLLB-200, and get interactive summaries. Select from multiple title suggestions!",
176
- )
177
-
178
- # βœ… Launch the Gradio App
179
- app.launch(share=True)
 
1
import os
import gradio as gr
import traceback
import torch
from langgraph.graph import StateGraph, START, END
from langchain.schema import HumanMessage
from langchain_groq import ChatGroq
from langsmith import traceable
from typing import TypedDict
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# βœ… API keys come from the environment (Hugging Face Space secrets).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # Get from Hugging Face secrets
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")

# Fail fast with a clear message instead of the opaque
# "TypeError: str expected, not NoneType" that os.environ[...] = None raises.
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set; add it to the environment/secrets.")

# βœ… Set environment variables
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
# Only enable LangSmith tracing when a key is actually available;
# enabling tracing without a key makes every traced call fail.
if LANGSMITH_API_KEY:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY

# βœ… Initialize Groq LLM (for content generation)
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="mixtral-8x7b-32768")
25
+
26
# βœ… Define State for LangGraph
class State(TypedDict):
    """Shared pipeline state passed between LangGraph nodes.

    Each node returns a partial dict that LangGraph merges into this state.
    """

    topic: str                # user-supplied blog topic (UI textbox)
    titles: list              # candidate titles produced by generate_titles
    selected_title: str       # title used for content generation (defaults to titles[0])
    content: str              # full blog post body from generate_content
    summary: str              # short summary of `content` from generate_summary
    translated_content: str   # `content` translated by NLLB-200 (unchanged for English)
    tone: str                 # writing tone, e.g. "Neutral", "Formal"
    language: str             # target translation language name, e.g. "Hindi"
36
+
37
# βœ… Function to generate multiple blog titles using Groq
@traceable(name="Generate Titles")
def generate_titles(data):
    """Ask the LLM for three candidate blog titles for ``data["topic"]``.

    Returns a partial state dict:
      titles: list of non-empty candidate title strings
      selected_title: the first candidate (default choice)
    """
    topic = data.get("topic", "")
    prompt = f"Generate **three short and catchy blog titles** for the topic: {topic}. Each title should be under 10 words. Separate them with new lines."

    response = llm([HumanMessage(content=prompt)])
    # Drop blank lines so double-newlines in the reply don't become empty
    # titles, and strip per-line whitespace.
    titles = [line.strip() for line in response.content.splitlines() if line.strip()]
    if not titles:
        # Defensive fallback: the original indexed titles[0] and would
        # raise IndexError on an empty reply.
        titles = [topic or "Untitled"]

    return {"titles": titles, "selected_title": titles[0]}  # Default to first title
47
+
48
# βœ… Function to generate blog content with tone using Groq
@traceable(name="Generate Content")
def generate_content(data):
    """Draft the full blog post for the selected title in the requested tone.

    Returns a partial state dict with the generated ``content`` string.
    """
    chosen_title = data.get("selected_title", "")
    writing_tone = data.get("tone", "Neutral")
    request = f"Write a detailed and engaging blog post in a {writing_tone} tone based on the title: {chosen_title}"
    reply = llm([HumanMessage(content=request)])
    return {"content": reply.content.strip()}
57
+
58
# βœ… Function to generate summary using Groq
@traceable(name="Generate Summary")
def generate_summary(data):
    """Produce a short, engaging summary of the generated blog content.

    Returns a partial state dict with the ``summary`` string.
    """
    blog_text = data.get("content", "")
    request = f"Summarize this blog post in a short and engaging way: {blog_text}"
    reply = llm([HumanMessage(content=request)])
    return {"summary": reply.content.strip()}
66
+
67
# βœ… Load translation model (NLLB-200)
def load_translation_model():
    """Download and return ``(tokenizer, model)`` for NLLB-200.

    Uses the distilled 600M checkpoint — an efficient model covering
    200+ languages.
    """
    checkpoint = "facebook/nllb-200-distilled-600M"  # Efficient model for 200+ languages
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )
73
+
74
tokenizer, model = load_translation_model()

# βœ… Language codes for NLLB-200 (FLORES-200 identifiers).
# UI language name -> NLLB target-language token.
language_codes = {
    name: code
    for name, code in [
        ("English", "eng_Latn"),
        ("Hindi", "hin_Deva"),
        ("Telugu", "tel_Telu"),
        ("Spanish", "spa_Latn"),
        ("French", "fra_Latn"),
    ]
}
84
+
85
+ # βœ… Function to translate blog content using NLLB-200
86
+ @traceable(name="Translate Content")
87
+ def translate_content(data):
88
+ content = data.get("content", "")
89
+ language = data.get("language", "English")
90
+
91
+ if language == "English":
92
+ return {"translated_content": content} # No translation needed
93
+
94
+ tgt_lang = language_codes.get(language, "eng_Latn") # Default to English if not found
95
+
96
+ # βœ… Split content into smaller chunks (Avoids token limit issues)
97
+ max_length = 512 # Adjust based on model limitations
98
+ sentences = content.split(". ") # Split at sentence level
99
+ chunks = []
100
+ current_chunk = ""
101
+
102
+ for sentence in sentences:
103
+ if len(current_chunk) + len(sentence) < max_length:
104
+ current_chunk += sentence + ". "
105
+ else:
106
+ chunks.append(current_chunk.strip())
107
+ current_chunk = sentence + ". "
108
+
109
+ if current_chunk:
110
+ chunks.append(current_chunk.strip())
111
+
112
+ # βœ… Translate each chunk separately and combine results
113
+ translated_chunks = []
114
+ for chunk in chunks:
115
+ inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True)
116
+ translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang))
117
+ translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
118
+ translated_chunks.append(translated_text.strip())
119
+
120
+ # βœ… Combine all translated chunks into final text
121
+ full_translation = " ".join(translated_chunks)
122
+
123
+ return {"translated_content": full_translation}
124
+
125
# βœ… Create LangGraph Workflow
def make_blog_generation_graph():
    """Create a LangGraph workflow for Blog Generation."""
    workflow = StateGraph(State)

    # Node name -> node callable; only blog content is ever translated.
    node_table = {
        "title_generation": generate_titles,
        "content_generation": generate_content,
        "summary_generation": generate_summary,
        "translation": translate_content,
    }
    for node_name, node_fn in node_table.items():
        workflow.add_node(node_name, node_fn)

    # Execution order: titles -> content, then content fans out to the
    # summary and translation branches, each of which terminates the graph.
    edges = [
        (START, "title_generation"),
        ("title_generation", "content_generation"),
        ("content_generation", "summary_generation"),
        ("content_generation", "translation"),
        ("summary_generation", END),
        ("translation", END),
    ]
    for src, dst in edges:
        workflow.add_edge(src, dst)

    return workflow.compile()
145
+
146
# βœ… Gradio Interface with "Why Translate?" Section
def generate_blog(topic, tone, language):
    """Run the full LangGraph pipeline for one UI request.

    Fixes the missing ``generate_blog`` the Interface referenced: the
    original file passed ``fn=generate_blog`` without ever defining it
    (NameError) and never invoked the compiled workflow.

    Returns the five strings the outputs below expect:
    (all suggested titles, selected title, content, summary, translation).
    """
    graph = make_blog_generation_graph()
    try:
        result = graph.invoke({"topic": topic, "tone": tone, "language": language})
    except Exception:
        # Surface the failure in the first textbox instead of a blank UI error.
        return traceback.format_exc(), "", "", "", ""
    return (
        "\n".join(result.get("titles", [])),
        result.get("selected_title", ""),
        result.get("content", ""),
        result.get("summary", ""),
        result.get("translated_content", ""),
    )


with gr.Blocks() as app:
    gr.Markdown(
        """
        ### 🌍 Why Translate?
        We provide translation to make the blog content **accessible to a global audience**.
        - πŸ—£οΈ **Multilingual Support** – Read blogs in your preferred language.
        - 🌎 **Expand Reach** – Reach international readers.
        - βœ… **Better Understanding** – Enjoy content in a language you're comfortable with.
        - πŸ€– **AI-Powered Accuracy** – Uses advanced AI models for precise translation.
        """
    )

    gr.Interface(
        fn=generate_blog,
        inputs=[
            gr.Textbox(label="Enter a topic for your blog"),
            gr.Dropdown(["Neutral", "Formal", "Casual", "Persuasive", "Humorous"], label="Select Blog Tone", value="Neutral"),
            gr.Dropdown(["English", "Hindi", "Telugu", "Spanish", "French"], label="Translate Blog To", value="English"),
        ],
        outputs=[
            gr.Textbox(label="Suggested Blog Titles (Choose One)"),  # Displays multiple title suggestions
            gr.Textbox(label="Selected Blog Title"),
            gr.Textbox(label="Generated Blog Content"),
            gr.Textbox(label="Blog Summary"),
            gr.Textbox(label="Translated Blog Content"),
        ],
        title="πŸš€ AI-Powered Blog Generator with Multi-Title Suggestions",
        description="Generate high-quality blogs using Groq AI, customize tone, translate using NLLB-200, and get interactive summaries. Select from multiple title suggestions!",
    )

# βœ… Launch the Gradio App
app.launch(share=True)