Spaces:
Sleeping
Sleeping
File size: 7,248 Bytes
032fa61 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
import os
import gradio as gr
import traceback
import torch
from langgraph.graph import StateGraph, START, END
from langchain.schema import HumanMessage
from langchain_groq import ChatGroq
from langsmith import traceable
from typing import TypedDict
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from google.colab import userdata # Only needed in Google Colab
import os
# Read API credentials from the environment (e.g. Hugging Face Space secrets).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # Get from Hugging Face secrets
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")

# Fail early with an actionable message: assigning None into os.environ would
# otherwise raise an opaque TypeError a few lines below.
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set; define it as an environment variable or Space secret."
    )

# ✅ Set environment variables
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
if LANGSMITH_API_KEY:
    # Tracing is optional: only switch it on when a LangSmith key is available.
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY

# ✅ Initialize Groq LLM (for content generation)
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="mixtral-8x7b-32768")
# ✅ Define State for LangGraph
class State(TypedDict):
    """Shared state dictionary passed between the blog-generation graph nodes."""

    topic: str  # subject the titles are generated for
    titles: list  # candidate titles returned by the title node
    selected_title: str  # title the blog post is written from
    content: str  # generated blog post text
    summary: str  # summary produced from `content`
    translated_content: str  # `content` translated into `language`
    tone: str  # tone requested for the blog post
    language: str  # display name of the target language
# ✅ Function to generate multiple blog titles using Groq
@traceable(name="Generate Titles")
def generate_titles(data):
    """Ask the LLM for three candidate blog titles for the requested topic.

    Args:
        data: Graph state mapping; only the ``"topic"`` key is read
            (an empty string is used when it is absent).

    Returns:
        A partial state update with ``"titles"`` (the non-empty lines of the
        model response) and ``"selected_title"`` (the first title, or an empty
        string when the model returned no usable line).
    """
    topic = data.get("topic", "")
    prompt = f"Generate **three short and catchy blog titles** for the topic: {topic}. Each title should be under 10 words. Separate them with new lines."
    # invoke() is the supported entry point; calling the model object directly
    # (llm([...])) is deprecated in LangChain.
    response = llm.invoke([HumanMessage(content=prompt)])
    # Models often separate titles with blank lines; drop those so they do not
    # show up as empty titles.
    titles = [line.strip() for line in response.content.splitlines() if line.strip()]
    # Default to the first title; guard against an empty response.
    return {"titles": titles, "selected_title": titles[0] if titles else ""}
# ✅ Function to generate blog content with tone using Groq
@traceable(name="Generate Content")
def generate_content(data):
    """Write the blog post for the selected title in the requested tone.

    Args:
        data: Graph state mapping; reads ``"selected_title"`` (default "")
            and ``"tone"`` (default "Neutral").

    Returns:
        A partial state update ``{"content": <stripped model response>}``.
    """
    title = data.get("selected_title", "")
    tone = data.get("tone", "Neutral")
    prompt = f"Write a detailed and engaging blog post in a {tone} tone based on the title: {title}"
    # invoke() is the supported entry point; calling the model object directly
    # (llm([...])) is deprecated in LangChain.
    response = llm.invoke([HumanMessage(content=prompt)])
    return {"content": response.content.strip()}
# ✅ Function to generate summary using Groq
@traceable(name="Generate Summary")
def generate_summary(data):
    """Summarize the generated blog post.

    Args:
        data: Graph state mapping; only ``"content"`` is read (default "").

    Returns:
        A partial state update ``{"summary": <stripped model response>}``.
    """
    content = data.get("content", "")
    prompt = f"Summarize this blog post in a short and engaging way: {content}"
    # invoke() is the supported entry point; calling the model object directly
    # (llm([...])) is deprecated in LangChain.
    response = llm.invoke([HumanMessage(content=prompt)])
    return {"summary": response.content.strip()}
# ✅ Load translation model (NLLB-200)
def load_translation_model():
    """Load the tokenizer and seq2seq model used for translation.

    Returns:
        A ``(tokenizer, model)`` tuple, both created with ``from_pretrained``
        for the ``facebook/nllb-200-distilled-600M`` checkpoint.
    """
    checkpoint = "facebook/nllb-200-distilled-600M"  # Efficient model for 200+ languages
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


# Loaded once at import time; translate_content reuses these module-level objects.
tokenizer, model = load_translation_model()
# ✅ Language codes for NLLB-200
# Maps the language names offered in the UI dropdown to the target-language
# tokens handed to the translation tokenizer.
language_codes = {
    "English": "eng_Latn",  # also used as the fallback code in translate_content
    "Hindi": "hin_Deva",
    "Telugu": "tel_Telu",
    "Spanish": "spa_Latn",
    "French": "fra_Latn",
}
# ✅ Function to translate blog content using NLLB-200
def _split_into_chunks(text, max_chars):
    """Greedily pack the sentences of ``text`` into chunks below ``max_chars`` characters.

    Sentences are split on ". ". A single sentence longer than ``max_chars``
    becomes its own chunk (the tokenizer truncates it later). Blank sentences
    and empty chunks are never emitted.
    """
    *leading, last = text.split(". ")
    # split() removed the period after every piece except the last one; put it
    # back only there so the final sentence does not end up with "..".
    sentences = [piece + "." for piece in leading if piece.strip()]
    if last.strip():
        sentences.append(last)

    chunks = []
    current = ""
    for sentence in sentences:
        # Flush only a non-empty chunk; an oversized first sentence must not
        # produce an empty chunk ahead of it.
        if current and len(current) + len(sentence) >= max_chars:
            chunks.append(current.strip())
            current = ""
        current += sentence + " "
    if current.strip():
        chunks.append(current.strip())
    return chunks


@traceable(name="Translate Content")
def translate_content(data):
    """Translate the generated blog content into the requested language.

    Args:
        data: Graph state mapping; reads ``"content"`` (default "") and
            ``"language"`` (default "English").

    Returns:
        A partial state update ``{"translated_content": <text>}``. The original
        content is returned unchanged when the target resolves to English
        (including unknown language names) or when there is nothing to translate.
    """
    content = data.get("content", "")
    language = data.get("language", "English")
    tgt_lang = language_codes.get(language, "eng_Latn")  # Default to English if not found

    # No translation needed: English target, or empty/whitespace-only content.
    if tgt_lang == "eng_Latn" or not content.strip():
        return {"translated_content": content}

    # ✅ Split content into smaller chunks (Avoids token limit issues)
    max_length = 512  # Character budget per chunk; adjust based on model limitations
    chunks = _split_into_chunks(content, max_length)

    # ✅ Translate each chunk separately and combine results
    # The target-language token id is the same for every chunk, so look it up once.
    tgt_lang_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    translated_chunks = []
    for chunk in chunks:
        inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True)
        translated_tokens = model.generate(**inputs, forced_bos_token_id=tgt_lang_id)
        translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
        translated_chunks.append(translated_text.strip())

    # ✅ Combine all translated chunks into final text
    full_translation = " ".join(translated_chunks)
    return {"translated_content": full_translation}
# ✅ Create LangGraph Workflow
def make_blog_generation_graph():
    """Create a LangGraph workflow for Blog Generation

    Flow: START -> title_generation -> content_generation, which feeds both
    summary_generation and translation; each of those two then leads to END.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    builder = StateGraph(State)

    # Node name -> handler, registered in this order.
    node_table = (
        ("title_generation", generate_titles),
        ("content_generation", generate_content),
        ("summary_generation", generate_summary),
        ("translation", translate_content),  # Ensures only blog content is translated
    )
    for node_name, handler in node_table:
        builder.add_node(node_name, handler)

    # Directed edges defining the execution order.
    edge_table = (
        (START, "title_generation"),
        ("title_generation", "content_generation"),
        ("content_generation", "summary_generation"),  # Summary only generated from content
        ("content_generation", "translation"),  # Translation happens for content only
        ("summary_generation", END),
        ("translation", END),
    )
    for source, target in edge_table:
        builder.add_edge(source, target)

    return builder.compile()
# ✅ Gradio Interface with "Why Translate?" Section
with gr.Blocks() as app:
    # Static explainer shown above the generator form. The Markdown text is
    # kept flush-left so the string contains no leading indentation.
    gr.Markdown(
        """
### 🌍 Why Translate?
We provide translation to make the blog content **accessible to a global audience**.
- 🗣️ **Multilingual Support** – Read blogs in your preferred language.
- 🚀 **Expand Reach** – Reach international readers.
- ✅ **Better Understanding** – Enjoy content in a language you're comfortable with.
- 🤖 **AI-Powered Accuracy** – Uses advanced AI models for precise translation.
"""
    )

    # NOTE(review): generate_blog is not defined in the visible part of this
    # file — confirm it exists and takes (topic, tone, language) and returns the
    # five values listed in `outputs`, in that order.
    gr.Interface(
        fn=generate_blog,
        inputs=[
            gr.Textbox(label="Enter a topic for your blog"),
            gr.Dropdown(["Neutral", "Formal", "Casual", "Persuasive", "Humorous"], label="Select Blog Tone", value="Neutral"),
            gr.Dropdown(["English", "Hindi", "Telugu", "Spanish", "French"], label="Translate Blog To", value="English"),
        ],
        outputs=[
            gr.Textbox(label="Suggested Blog Titles (Choose One)"),  # Displays multiple title suggestions
            gr.Textbox(label="Selected Blog Title"),
            gr.Textbox(label="Generated Blog Content"),
            gr.Textbox(label="Blog Summary"),
            gr.Textbox(label="Translated Blog Content"),
        ],
        title="📝 AI-Powered Blog Generator with Multi-Title Suggestions",
        description="Generate high-quality blogs using Groq AI, customize tone, translate using NLLB-200, and get interactive summaries. Select from multiple title suggestions!",
    )

# ✅ Launch the Gradio App
app.launch(share=True)
|