Spaces:
Sleeping
Sleeping
T-K-O-H committed on
Commit ·
50bca05
1
Parent(s): ef853e9
huggingface issue 43
Browse files
- app.py +106 -30
- requirements.txt +13 -23
app.py
CHANGED
|
@@ -25,6 +25,8 @@ from datasets import Dataset
|
|
| 25 |
# import plotly.graph_objects as go
|
| 26 |
import numpy as np
|
| 27 |
from langchain_community.vectorstores import FAISS
|
|
|
|
|
|
|
| 28 |
from langchain_chroma import Chroma
|
| 29 |
from langchain.schema import Document
|
| 30 |
from datetime import datetime
|
|
@@ -32,6 +34,10 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
| 32 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 33 |
# from ragas import evaluate
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# Load environment variables
|
| 36 |
load_dotenv(verbose=True)
|
| 37 |
|
|
@@ -1084,8 +1090,6 @@ def create_ui():
|
|
| 1084 |
def process_with_loading(url, state):
|
| 1085 |
"""Process video with loading indicators."""
|
| 1086 |
try:
|
| 1087 |
-
print(f"Starting processing for URL: {url}") # Debug log
|
| 1088 |
-
|
| 1089 |
# Initialize state if needed
|
| 1090 |
if "improvement_plan" not in state:
|
| 1091 |
state["improvement_plan"] = {}
|
|
@@ -1098,10 +1102,9 @@ def create_ui():
|
|
| 1098 |
|
| 1099 |
# Show loading indicators
|
| 1100 |
loading_states, message = update_loading_state("transcript")
|
| 1101 |
-
print("Fetching transcript...") # Debug log
|
| 1102 |
yield [
|
| 1103 |
"", # error
|
| 1104 |
-
"
|
| 1105 |
message, # transcript (loading)
|
| 1106 |
"", # enhanced
|
| 1107 |
"", # linkedin
|
|
@@ -1115,13 +1118,7 @@ def create_ui():
|
|
| 1115 |
|
| 1116 |
# Get transcript
|
| 1117 |
state["video_url"] = url
|
| 1118 |
-
|
| 1119 |
-
if "error" in transcript_state and transcript_state["error"]:
|
| 1120 |
-
print(f"Error in transcript: {transcript_state['error']}") # Debug log
|
| 1121 |
-
raise Exception(transcript_state["error"])
|
| 1122 |
-
|
| 1123 |
-
transcript_text = transcript_state["transcript"]
|
| 1124 |
-
print(f"Transcript length: {len(transcript_text)}") # Debug log
|
| 1125 |
|
| 1126 |
# Show enhancing state
|
| 1127 |
loading_states, message = update_loading_state("enhance")
|
|
@@ -1142,12 +1139,7 @@ def create_ui():
|
|
| 1142 |
# Enhance content
|
| 1143 |
state["transcript"] = transcript_text
|
| 1144 |
enhanced_state = enhance_content(state)
|
| 1145 |
-
if "error" in enhanced_state and enhanced_state["error"]:
|
| 1146 |
-
print(f"Error in enhancement: {enhanced_state['error']}") # Debug log
|
| 1147 |
-
raise Exception(enhanced_state["error"])
|
| 1148 |
-
|
| 1149 |
enhanced_text = enhanced_state["enhanced"]
|
| 1150 |
-
print("Content enhanced successfully") # Debug log
|
| 1151 |
|
| 1152 |
# Show formatting state
|
| 1153 |
loading_states, message = update_loading_state("format")
|
|
@@ -1168,12 +1160,7 @@ def create_ui():
|
|
| 1168 |
# Format LinkedIn post
|
| 1169 |
state["enhanced"] = enhanced_text
|
| 1170 |
linkedin_state = format_linkedin_post(state)
|
| 1171 |
-
if "error" in linkedin_state and linkedin_state["error"]:
|
| 1172 |
-
print(f"Error in formatting: {linkedin_state['error']}") # Debug log
|
| 1173 |
-
raise Exception(linkedin_state["error"])
|
| 1174 |
-
|
| 1175 |
linkedin_text = linkedin_state["linkedin_post"]
|
| 1176 |
-
print("LinkedIn post formatted successfully") # Debug log
|
| 1177 |
|
| 1178 |
# Show verifying state
|
| 1179 |
loading_states, message = update_loading_state("verify")
|
|
@@ -1183,7 +1170,7 @@ def create_ui():
|
|
| 1183 |
transcript_text,
|
| 1184 |
enhanced_text,
|
| 1185 |
linkedin_text,
|
| 1186 |
-
"🔍 Verifying...", # verification (loading)
|
| 1187 |
"",
|
| 1188 |
"",
|
| 1189 |
"",
|
|
@@ -1195,15 +1182,109 @@ def create_ui():
|
|
| 1195 |
state["linkedin_post"] = linkedin_text
|
| 1196 |
final_state = verify_content(state)
|
| 1197 |
verification_text = format_verification_text(final_state.get("verification", {}))
|
| 1198 |
-
print("Content verified successfully") # Debug log
|
| 1199 |
|
| 1200 |
# Update improvement plan and research results
|
| 1201 |
improvement_plan_text = format_improvement_plan(final_state.get("improvement_plan", {}))
|
| 1202 |
research_results_text = format_research_results(safe_json_loads(final_state.get("research_context", "{}")))
|
| 1203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1204 |
# Complete
|
| 1205 |
loading_states, _ = update_loading_state("done")
|
| 1206 |
-
|
| 1207 |
"",
|
| 1208 |
"✅ Processing complete!",
|
| 1209 |
transcript_text,
|
|
@@ -1216,13 +1297,10 @@ def create_ui():
|
|
| 1216 |
final_state,
|
| 1217 |
*loading_states
|
| 1218 |
]
|
| 1219 |
-
print("Processing completed successfully") # Debug log
|
| 1220 |
-
yield final_result
|
| 1221 |
|
| 1222 |
except Exception as e:
|
| 1223 |
-
print(f"Error in processing: {str(e)}") # Debug log
|
| 1224 |
loading_states, _ = update_loading_state("done")
|
| 1225 |
-
|
| 1226 |
f"⚠️ Error: {str(e)}",
|
| 1227 |
"❌ Processing failed",
|
| 1228 |
state.get("transcript", ""),
|
|
@@ -1235,7 +1313,6 @@ def create_ui():
|
|
| 1235 |
state,
|
| 1236 |
*loading_states
|
| 1237 |
]
|
| 1238 |
-
yield error_result
|
| 1239 |
|
| 1240 |
# Set up event handlers
|
| 1241 |
youtube_convert_btn.click(
|
|
@@ -1610,7 +1687,6 @@ Important:
|
|
| 1610 |
if __name__ == "__main__":
|
| 1611 |
print_graph() # Print the graph visualization
|
| 1612 |
demo = create_ui()
|
| 1613 |
-
demo.queue() # Enable queuing for better handling of concurrent requests
|
| 1614 |
demo.launch(
|
| 1615 |
server_name="0.0.0.0",
|
| 1616 |
server_port=None, # Let Gradio find an available port
|
|
|
|
| 25 |
# import plotly.graph_objects as go
|
| 26 |
import numpy as np
|
| 27 |
from langchain_community.vectorstores import FAISS
|
| 28 |
+
import asyncio
|
| 29 |
+
import nest_asyncio
|
| 30 |
from langchain_chroma import Chroma
|
| 31 |
from langchain.schema import Document
|
| 32 |
from datetime import datetime
|
|
|
|
| 34 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 35 |
# from ragas import evaluate
|
| 36 |
|
| 37 |
+
# # Configure asyncio
|
| 38 |
+
# nest_asyncio.apply()
|
| 39 |
+
# asyncio.set_event_loop(asyncio.new_event_loop())
|
| 40 |
+
|
| 41 |
# Load environment variables
|
| 42 |
load_dotenv(verbose=True)
|
| 43 |
|
|
|
|
| 1090 |
def process_with_loading(url, state):
|
| 1091 |
"""Process video with loading indicators."""
|
| 1092 |
try:
|
|
|
|
|
|
|
| 1093 |
# Initialize state if needed
|
| 1094 |
if "improvement_plan" not in state:
|
| 1095 |
state["improvement_plan"] = {}
|
|
|
|
| 1102 |
|
| 1103 |
# Show loading indicators
|
| 1104 |
loading_states, message = update_loading_state("transcript")
|
|
|
|
| 1105 |
yield [
|
| 1106 |
"", # error
|
| 1107 |
+
"Processing...", # status
|
| 1108 |
message, # transcript (loading)
|
| 1109 |
"", # enhanced
|
| 1110 |
"", # linkedin
|
|
|
|
| 1118 |
|
| 1119 |
# Get transcript
|
| 1120 |
state["video_url"] = url
|
| 1121 |
+
transcript_text = get_transcript(state)["transcript"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1122 |
|
| 1123 |
# Show enhancing state
|
| 1124 |
loading_states, message = update_loading_state("enhance")
|
|
|
|
| 1139 |
# Enhance content
|
| 1140 |
state["transcript"] = transcript_text
|
| 1141 |
enhanced_state = enhance_content(state)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1142 |
enhanced_text = enhanced_state["enhanced"]
|
|
|
|
| 1143 |
|
| 1144 |
# Show formatting state
|
| 1145 |
loading_states, message = update_loading_state("format")
|
|
|
|
| 1160 |
# Format LinkedIn post
|
| 1161 |
state["enhanced"] = enhanced_text
|
| 1162 |
linkedin_state = format_linkedin_post(state)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1163 |
linkedin_text = linkedin_state["linkedin_post"]
|
|
|
|
| 1164 |
|
| 1165 |
# Show verifying state
|
| 1166 |
loading_states, message = update_loading_state("verify")
|
|
|
|
| 1170 |
transcript_text,
|
| 1171 |
enhanced_text,
|
| 1172 |
linkedin_text,
|
| 1173 |
+
"🔍 Verifying...\n⚖️ Analyzing accuracy...", # verification (loading)
|
| 1174 |
"",
|
| 1175 |
"",
|
| 1176 |
"",
|
|
|
|
| 1182 |
state["linkedin_post"] = linkedin_text
|
| 1183 |
final_state = verify_content(state)
|
| 1184 |
verification_text = format_verification_text(final_state.get("verification", {}))
|
|
|
|
| 1185 |
|
| 1186 |
# Update improvement plan and research results
|
| 1187 |
improvement_plan_text = format_improvement_plan(final_state.get("improvement_plan", {}))
|
| 1188 |
research_results_text = format_research_results(safe_json_loads(final_state.get("research_context", "{}")))
|
| 1189 |
|
| 1190 |
+
# Check if enhancement is needed
|
| 1191 |
+
if final_state.get("needs_improvement", False):
|
| 1192 |
+
# Show planning state
|
| 1193 |
+
loading_states, message = update_loading_state("plan")
|
| 1194 |
+
yield [
|
| 1195 |
+
"",
|
| 1196 |
+
f"Creating improvement plan (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 1197 |
+
transcript_text,
|
| 1198 |
+
enhanced_text,
|
| 1199 |
+
linkedin_text,
|
| 1200 |
+
verification_text,
|
| 1201 |
+
improvement_plan_text,
|
| 1202 |
+
research_results_text,
|
| 1203 |
+
"",
|
| 1204 |
+
state,
|
| 1205 |
+
*loading_states
|
| 1206 |
+
]
|
| 1207 |
+
|
| 1208 |
+
# Show researching state
|
| 1209 |
+
loading_states, message = update_loading_state("research")
|
| 1210 |
+
yield [
|
| 1211 |
+
"",
|
| 1212 |
+
f"Researching content (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 1213 |
+
transcript_text,
|
| 1214 |
+
enhanced_text,
|
| 1215 |
+
linkedin_text,
|
| 1216 |
+
verification_text,
|
| 1217 |
+
improvement_plan_text,
|
| 1218 |
+
research_results_text,
|
| 1219 |
+
"",
|
| 1220 |
+
state,
|
| 1221 |
+
*loading_states
|
| 1222 |
+
]
|
| 1223 |
+
|
| 1224 |
+
# Research content
|
| 1225 |
+
state = research_content(state)
|
| 1226 |
+
research_results_text = format_research_results(safe_json_loads(state.get("research_context", "{}")))
|
| 1227 |
+
|
| 1228 |
+
# Show enhancing again state
|
| 1229 |
+
loading_states, message = update_loading_state("enhance")
|
| 1230 |
+
yield [
|
| 1231 |
+
"",
|
| 1232 |
+
f"Enhancing content again (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 1233 |
+
transcript_text,
|
| 1234 |
+
enhanced_text,
|
| 1235 |
+
linkedin_text,
|
| 1236 |
+
verification_text,
|
| 1237 |
+
improvement_plan_text,
|
| 1238 |
+
research_results_text,
|
| 1239 |
+
"",
|
| 1240 |
+
state,
|
| 1241 |
+
*loading_states
|
| 1242 |
+
]
|
| 1243 |
+
|
| 1244 |
+
# Enhance again
|
| 1245 |
+
state = enhance_again(state)
|
| 1246 |
+
enhanced_text = state["enhanced"]
|
| 1247 |
+
|
| 1248 |
+
# Update LinkedIn post
|
| 1249 |
+
state["enhanced"] = enhanced_text
|
| 1250 |
+
linkedin_state = format_linkedin_post(state)
|
| 1251 |
+
linkedin_text = linkedin_state["linkedin_post"]
|
| 1252 |
+
|
| 1253 |
+
# Verify again
|
| 1254 |
+
state["linkedin_post"] = linkedin_text
|
| 1255 |
+
final_state = verify_content(state)
|
| 1256 |
+
verification_text = format_verification_text(final_state.get("verification", {}))
|
| 1257 |
+
improvement_plan_text = format_improvement_plan(final_state.get("improvement_plan", {}))
|
| 1258 |
+
research_results_text = format_research_results(safe_json_loads(final_state.get("research_context", "{}")))
|
| 1259 |
+
|
| 1260 |
+
# After research and enhancement, create improved LinkedIn post
|
| 1261 |
+
if final_state.get("needs_improvement", False):
|
| 1262 |
+
# Show improved post loading state
|
| 1263 |
+
loading_states, message = update_loading_state("improved")
|
| 1264 |
+
yield [
|
| 1265 |
+
"",
|
| 1266 |
+
f"Creating improved LinkedIn post (Attempt {final_state.get('enhancement_attempts', 1)}/3)...",
|
| 1267 |
+
transcript_text,
|
| 1268 |
+
enhanced_text,
|
| 1269 |
+
linkedin_text,
|
| 1270 |
+
verification_text,
|
| 1271 |
+
improvement_plan_text,
|
| 1272 |
+
research_results_text,
|
| 1273 |
+
message, # improved linkedin (loading)
|
| 1274 |
+
state,
|
| 1275 |
+
*loading_states
|
| 1276 |
+
]
|
| 1277 |
+
|
| 1278 |
+
# Create improved LinkedIn post
|
| 1279 |
+
improved_state = format_linkedin_post(final_state)
|
| 1280 |
+
improved_text = improved_state["linkedin_post"]
|
| 1281 |
+
|
| 1282 |
+
# Update final state
|
| 1283 |
+
final_state["improved_linkedin"] = improved_text
|
| 1284 |
+
|
| 1285 |
# Complete
|
| 1286 |
loading_states, _ = update_loading_state("done")
|
| 1287 |
+
yield [
|
| 1288 |
"",
|
| 1289 |
"✅ Processing complete!",
|
| 1290 |
transcript_text,
|
|
|
|
| 1297 |
final_state,
|
| 1298 |
*loading_states
|
| 1299 |
]
|
|
|
|
|
|
|
| 1300 |
|
| 1301 |
except Exception as e:
|
|
|
|
| 1302 |
loading_states, _ = update_loading_state("done")
|
| 1303 |
+
yield [
|
| 1304 |
f"⚠️ Error: {str(e)}",
|
| 1305 |
"❌ Processing failed",
|
| 1306 |
state.get("transcript", ""),
|
|
|
|
| 1313 |
state,
|
| 1314 |
*loading_states
|
| 1315 |
]
|
|
|
|
| 1316 |
|
| 1317 |
# Set up event handlers
|
| 1318 |
youtube_convert_btn.click(
|
|
|
|
| 1687 |
if __name__ == "__main__":
|
| 1688 |
print_graph() # Print the graph visualization
|
| 1689 |
demo = create_ui()
|
|
|
|
| 1690 |
demo.launch(
|
| 1691 |
server_name="0.0.0.0",
|
| 1692 |
server_port=None, # Let Gradio find an available port
|
requirements.txt
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# Core dependencies
|
| 2 |
gradio>=4.0.0
|
| 3 |
python-dotenv>=1.0.0
|
| 4 |
sentence-transformers>=2.2.2
|
|
@@ -7,35 +6,26 @@ langchain>=0.1.0
|
|
| 7 |
langchain-openai>=0.0.5
|
| 8 |
langchain-core>=0.1.0
|
| 9 |
langgraph>=0.0.11
|
| 10 |
-
|
| 11 |
-
#
|
| 12 |
-
#
|
| 13 |
-
#
|
| 14 |
-
# beautifulsoup4>=4.12.0
|
| 15 |
-
# trafilatura>=1.6.1
|
| 16 |
-
|
| 17 |
-
# Vector store and embeddings
|
| 18 |
chromadb>=0.4.22
|
| 19 |
tiktoken>=0.6.0
|
| 20 |
-
protobuf==3.20.3
|
| 21 |
-
|
| 22 |
-
#
|
| 23 |
-
#
|
| 24 |
-
# datasets>=2.15.0
|
| 25 |
-
# plotly>=5.18.0
|
| 26 |
-
|
| 27 |
-
# Core ML dependencies
|
| 28 |
numpy>=1.24.0
|
| 29 |
faiss-cpu>=1.7.4
|
| 30 |
langchain-community>=0.0.10
|
| 31 |
langchain-chroma>=0.0.1
|
| 32 |
openai>=1.6.0
|
| 33 |
typing-extensions>=4.5.0
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
# ML model dependencies
|
| 37 |
torch>=2.0.0
|
| 38 |
transformers>=4.36.0
|
| 39 |
-
tqdm>=4.66.0
|
| 40 |
-
#
|
| 41 |
-
#
|
|
|
|
|
|
|
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
python-dotenv>=1.0.0
|
| 3 |
sentence-transformers>=2.2.2
|
|
|
|
| 6 |
langchain-openai>=0.0.5
|
| 7 |
langchain-core>=0.1.0
|
| 8 |
langgraph>=0.0.11
|
| 9 |
+
#PyPDF2>=3.0.0
|
| 10 |
+
#requests>=2.31.0
|
| 11 |
+
#beautifulsoup4>=4.12.0
|
| 12 |
+
#trafilatura>=1.6.1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
chromadb>=0.4.22
|
| 14 |
tiktoken>=0.6.0
|
| 15 |
+
#protobuf==3.20.3
|
| 16 |
+
#ragas>=0.1.0
|
| 17 |
+
#datasets>=2.15.0
|
| 18 |
+
#plotly>=5.18.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
numpy>=1.24.0
|
| 20 |
faiss-cpu>=1.7.4
|
| 21 |
langchain-community>=0.0.10
|
| 22 |
langchain-chroma>=0.0.1
|
| 23 |
openai>=1.6.0
|
| 24 |
typing-extensions>=4.5.0
|
| 25 |
+
asyncio>=3.4.3
|
|
|
|
|
|
|
| 26 |
torch>=2.0.0
|
| 27 |
transformers>=4.36.0
|
| 28 |
+
#tqdm>=4.66.0
|
| 29 |
+
#scikit-learn>=1.3.0
|
| 30 |
+
#pandas>=2.1.0
|
| 31 |
+
nest_asyncio>=1.5.8
|