Spaces:

mohamedrady
/

clockwork-temptation

Runtime error

App Files Files Community

mohamedrady committed on Jul 18, 2024

Commit

0f146ca

verified ·

1 Parent(s): 7b7b995

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -24

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ openai.api_key = "sk-proj-62TDbO5KABSdkZaFPPD4T3BlbkFJkhqOYpHhL6OucTzNdWSU"
 nltk.download('punkt')
 # التحقق من توفر GPU واستخدامه
-device = 0 إذا torch.cuda.is_available() else -1
 # تحميل نماذج التحليل اللغوي
 analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
@@ -42,7 +42,7 @@ def camel_ner_analysis(text):
     entities = ner.predict(tokens)
     entity_dict = {"PERSON": [], "LOC": [], "ORG": [], "DATE": []}
     for token, tag in zip(tokens, entities):
-        إذا tag in entity_dict:
             entity_dict[tag].append((token, tag))
     return entity_dict
@@ -61,7 +61,7 @@ def nltk_extract_quotes(text):
     quotes = []
     sentences = nltk.tokenize.sent_tokenize(text, language='arabic')
     for sentence in sentences:
-        إذا '"' in sentence أو '«' in sentence أو '»' in sentence:
             quotes.append(sentence)
     return quotes
@@ -72,10 +72,10 @@ def count_tokens(text):
 # دالة لاستخراج النص من ملفات PDF
 def extract_pdf_text(file_path):
-    مع open(file_path, "rb") كما pdf_file:
         pdf_reader = PyPDF2.PdfReader(pdf_file)
         text = ""
-        لكل page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
             text += page.extract_text()
     return text
@@ -83,7 +83,7 @@ def extract_pdf_text(file_path):
 # دالة لاستخراج المشاهد من النص
 def extract_scenes(text):
     scenes = re.split(r'داخلي|خارجي', text)
-    scenes = [scene.strip() for scene in scenes إذا scene.strip()]
     return scenes
 # دالة لاستخراج تفاصيل المشهد (المكان والوقت)
@@ -92,9 +92,9 @@ def extract_scene_details(scene):
     location_match = re.search(r'(داخلي|خارجي)', scene)
     time_match = re.search(r'(ليلاً|نهاراً|شروق|غروب)', scene)
-    إذا location_match:
         details['location'] = location_match.group()
-    إذا time_match:
         details['time'] = time_match.group()
     return details
@@ -125,11 +125,11 @@ def analyze_and_complete(file_paths):
     results = []
     output_directory = os.getenv("SPACE_DIR", "/app/output")
-    لكل file_path in file_paths:
-        إذا file_path.endswith(".pdf"):
             text = extract_pdf_text(file_path)
         else:
-            مع open(file_path, "r", encoding="utf-8") كما file:
                 text = file.read()
         filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
@@ -145,40 +145,40 @@ def analyze_and_complete(file_paths):
         character_frequency = extract_character_frequency(camel_entities)
         dialogues = extract_dialogues(text)
-        scene_details = [extract_scene_details(scene) لكل scene in scenes]
         # حفظ النتائج إلى ملفات
-        مع open(os.path.join(output_directory, f"{filename_prefix}_entities.txt"), "w", encoding="utf-8") كما file:
             file.write(str(camel_entities))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_sentiments.txt"), "w", encoding="utf-8") كما file:
             file.write(str(sentiments))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") كما file:
             file.write("\n".join(sentences))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") كما file:
             file.write("\n".join(quotes))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") كما file:
             file.write(str(token_count))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_scenes.txt"), "w", encoding="utf-8") كما file:
             file.write("\n".join(scenes))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_scene_details.txt"), "w", encoding="utf-8") كما file:
             file.write(str(scene_details))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_ages.txt"), "w", encoding="utf-8") كما file:
             file.write(str(ages))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_character_descriptions.txt"), "w", encoding="utf-8") كما file:
             file.write(str(character_descriptions))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_character_frequency.txt"), "w", encoding="utf-8") كما file:
             file.write(str(character_frequency))
-        مع open(os.path.join(output_directory, f"{filename_prefix}_dialogues.txt"), "w", encoding="utf-8") كما file:
             file.write(str(dialogues))
         results.append((str(camel_entities), str(sentiments), "\n".join(sentences), "\n".join(quotes), str(token_count), "\n".join(scenes), str(scene_details), str(ages), str(character_descriptions), str(character_frequency), str(dialogues)))
@@ -189,7 +189,7 @@ def analyze_and_complete(file_paths):
 interface = gr.Interface(
     fn=analyze_and_complete,
     inputs=gr.File(file_count="multiple", type="filepath"),
-    outputs=gr.outputs.JSON(),
     title="Movie Script Analyzer and Completer",
     description="Upload text, PDF, or DOCX files to analyze and complete the movie script."
 )

 nltk.download('punkt')
 # التحقق من توفر GPU واستخدامه
+device = 0 if torch.cuda.is_available() else -1
 # تحميل نماذج التحليل اللغوي
 analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
     entities = ner.predict(tokens)
     entity_dict = {"PERSON": [], "LOC": [], "ORG": [], "DATE": []}
     for token, tag in zip(tokens, entities):
+        if tag in entity_dict:
             entity_dict[tag].append((token, tag))
     return entity_dict
     quotes = []
     sentences = nltk.tokenize.sent_tokenize(text, language='arabic')
     for sentence in sentences:
+        if '"' in sentence or '«' in sentence or '»' in sentence:
             quotes.append(sentence)
     return quotes
 # دالة لاستخراج النص من ملفات PDF
 def extract_pdf_text(file_path):
     """Return the concatenated text of every page in a PDF file.

     Args:
         file_path: Path of the PDF file; it is opened in binary mode.

     Returns:
         The text extracted from each page, joined in page order with no
         separator inserted between pages. An empty string when the
         document has no pages.
     """
     with open(file_path, "rb") as pdf_file:
         pdf_reader = PyPDF2.PdfReader(pdf_file)
         # Join inside the `with` so every page is read before the handle
         # is closed. The `or ""` guards against a page that yields no text
         # (NOTE(review): whether extract_text() can return None depends on
         # the installed PyPDF2 version -- confirm).
         return "".join(page.extract_text() or "" for page in pdf_reader.pages)
 # دالة لاستخراج المشاهد من النص
 def extract_scenes(text):
     """Split a script into scenes at each interior/exterior marker.

     A new scene starts at every occurrence of the marker words
     'داخلي' or 'خارجي'. Unlike a plain ``re.split`` on those words, the
     marker is kept at the start of its scene, so a later search for the
     marker inside the scene text can still find it.

     Args:
         text: Full script text.

     Returns:
         A list of scene strings with surrounding whitespace stripped and
         empty pieces dropped. Any text before the first marker is returned
         as its own leading element. Text without markers comes back as a
         single element; empty or whitespace-only text gives ``[]``.
     """
     # Cut at the *start* of every marker instead of consuming the marker.
     marker_starts = [m.start() for m in re.finditer(r'داخلي|خارجي', text)]
     bounds = [0] + marker_starts + [len(text)]
     pieces = (text[begin:end].strip() for begin, end in zip(bounds, bounds[1:]))
     return [piece for piece in pieces if piece]
 # دالة لاستخراج تفاصيل المشهد (المكان والوقت)
     location_match = re.search(r'(داخلي|خارجي)', scene)
     time_match = re.search(r'(ليلاً|نهاراً|شروق|غروب)', scene)
+    if location_match:
         details['location'] = location_match.group()
+    if time_match:
         details['time'] = time_match.group()
     return details
     results = []
     output_directory = os.getenv("SPACE_DIR", "/app/output")
+    for file_path in file_paths:
+        if file_path.endswith(".pdf"):
             text = extract_pdf_text(file_path)
         else:
+            with open(file_path, "r", encoding="utf-8") as file:
                 text = file.read()
         filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
         character_frequency = extract_character_frequency(camel_entities)
         dialogues = extract_dialogues(text)
+        scene_details = [extract_scene_details(scene) for scene in scenes]
         # حفظ النتائج إلى ملفات
+        with open(os.path.join(output_directory, f"{filename_prefix}_entities.txt"), "w", encoding="utf-8") as file:
             file.write(str(camel_entities))
+        with open(os.path.join(output_directory, f"{filename_prefix}_sentiments.txt"), "w", encoding="utf-8") as file:
             file.write(str(sentiments))
+        with open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(sentences))
+        with open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(quotes))
+        with open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") as file:
             file.write(str(token_count))
+        with open(os.path.join(output_directory, f"{filename_prefix}_scenes.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(scenes))
+        with open(os.path.join(output_directory, f"{filename_prefix}_scene_details.txt"), "w", encoding="utf-8") as file:
             file.write(str(scene_details))
+        with open(os.path.join(output_directory, f"{filename_prefix}_ages.txt"), "w", encoding="utf-8") as file:
             file.write(str(ages))
+        with open(os.path.join(output_directory, f"{filename_prefix}_character_descriptions.txt"), "w", encoding="utf-8") as file:
             file.write(str(character_descriptions))
+        with open(os.path.join(output_directory, f"{filename_prefix}_character_frequency.txt"), "w", encoding="utf-8") as file:
             file.write(str(character_frequency))
+        with open(os.path.join(output_directory, f"{filename_prefix}_dialogues.txt"), "w", encoding="utf-8") as file:
             file.write(str(dialogues))
         results.append((str(camel_entities), str(sentiments), "\n".join(sentences), "\n".join(quotes), str(token_count), "\n".join(scenes), str(scene_details), str(ages), str(character_descriptions), str(character_frequency), str(dialogues)))
 interface = gr.Interface(
     fn=analyze_and_complete,
     inputs=gr.File(file_count="multiple", type="filepath"),
+    outputs=gr.JSON(),
     title="Movie Script Analyzer and Completer",
     description="Upload text, PDF, or DOCX files to analyze and complete the movie script."
 )