Spaces:

Rsnarsna
/

sentiment_analyses

Sleeping

App Files Files Community

Rsnarsna committed on Dec 16, 2024

Commit

f39785f

verified ·

1 Parent(s): e7706e9

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -28

app.py CHANGED Viewed

@@ -16,60 +16,109 @@ app = FastAPI()
 class TextInput(BaseModel):
     """Request body schema holding the raw text to analyse."""

     text: str  # raw text supplied by the client
 # Function to split text into structured format
 def split_conversation(conversation, default_user="You"):
     """Split a raw conversation transcript into per-utterance entries.

     Every non-blank line becomes one entry. A line shaped like
     ``speaker: text`` is split on its first colon; a line without any
     colon is attributed to ``default_user`` instead of being dropped, so
     plain text without speaker prefixes still yields entries.

     Args:
         conversation: Transcript text, one utterance per line.
         default_user: Speaker name used for lines that have no
             ``speaker:`` prefix.

     Returns:
         A list of ``{"user": ..., "text": ...}`` dicts in input order.
     """
     split_lines = []
     for line in conversation.strip().split("\n"):
         if not line.strip():
             continue  # blank separator lines carry no utterance
         # Partition on the first colon only, so colons inside the message survive.
         speaker, separator, text = line.partition(":")
         if separator:
             user = speaker.strip()
         else:
             user, text = default_user, line
         # Trim whitespace first, then the quotes that may wrap the utterance.
         split_lines.append({"user": user, "text": text.strip().strip('"')})
     return split_lines
 # Function to analyze sentiment for each text entry
 def analyze_sentiment(conversation_list):
     """Score every entry and return the mean score per sentiment label.

     Each entry's ``"text"`` is passed to ``sentiment_pipeline`` and the
     pipeline result is stored back on the entry under ``"analysis"``, so
     the input list is mutated in place.

     Args:
         conversation_list: List of dicts that each have a ``"text"`` key.

     Returns:
         A list of ``{"label": ..., "score": ...}`` dicts, one each for
         "Negative", "Neutral" and "Positive". Every score is 0.0 when
         ``conversation_list`` is empty.

     Raises:
         KeyError: If the pipeline reports a label other than those three.
     """
     overall_scores = {"Negative": 0, "Neutral": 0, "Positive": 0}
     for entry in conversation_list:
         analysis = sentiment_pipeline(entry["text"], top_k=None)
         entry["analysis"] = analysis
         for sentiment in analysis:
             overall_scores[sentiment["label"]] += sentiment["score"]
     total_entries = len(conversation_list)
     # An empty conversation has no mean; report zeros instead of dividing by zero.
     return [
         {"label": label, "score": total / total_entries if total_entries else 0.0}
         for label, total in overall_scores.items()
     ]
@app.get("/")
def read_root():
    """Return a short description of the service for the root URL."""
    message = "This is a sentiment analysis API. Use the /analyse_text endpoint to analyze conversation text."
    return {"info": message}
 @app.post("/analyse_text")
 def analyse_text(input_data: TextInput):
     """Analyse a posted conversation and return only the averaged scores.

     The text is split into per-speaker entries, each entry is scored, and
     the per-label averages are returned under ``"overall_analysis"``.
     """
     entries = split_conversation(input_data.text)
     return {"overall_analysis": analyze_sentiment(entries)}

 class TextInput(BaseModel):
     """Request body schema holding the raw text to analyse."""

     text: str  # raw text supplied by the client
+# --- For /predict ---
# Function to split text into chunks
def split_text_into_chunks(text, max_tokens=500):
    """Tokenize *text* and cut the token ids into consecutive chunks.

    Special tokens are not added while tokenizing, so every id in a chunk
    comes from *text* itself and the reported counts are not inflated by
    wrapper tokens. The ids are requested as a plain list, so no tensor is
    built only to be converted back.

    Args:
        text: The text to split.
        max_tokens: Maximum number of token ids per chunk; must be positive.

    Returns:
        A ``(chunks, chunk_texts, chunk_token_counts)`` tuple: the id lists,
        each id list decoded back to text, and the length of each id list.

    Raises:
        ValueError: If ``max_tokens`` is not positive.
    """
    if max_tokens <= 0:
        # A zero step makes range() fail; a negative one silently yields nothing.
        raise ValueError("max_tokens must be a positive integer")
    # NOTE(review): assumes `tokenizer` is a Hugging Face tokenizer defined
    # at module level outside this view - confirm.
    encoded = tokenizer(text, add_special_tokens=False, truncation=False, padding=False)
    input_ids = list(encoded["input_ids"])
    chunks = [
        input_ids[start:start + max_tokens]
        for start in range(0, len(input_ids), max_tokens)
    ]
    chunk_texts = [tokenizer.decode(chunk, skip_special_tokens=True) for chunk in chunks]
    return chunks, chunk_texts, [len(chunk) for chunk in chunks]
# Function to analyze sentiment for a list of chunks
def analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts):
    """Run the sentiment pipeline on each chunk text and tally token counts.

    Args:
        chunks: Accepted for call compatibility; not read by this function.
        chunk_texts: Text of each chunk, in order.
        chunk_token_counts: Token count per chunk, index-aligned with
            ``chunk_texts``.

    Returns:
        A ``(results, total_token_count)`` tuple. ``results`` holds one dict
        per chunk with its 1-based number, text, token count and pipeline
        analysis; ``total_token_count`` is the sum of the counts visited.
    """
    results = []
    running_total = 0
    for position, chunk_text in enumerate(chunk_texts, start=1):
        token_count = chunk_token_counts[position - 1]
        running_total += token_count
        results.append(
            {
                "chunk": position,
                "text": chunk_text,
                "token_count": token_count,
                "analysis": sentiment_pipeline(chunk_text, top_k=None),
            }
        )
    return results, running_total
@app.post("/predict")
def predict_sentiment(input_data: TextInput):
    """Chunk the posted text, score each chunk, and summarise the scores.

    Returns a dict with the chunk count, the total token count, the summed
    and per-chunk-averaged score for each of "Neutral", "Positive" and
    "Negative", and the per-chunk results. Averages are 0 when there are
    no chunks.
    """
    chunks, chunk_texts, chunk_token_counts = split_text_into_chunks(input_data.text)
    results, total_token_count = analyze_sentiment_chunks(
        chunks, chunk_texts, chunk_token_counts
    )

    # Labels other than these three are ignored.
    totals = {"Neutral": 0, "Positive": 0, "Negative": 0}
    for result in results:
        for sentiment in result["analysis"]:
            label = sentiment["label"]
            if label in totals:
                totals[label] += sentiment["score"]

    num_chunks = len(results)

    def per_chunk(total):
        # Plain average over chunks; guarded so zero chunks gives 0.
        return total / num_chunks if num_chunks > 0 else 0

    return {
        "total_chunks": num_chunks,
        "total_token_count": total_token_count,
        "total_neutral_score": totals["Neutral"],
        "total_positive_score": totals["Positive"],
        "total_negative_score": totals["Negative"],
        "overall_neutral_score": per_chunk(totals["Neutral"]),
        "overall_positive_score": per_chunk(totals["Positive"]),
        "overall_negative_score": per_chunk(totals["Negative"]),
        "results": results,
    }
+# --- For /analyse_text ---
 # Function to split text into structured format
 def split_conversation(conversation, default_user="You"):
     """Split a raw conversation transcript into per-utterance entries.

     Every non-blank line becomes one entry. A line shaped like
     ``speaker: text`` is split on its first colon; a line without any
     colon is attributed to ``default_user`` instead of being dropped, so
     plain text without speaker prefixes still yields entries.

     Args:
         conversation: Transcript text, one utterance per line.
         default_user: Speaker name used for lines that have no
             ``speaker:`` prefix.

     Returns:
         A list of ``{"user": ..., "text": ...}`` dicts in input order.
     """
     split_lines = []
     for line in conversation.strip().split("\n"):
         if not line.strip():
             continue  # blank separator lines carry no utterance
         # Partition on the first colon only, so colons inside the message survive.
         speaker, separator, text = line.partition(":")
         if separator:
             user = speaker.strip()
         else:
             user, text = default_user, line
         # Trim whitespace first, then the quotes that may wrap the utterance.
         split_lines.append({"user": user, "text": text.strip().strip('"')})
     return split_lines
 # Function to analyze sentiment for each text entry
 def analyze_sentiment(conversation_list):
     """Score every entry and return the mean score per sentiment label.

     Each entry's ``"text"`` is passed to ``sentiment_pipeline`` and the
     pipeline result is stored back on the entry under ``"analysis"``, so
     the input list is mutated in place.

     Args:
         conversation_list: List of dicts that each have a ``"text"`` key.

     Returns:
         A list of ``{"label": ..., "score": ...}`` dicts, one each for
         "Negative", "Neutral" and "Positive". Every score is 0.0 when
         ``conversation_list`` is empty.

     Raises:
         KeyError: If the pipeline reports a label other than those three.
     """
     overall_scores = {"Negative": 0, "Neutral": 0, "Positive": 0}
     for entry in conversation_list:
         analysis = sentiment_pipeline(entry["text"], top_k=None)
         entry["analysis"] = analysis
         for sentiment in analysis:
             overall_scores[sentiment["label"]] += sentiment["score"]
     total_entries = len(conversation_list)
     # An empty conversation has no mean; report zeros instead of dividing by zero.
     return [
         {"label": label, "score": total / total_entries if total_entries else 0.0}
         for label, total in overall_scores.items()
     ]
 @app.post("/analyse_text")
 def analyse_text(input_data: TextInput):
     """Analyse a posted conversation entry by entry.

     The text is split into per-speaker entries and each entry is scored.
     The response carries the entries (scoring attaches an ``"analysis"``
     to each one) together with the per-label averages.
     """
     entries = split_conversation(input_data.text)
     # Score before building the response: scoring annotates the entries in place.
     averages = analyze_sentiment(entries)
     return {"analyses": entries, "overall_analysis": averages}
@app.get("/")
def read_root():
    """Return a short description of the service and its two endpoints."""
    message = "This is a sentiment analysis API. Use /predict for chunk-wise analysis or /analyse_text for structured conversation analysis."
    return {"info": message}