Rsnarsna committed on
Commit
f39785f
·
verified ·
1 Parent(s): e7706e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -28
app.py CHANGED
@@ -16,60 +16,109 @@ app = FastAPI()
# Request body for the POST endpoint: the raw text submitted for
# sentiment analysis (read by the handler as ``input_data.text``).
class TextInput(BaseModel):
    text: str
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Function to split text into structured format
def split_conversation(conversation, default_user="You"):
    """Parse a ``speaker: message`` transcript into a list of entries.

    The transcript is trimmed and split on newlines.  Every line that
    contains a colon is cut at the FIRST colon: the left part becomes
    ``user`` (whitespace-trimmed) and the right part becomes ``text``
    (whitespace-trimmed, then surrounding double quotes removed).  Lines
    without a colon are skipped.

    Args:
        conversation: The whole transcript as one string.
        default_user: Accepted for interface compatibility; the current
            implementation never reads it.

    Returns:
        A list of ``{"user": ..., "text": ...}`` dicts in input order.
    """
    entries = []
    for raw_line in conversation.strip().split("\n"):
        speaker, colon, message = raw_line.partition(":")
        if not colon:
            # No speaker separator on this line: nothing to record.
            continue
        entries.append(
            {"user": speaker.strip(), "text": message.strip().strip('"')}
        )
    return entries
31
 
 
# Function to analyze sentiment for each text entry
def analyze_sentiment(conversation_list):
    """Score every entry and return the per-label average across entries.

    Each entry's ``text`` is passed to ``sentiment_pipeline`` (defined
    outside this block; presumably a Hugging Face text-classification
    pipeline -- confirm) with ``top_k=None``.  The returned list is stored
    on the entry under ``"analysis"``, so the input dicts are mutated in
    place, and each returned score is added to its label's running total.

    Args:
        conversation_list: List of dicts, each with a ``"text"`` key.

    Returns:
        A list of ``{"label": ..., "score": ...}`` dicts for ``Negative``,
        ``Neutral`` and ``Positive`` (in that order), where ``score`` is the
        label's total divided by the number of entries.  For an empty list
        every score is 0.

    Raises:
        KeyError: If the pipeline reports a label other than the three
            listed above.
    """
    overall_scores = {"Negative": 0, "Neutral": 0, "Positive": 0}
    total_entries = len(conversation_list)

    for entry in conversation_list:
        analysis = sentiment_pipeline(entry["text"], top_k=None)
        entry["analysis"] = analysis
        for sentiment in analysis:
            overall_scores[sentiment["label"]] += sentiment["score"]

    # A transcript with no "speaker: message" lines yields an empty list;
    # dividing by its length would raise ZeroDivisionError, so fall back to
    # a divisor of 1 (all totals are 0 in that case anyway).
    divisor = total_entries or 1
    return [
        {"label": label, "score": total / divisor}
        for label, total in overall_scores.items()
    ]
52
 
@app.get("/")
def read_root():
    """Return a short description of the API for GET requests to ``/``."""
    info_message = "This is a sentiment analysis API. Use the /analyse_text endpoint to analyze conversation text."
    return {"info": info_message}
56
 
@app.post("/analyse_text")
def analyse_text(input_data: TextInput):
    """Return the averaged sentiment of a ``speaker: message`` transcript.

    The submitted text is parsed with ``split_conversation`` and scored with
    ``analyze_sentiment``; only the overall averages are returned.
    """
    entries = split_conversation(input_data.text)
    return {"overall_analysis": analyze_sentiment(entries)}
 
 
 
 
 
 
# Request body shared by the POST endpoints: the raw text submitted for
# sentiment analysis (read by the handlers as ``input_data.text``).
class TextInput(BaseModel):
    text: str
18
 
19
+
# --- For /predict ---
# Function to split text into chunks
def split_text_into_chunks(text, max_tokens=500):
    """Tokenize ``text`` and cut the token ids into fixed-size chunks.

    ``tokenizer`` is a module-level object defined outside this block
    (presumably a Hugging Face tokenizer -- confirm).  The text is tokenized
    without truncation or padding, the first row of ``input_ids`` is turned
    into a plain list, and that list is sliced every ``max_tokens`` ids.
    Each slice is decoded back to text with special tokens skipped.

    Args:
        text: The text to split.
        max_tokens: Maximum number of token ids per chunk.

    Returns:
        A 3-tuple ``(chunks, chunk_texts, chunk_token_counts)``: the id
        slices, their decoded strings, and the length of each slice.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=False, padding=False)
    token_ids = encoded["input_ids"][0].tolist()

    chunks = []
    chunk_texts = []
    chunk_sizes = []
    for start in range(0, len(token_ids), max_tokens):
        piece = token_ids[start:start + max_tokens]
        chunks.append(piece)
        chunk_texts.append(tokenizer.decode(piece, skip_special_tokens=True))
        chunk_sizes.append(len(piece))
    return chunks, chunk_texts, chunk_sizes
28
+
29
+
# Function to analyze sentiment for a list of chunks
def analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts):
    """Run the sentiment pipeline on every chunk text.

    ``sentiment_pipeline`` is defined outside this block (presumably a
    Hugging Face text-classification pipeline -- confirm); it is called
    with ``top_k=None`` for each chunk text.

    Args:
        chunks: Token-id slices; accepted but not read by this function.
        chunk_texts: Decoded text of each chunk.
        chunk_token_counts: Token count of each chunk, index-aligned with
            ``chunk_texts``.

    Returns:
        ``(results, total_token_count)`` where ``results`` holds one dict
        per chunk with the keys ``chunk`` (1-based position), ``text``,
        ``token_count`` and ``analysis``, and ``total_token_count`` is the
        sum of the per-chunk counts.
    """
    results = []
    for position, chunk_text in enumerate(chunk_texts):
        token_count = chunk_token_counts[position]
        results.append(
            {
                "chunk": position + 1,
                "text": chunk_text,
                "token_count": token_count,
                "analysis": sentiment_pipeline(chunk_text, top_k=None),
            }
        )
    total_token_count = sum(item["token_count"] for item in results)
    return results, total_token_count
44
+
45
+
@app.post("/predict")
def predict_sentiment(input_data: TextInput):
    """Chunk the submitted text, score each chunk, and aggregate the scores.

    The response carries the number of chunks, the total token count, the
    summed score per label (Neutral / Positive / Negative), the per-chunk
    average of each sum (0 when there are no chunks), and the per-chunk
    results themselves.  Labels other than the three above are ignored.
    """
    chunks, chunk_texts, chunk_token_counts = split_text_into_chunks(input_data.text)
    results, total_token_count = analyze_sentiment_chunks(
        chunks, chunk_texts, chunk_token_counts
    )

    totals = {"Neutral": 0, "Positive": 0, "Negative": 0}
    for result in results:
        for sentiment in result["analysis"]:
            label = sentiment["label"]
            if label in totals:
                totals[label] += sentiment["score"]

    num_chunks = len(results)

    def average(total):
        # Guard against an input that produced no chunks at all.
        if num_chunks > 0:
            return total / num_chunks
        return 0

    return {
        "total_chunks": num_chunks,
        "total_token_count": total_token_count,
        "total_neutral_score": totals["Neutral"],
        "total_positive_score": totals["Positive"],
        "total_negative_score": totals["Negative"],
        "overall_neutral_score": average(totals["Neutral"]),
        "overall_positive_score": average(totals["Positive"]),
        "overall_negative_score": average(totals["Negative"]),
        "results": results,
    }
77
+
78
+
# --- For /analyse_text ---
# Function to split text into structured format
def split_conversation(conversation, default_user="You"):
    """Turn a ``speaker: message`` transcript into structured entries.

    After trimming the transcript and splitting it on newlines, each line
    holding a colon is divided at its first colon.  The text before the
    colon is stored as ``user`` (trimmed); the text after it is stored as
    ``text`` (trimmed, then stripped of surrounding double quotes).  Lines
    with no colon are dropped.

    Args:
        conversation: The full transcript as a single string.
        default_user: Kept for interface compatibility; the current
            implementation does not use it.

    Returns:
        A list of ``{"user": ..., "text": ...}`` dicts in input order.
    """
    parsed = []
    for line in conversation.strip().split("\n"):
        name, separator, said = line.partition(":")
        if separator:
            cleaned = said.strip().strip('"')
            parsed.append({"user": name.strip(), "text": cleaned})
    return parsed
90
 
91
+
# Function to analyze sentiment for each text entry
def analyze_sentiment(conversation_list):
    """Score every entry and return the per-label average across entries.

    Each entry's ``text`` is passed to ``sentiment_pipeline`` (defined
    outside this block; presumably a Hugging Face text-classification
    pipeline -- confirm) with ``top_k=None``.  The returned list is stored
    on the entry under ``"analysis"``, so the input dicts are mutated in
    place, and each returned score is added to its label's running total.

    Args:
        conversation_list: List of dicts, each with a ``"text"`` key.

    Returns:
        A list of ``{"label": ..., "score": ...}`` dicts for ``Negative``,
        ``Neutral`` and ``Positive`` (in that order), where ``score`` is the
        label's total divided by the number of entries.  For an empty list
        every score is 0.

    Raises:
        KeyError: If the pipeline reports a label other than the three
            listed above.
    """
    overall_scores = {"Negative": 0, "Neutral": 0, "Positive": 0}

    for entry in conversation_list:
        analysis = sentiment_pipeline(entry["text"], top_k=None)
        entry["analysis"] = analysis
        for sentiment in analysis:
            overall_scores[sentiment["label"]] += sentiment["score"]

    total_entries = len(conversation_list)
    if total_entries == 0:
        # Text without any "speaker: message" line produces no entries;
        # report zero averages instead of raising ZeroDivisionError.
        return [{"label": label, "score": 0} for label in overall_scores]

    return [
        {"label": label, "score": overall_scores[label] / total_entries}
        for label in overall_scores
    ]
107
 
 
 
 
108
 
@app.post("/analyse_text")
def analyse_text(input_data: TextInput):
    """Analyze a ``speaker: message`` transcript line by line.

    The submitted text is parsed with ``split_conversation`` and scored with
    ``analyze_sentiment``.  The response contains the parsed entries under
    ``analyses`` and the averaged scores under ``overall_analysis``.
    """
    entries = split_conversation(input_data.text)
    # analyze_sentiment attaches an "analysis" key to every entry in place,
    # so it must run before the entries are placed in the response.
    averages = analyze_sentiment(entries)
    return {"analyses": entries, "overall_analysis": averages}
118
 
119
+
@app.get("/")
def read_root():
    """Return a short description of the API for GET requests to ``/``."""
    info_message = "This is a sentiment analysis API. Use /predict for chunk-wise analysis or /analyse_text for structured conversation analysis."
    return {"info": info_message}