jaothan commited on
Commit
ab094b5
·
verified ·
1 Parent(s): f9e4b65

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile.txt +18 -0
  2. app.py +41 -0
  3. evaluate_prompts.py +42 -0
  4. requirements.txt +5 -0
Dockerfile.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE(review): this file is stored as "Dockerfile.txt"; Docker (and the
# Hugging Face Spaces docker SDK) looks for a file named exactly
# "Dockerfile" unless a -f flag / custom config points elsewhere — confirm.

# Base image: Python 3.8 runtime
FROM python:3.8

# All subsequent commands run relative to /app
WORKDIR /app

# Install Python dependencies first so this layer stays cached
# until requirements.txt changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Bring in the application source
COPY . .

# Gate the image build on the prompt-evaluation script:
# a non-zero exit (evaluation failure) aborts the build
RUN python evaluate_prompts.py

# Serve the Streamlit UI on all interfaces at the Spaces default port
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit app: summarize user text with BART and report ROUGE scores."""

import streamlit as st
import json
import torch
from transformers import pipeline
# NOTE(review): datasets.load_metric is deprecated and removed in
# datasets>=3.0; the supported replacement is evaluate.load("rouge")
# from the `evaluate` library — confirm the pinned datasets version.
from datasets import load_metric

# ROUGE metric used to score generated summaries.
rouge = load_metric("rouge")

# Summarization pipeline (downloads facebook/bart-base on first use).
summarizer = pipeline("summarization", model="facebook/bart-base")

st.title("📝 Text Summarization with Hugging Face & Streamlit")

# Free-form text input for the document to summarize.
user_input = st.text_area("Enter your text here:", "")

if st.button("Summarize"):
    if user_input:
        # Deterministic (greedy) summary, 5-50 tokens long.
        summary = summarizer(user_input, max_length=50, min_length=5, do_sample=False)[0]["summary_text"]
        st.subheader("Generated Summary:")
        st.write(summary)

        # Score against a placeholder reference; the numbers are only
        # meaningful once a real reference summary is supplied.
        reference_summary = "Example reference summary for evaluation"
        score = rouge.compute(predictions=[summary], references=[reference_summary])

        st.subheader("ROUGE Scores:")
        # rouge.compute returns AggregateScore namedtuples, which st.json
        # would render as opaque nested arrays. Flatten to the mid
        # (median) F-measure of each ROUGE variant for a readable display.
        st.json({name: agg.mid.fmeasure for name, agg in score.items()})
    else:
        st.warning("⚠️ Please enter text to summarize!")

# Show the most recent offline evaluation run, if any.
st.subheader("Latest Evaluation Results:")
try:
    with open("evaluation_results.json", "r") as f:
        results = json.load(f)
    st.json(results)
except FileNotFoundError:
    # First run (or evaluate_prompts.py never executed) — nothing to show.
    st.write("No evaluation results found.")
evaluate_prompts.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Offline prompt evaluation: score the summarizer on fixed test cases.

Exits non-zero when the average ROUGE-L F-measure falls below the
threshold, so a Docker `RUN python evaluate_prompts.py` step gates
deployment on evaluation quality.
"""

import json
import torch
from transformers import pipeline
# NOTE(review): datasets.load_metric is deprecated and removed in
# datasets>=3.0; evaluate.load("rouge") from the `evaluate` library is
# the supported replacement — confirm the pinned datasets version.
from datasets import load_metric

# ROUGE metric for comparing generated vs. expected summaries.
rouge = load_metric("rouge")

# Summarization pipeline under test.
summarizer = pipeline("summarization", model="facebook/bart-base")

# Minimum acceptable average ROUGE-L mid F-measure (was an inline 0.4).
ROUGE_L_THRESHOLD = 0.4

# Example prompts & expected outputs.
test_cases = [
    {"input": "The Eiffel Tower is a landmark in Paris, built in 1889.",
     "expected_summary": "The Eiffel Tower was built in 1889 in Paris."},
    {"input": "AI is changing industries by automating tasks and providing insights.",
     "expected_summary": "AI is transforming industries with automation."},
]


def evaluate():
    """Run every test case, persist results, and return pass/fail.

    Returns:
        bool: True when the average ROUGE-L mid F-measure is at least
        ROUGE_L_THRESHOLD, False otherwise.

    Side effects:
        Writes evaluation_results.json (read by app.py for display).
    """
    results = []
    for case in test_cases:
        # Deterministic (greedy) summary, 5-50 tokens long.
        model_output = summarizer(case["input"], max_length=50, min_length=5, do_sample=False)[0]["summary_text"]
        score = rouge.compute(predictions=[model_output], references=[case["expected_summary"]])

        # Flatten AggregateScore namedtuples to plain floats so the saved
        # JSON is human-readable (named keys instead of the anonymous
        # nested arrays namedtuples serialize to).
        flat_score = {name: agg.mid.fmeasure for name, agg in score.items()}
        results.append({"input": case["input"],
                        "generated_summary": model_output,
                        "rouge_score": flat_score})

    # Persist for display in the Streamlit app.
    with open("evaluation_results.json", "w") as f:
        json.dump(results, f, indent=4)

    # Guard: avoid ZeroDivisionError if test_cases is ever emptied.
    if not results:
        print("❌ No test cases to evaluate.")
        return False

    avg_rouge_l = sum(res["rouge_score"]["rougeL"] for res in results) / len(results)

    if avg_rouge_l >= ROUGE_L_THRESHOLD:
        print("✅ Model passed evaluation.")
        return True
    else:
        print("❌ Model failed evaluation. Improve prompts or model.")
        return False


if __name__ == "__main__":
    success = evaluate()
    if not success:
        exit(1)  # Prevent deployment if evaluation fails
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
streamlit
transformers
torch
datasets
# NOTE: `json` removed from this list — it is part of the Python standard
# library, not a pip package, so `pip install json` would fail the build.