Krish Patel
committed on
Commit
·
b6597a0
1
Parent(s):
990f77e
Added deepfake
Browse files- .env.example +1 -0
- .gitignore +2 -1
- app.py +50 -110
- deepfake2/deepfake2.py +207 -0
- deepfake2/deepfake_detector.h5 +3 -0
- deepfake2/testing2.py +118 -0
- final.py +5 -52
.env.example
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
GEMINI_API=
|
.gitignore
CHANGED
|
@@ -1,2 +1,3 @@
|
|
| 1 |
__pycache__/
|
| 2 |
-
node_modules/
|
|
|
|
|
|
| 1 |
__pycache__/
|
| 2 |
+
node_modules/
|
| 3 |
+
.env
|
app.py
CHANGED
|
@@ -1,113 +1,53 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
#
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
#
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
# # @app.get("/health")
|
| 24 |
-
# # async def health_check():
|
| 25 |
-
# # return {"status": "healthy"}
|
| 26 |
|
| 27 |
-
# from fastapi import FastAPI
|
| 28 |
-
# from fastapi.middleware.cors import CORSMiddleware
|
| 29 |
-
# from pydantic import BaseModel
|
| 30 |
-
# from final import predict_news, get_gemini_analysis
|
| 31 |
-
|
| 32 |
-
# app = FastAPI()
|
| 33 |
-
|
| 34 |
-
# # Add CORS middleware
|
| 35 |
-
# app.add_middleware(
|
| 36 |
-
# CORSMiddleware,
|
| 37 |
-
# allow_origins=["http://localhost:5173"], # Your React app's URL
|
| 38 |
-
# allow_credentials=True,
|
| 39 |
-
# allow_methods=["*"],
|
| 40 |
-
# allow_headers=["*"],
|
| 41 |
-
# )
|
| 42 |
-
|
| 43 |
-
# # Rest of your code remains the same
|
| 44 |
-
# class NewsInput(BaseModel):
|
| 45 |
-
# text: str
|
| 46 |
-
|
| 47 |
-
# @app.post("/analyze")
|
| 48 |
-
# async def analyze_news(news: NewsInput):
|
| 49 |
-
# prediction = predict_news(news.text)
|
| 50 |
-
# gemini_analysis = get_gemini_analysis(news.text)
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
#
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
gemini_analysis = get_gemini_analysis(news_text)
|
| 73 |
-
|
| 74 |
-
# Display results
|
| 75 |
-
st.header("Analysis Results")
|
| 76 |
-
|
| 77 |
-
# Main prediction with color coding
|
| 78 |
-
prediction_color = "green" if prediction == "REAL" else "red"
|
| 79 |
-
st.markdown(f"### Prediction: <span style='color:{prediction_color}'>{prediction}</span>", unsafe_allow_html=True)
|
| 80 |
-
|
| 81 |
-
# Detailed Gemini Analysis
|
| 82 |
-
st.subheader("Detailed Analysis")
|
| 83 |
-
|
| 84 |
-
# Display structured analysis
|
| 85 |
-
col1, col2 = st.columns(2)
|
| 86 |
-
|
| 87 |
-
with col1:
|
| 88 |
-
st.markdown("#### Content Classification")
|
| 89 |
-
st.write(f"Category: {gemini_analysis['text_classification']['category']}")
|
| 90 |
-
st.write(f"Writing Style: {gemini_analysis['text_classification']['writing_style']}")
|
| 91 |
-
st.write(f"Content Type: {gemini_analysis['text_classification']['content_type']}")
|
| 92 |
-
|
| 93 |
-
with col2:
|
| 94 |
-
st.markdown("#### Sentiment Analysis")
|
| 95 |
-
st.write(f"Primary Emotion: {gemini_analysis['sentiment_analysis']['primary_emotion']}")
|
| 96 |
-
st.write(f"Emotional Intensity: {gemini_analysis['sentiment_analysis']['emotional_intensity']}/10")
|
| 97 |
-
st.write(f"Sensationalism Level: {gemini_analysis['sentiment_analysis']['sensationalism_level']}")
|
| 98 |
-
|
| 99 |
-
# Fact checking section
|
| 100 |
-
st.markdown("#### Fact Checking")
|
| 101 |
-
st.write(f"Evidence Present: {gemini_analysis['fact_checking']['evidence_present']}")
|
| 102 |
-
st.write(f"Fact Check Score: {gemini_analysis['fact_checking']['fact_check_score']}/100")
|
| 103 |
-
|
| 104 |
-
# Verifiable claims
|
| 105 |
-
st.markdown("#### Verifiable Claims")
|
| 106 |
-
for claim in gemini_analysis['fact_checking']['verifiable_claims']:
|
| 107 |
-
st.write(f"- {claim}")
|
| 108 |
-
|
| 109 |
-
else:
|
| 110 |
-
st.warning("Please enter some text to analyze")
|
| 111 |
-
|
| 112 |
-
if __name__ == "__main__":
|
| 113 |
-
main()
|
|
|
|
| 1 |
+
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from final import predict_news, get_gemini_analysis
import os
from tempfile import NamedTemporaryFile

app = FastAPI()

# Allow the local React dev server to call this API from the browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173"],  # Your React app's URL
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class NewsInput(BaseModel):
    # Raw article text submitted for classification.
    text: str


@app.post("/analyze")
async def analyze_news(news: NewsInput):
    """Classify the submitted text and attach the detailed Gemini breakdown."""
    prediction = predict_news(news.text)
    gemini_analysis = get_gemini_analysis(news.text)
    return {
        "prediction": prediction,
        "detailed_analysis": gemini_analysis,
    }
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
+
@app.post("/detect-deepfake")
async def detect_deepfake(image: UploadFile = File(...)):
    """Run deepfake detection on an uploaded image.

    Saves the upload to a temporary .jpg (the OpenCV/Keras loaders need a
    file on disk), scores it with the combined CNN/heuristic pipeline, and
    always removes the temp file afterwards.
    """
    temp_file_path = None
    try:
        # Persist the upload so predict_image can read it from disk.
        with NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
            temp_file.write(await image.read())
            temp_file_path = temp_file.name

        # Deferred import keeps the heavy TF/OpenCV stack out of app startup.
        from deepfake2.testing2 import predict_image
        return predict_image(temp_file_path)
    except Exception as e:
        # BUG FIX: `return {"error": ...}, 500` made FastAPI serialize a
        # two-element array with HTTP 200; return a real 500 response.
        from fastapi.responses import JSONResponse
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # BUG FIX: the temp file leaked whenever predict_image raised;
        # clean it up on every path.
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
deepfake2/deepfake2.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import cv2
|
| 4 |
+
import numpy as np
|
| 5 |
+
from tensorflow.keras.models import Sequential
|
| 6 |
+
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
|
| 7 |
+
from tensorflow.keras.preprocessing.image import ImageDataGenerator
|
| 8 |
+
from tensorflow.keras.models import load_model
|
| 9 |
+
from tensorflow.keras.preprocessing import image
|
| 10 |
+
import random
|
| 11 |
+
|
| 12 |
+
# Paths to dataset directories.
# Expected layout: <data_dir>/{Train,Validation}/{Real,Fake}/<images>
# NOTE(review): machine-specific absolute path — breaks on any other checkout;
# consider an environment variable or a path relative to this file.
data_dir = "C:/Users/ramya/OneDrive - iiit-b/Desktop/data_deepfake/Dataset/"
train_dir = os.path.join(data_dir, "Train")
val_dir = os.path.join(data_dir, "Validation")
# Temp_* dirs receive the size-capped copies built by prepare_limited_dataset.
temp_train_dir = os.path.join(data_dir, "Temp_Train")
temp_val_dir = os.path.join(data_dir, "Temp_Validation")

# Image dimensions every sample is resized to before entering the CNN.
img_height, img_width = 128, 128

# Cap on images copied per class (Real/Fake) into the temp training and
# validation sets — bounds training time and memory use.
max_images_per_class = 12000  # Adjust as needed
|
| 24 |
+
|
| 25 |
+
def count_images(directory):
    """Return (real_count, fake_count) for the 'Real'/'Fake' subdirs of `directory`."""
    counts = tuple(
        len(os.listdir(os.path.join(directory, label)))
        for label in ('Real', 'Fake')
    )
    return counts
|
| 30 |
+
|
| 31 |
+
def prepare_limited_dataset(source_dir, target_dir, max_images):
    """Build a temporary dataset holding at most `max_images` images per class.

    Any existing `target_dir` is wiped; images are chosen at random from
    each of the 'Real' and 'Fake' subdirectories of `source_dir`.
    """
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    for label in ('Real', 'Fake'):
        os.makedirs(os.path.join(target_dir, label), exist_ok=True)

    for label in ('Real', 'Fake'):
        src = os.path.join(source_dir, label)
        dst = os.path.join(target_dir, label)
        names = os.listdir(src)
        random.shuffle(names)
        for name in names[:max_images]:
            shutil.copy(os.path.join(src, name), dst)
|
| 47 |
+
|
| 48 |
+
def get_processed_images_info(generator):
    """Summarize a Keras directory iterator for logging and fit() bookkeeping.

    Parameters
    ----------
    generator : object exposing `n`, `batch_size`, `classes`, `class_indices`
        e.g. the result of ImageDataGenerator.flow_from_directory.

    Returns
    -------
    dict
        'total_samples', 'batch_size', 'steps_per_epoch' and
        'class_distribution' (per-class sample counts by class name).
    """
    # FIX: removed unused local `n_classes` (was computed and never read).
    n_samples = generator.n
    batch_size = generator.batch_size
    # A trailing partial batch still counts as a step, hence the ceiling.
    steps_per_epoch = int(np.ceil(n_samples / batch_size))

    class_distribution = {
        class_name: len(generator.classes[generator.classes == class_index])
        for class_name, class_index in generator.class_indices.items()
    }

    return {
        'total_samples': n_samples,
        'batch_size': batch_size,
        'steps_per_epoch': steps_per_epoch,
        'class_distribution': class_distribution,
    }
|
| 66 |
+
|
| 67 |
+
# Print initial image counts (full dataset, before capping).
print("\nInitial image counts:")
train_real, train_fake = count_images(train_dir)
val_real, val_fake = count_images(val_dir)
print(f"Training - Real: {train_real}, Fake: {train_fake}")
print(f"Validation - Real: {val_real}, Fake: {val_fake}")

# Prepare temporary directories holding at most max_images_per_class
# images per class; training reads from these, not the full dataset.
prepare_limited_dataset(train_dir, temp_train_dir, max_images_per_class)
prepare_limited_dataset(val_dir, temp_val_dir, max_images_per_class)

# Print filtered image counts (after capping).
print("\nAfter filtering:")
train_real, train_fake = count_images(temp_train_dir)
val_real, val_fake = count_images(temp_val_dir)
print(f"Training - Real: {train_real}, Fake: {train_fake}")
print(f"Validation - Real: {val_real}, Fake: {val_fake}")

# Data generators for training and validation.
# Pixels rescaled from [0, 255] to [0, 1]; no augmentation is applied.
datagen = ImageDataGenerator(rescale=1./255)

# class_mode='binary' with classes=['Real', 'Fake'] maps Real->0, Fake->1.
train_gen = datagen.flow_from_directory(
    temp_train_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary',
    classes=['Real', 'Fake']
)

val_gen = datagen.flow_from_directory(
    temp_val_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary',
    classes=['Real', 'Fake']
)

# Get training and validation information (sample counts, steps per epoch).
train_info = get_processed_images_info(train_gen)
val_info = get_processed_images_info(val_gen)

print("\nTraining Data Processing Info:")
print(f"Total training samples: {train_info['total_samples']}")
print(f"Batch size: {train_info['batch_size']}")
print(f"Steps per epoch: {train_info['steps_per_epoch']}")
print("\nClass distribution in training:")
for class_name, count in train_info['class_distribution'].items():
    print(f"{class_name}: {count} images")

print("\nValidation Data Processing Info:")
print(f"Total validation samples: {val_info['total_samples']}")
print(f"Batch size: {val_info['batch_size']}")
print(f"Steps per epoch: {val_info['steps_per_epoch']}")
print("\nClass distribution in validation:")
for class_name, count in val_info['class_distribution'].items():
    print(f"{class_name}: {count} images")

# Define the CNN model: three conv/pool stages, then a dense head with
# dropout; the sigmoid output is P(Fake) for binary classification.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model for a fixed 10 epochs (no early stopping / checkpoints).
history = model.fit(
    train_gen,
    steps_per_epoch=train_info['steps_per_epoch'],
    validation_data=val_gen,
    validation_steps=val_info['steps_per_epoch'],
    epochs=10
)

# Calculate total images processed across all epochs (for the report below).
total_training_images_processed = train_info['total_samples'] * 10  # 10 epochs
total_validation_images_processed = val_info['total_samples'] * 10  # 10 epochs

print(f"\nTotal images processed during training: {total_training_images_processed}")
print(f"Total images processed during validation: {total_validation_images_processed}")
print(f"Combined total processed: {total_training_images_processed + total_validation_images_processed}")

# Save the model into the current working directory.
# NOTE(review): testing2.py loads this file from an absolute path —
# keep the two locations in sync.
model.save("deepfake_detector.h5")
|
| 160 |
+
|
| 161 |
+
# Functions for prediction
def predict_image(img_path):
    """Classify one image with the trained CNN; returns "Fake" or "Real"."""
    loaded = image.load_img(img_path, target_size=(img_height, img_width))
    batch = np.expand_dims(image.img_to_array(loaded) / 255.0, axis=0)
    score = model.predict(batch)[0][0]
    return "Fake" if score > 0.5 else "Real"
|
| 169 |
+
|
| 170 |
+
def predict_video(video_path):
    """Label a video "Real" or "Fake" by majority vote over per-frame CNN scores."""
    capture = cv2.VideoCapture(video_path)
    votes = {"Fake": 0, "Real": 0}

    while capture.isOpened():
        ok, frame = capture.read()
        if not ok:
            break

        # Resize and normalize the frame to match the network's input.
        resized = cv2.resize(frame, (img_height, img_width))
        batch = np.expand_dims(np.array(resized) / 255.0, axis=0)

        # Score the frame and record its vote.
        label = "Fake" if model.predict(batch)[0][0] > 0.5 else "Real"
        votes[label] += 1

    capture.release()
    return "Fake" if votes["Fake"] > votes["Real"] else "Real"
|
| 194 |
+
|
| 195 |
+
# Example usage
if __name__ == "__main__":
    # Smoke-test the image predictor on a known local file, if present.
    test_image_path = "C:/Users/ramya/OneDrive - iiit-b/Desktop/test1.jpg"
    if os.path.exists(test_image_path):
        print(f"\nTest image prediction: {predict_image(test_image_path)}")

    # Video check — enable and point at a local file to use:
    # test_video_path = "example_video.mp4"
    # if os.path.exists(test_video_path):
    #     video_result = predict_video(test_video_path)
    #     print(f"Test video prediction: {video_result}")
|
deepfake2/deepfake_detector.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fb63513282ef336b9bd11ab762da260a4789913a6268246ff9e96a08344009e
|
| 3 |
+
size 39704352
|
deepfake2/testing2.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import cv2
import numpy as np
import imghdr  # NOTE(review): unused here, and removed from the stdlib in Python 3.13 — consider dropping
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from PIL import Image
from PIL.ExifTags import TAGS

# Load the saved model (binary deepfake classifier trained by deepfake2.py).
# NOTE(review): machine-specific absolute path — breaks on any other checkout;
# prefer a path relative to this file or an environment variable.
model_path = "C:/Users/krish/OneDrive - iiit-b/IIITB/Hackathons/Truthtell/complete_nlp_stuff/deepfake2/deepfake_detector.h5"
model = load_model(model_path)

# Image dimensions the network was trained on; inputs are resized to this.
img_height, img_width = 128, 128
|
| 16 |
+
|
| 17 |
+
# Trained model prediction
def predict_image(img_path):
    """Score one image with the CNN.

    Returns "Fake"/"Real", or a message string when the path is missing.
    """
    if not os.path.exists(img_path):
        return "Image path does not exist."
    pil_img = image.load_img(img_path, target_size=(img_height, img_width))
    batch = np.expand_dims(image.img_to_array(pil_img) / 255.0, axis=0)
    return "Fake" if model.predict(batch)[0][0] > 0.5 else "Real"
|
| 26 |
+
|
| 27 |
+
# Metadata analysis
def check_metadata(img_path):
    """Heuristic: images stripped of EXIF metadata are flagged as fake."""
    try:
        # NOTE(review): _getexif() is a private PIL API — verify against getexif().
        exif_data = Image.open(img_path)._getexif()
        if not exif_data:
            return "Fake (missing metadata)"
        metadata = {TAGS.get(tag): value for tag, value in exif_data.items() if tag in TAGS}
        return "Real (metadata present)" if metadata else "Fake (missing metadata)"
    except Exception as e:
        return f"Error analyzing metadata: {str(e)}"
|
| 38 |
+
|
| 39 |
+
# Artifact density analysis
def analyze_artifacts(img_path):
    """Flag images whose Laplacian variance is high as fake.

    Variance of the Laplacian measures high-frequency/edge energy.
    Returns a verdict string, or an error string on unreadable input.
    """
    try:
        img = cv2.imread(img_path)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        laplacian = cv2.Laplacian(img_gray, cv2.CV_64F)
        # FIX: np.mean(np.var(x)) wrapped a scalar in a no-op mean;
        # the intended quantity is simply the variance of the Laplacian.
        lap_var = np.var(laplacian)
        # Threshold of 10 is empirical — TODO confirm against validation data.
        return "Fake (high artifact density)" if lap_var > 10 else "Real"
    except Exception as e:
        return f"Error analyzing artifacts: {str(e)}"
|
| 49 |
+
|
| 50 |
+
# Noise pattern detection
def detect_noise_patterns(img_path):
    """Very low grayscale std-dev suggests unnaturally smooth (fake) imagery."""
    try:
        gray = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
        if np.std(gray) < 5:
            return "Fake (unnatural noise patterns)"
        return "Real"
    except Exception as e:
        return f"Error analyzing noise patterns: {str(e)}"
|
| 59 |
+
|
| 60 |
+
# Symmetry analysis
def calculate_symmetry(img_path):
    """Score vertical/horizontal mirror symmetry of the image in [0, 1].

    Returns {"Vertical Symmetry": v, "Horizontal Symmetry": h} rounded to
    two decimals (1.0 = perfect mirror symmetry), or {"Error": msg} on failure.
    """
    try:
        img = cv2.imread(img_path)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_flipped_v = cv2.flip(img_gray, 1)
        img_flipped_h = cv2.flip(img_gray, 0)
        # FIX: subtracting uint8 arrays wraps around (e.g. 0 - 255 == 1),
        # corrupting the score; promote to a signed type before differencing.
        base = img_gray.astype(np.int16)
        vertical_symmetry = 1 - np.mean(np.abs(base - img_flipped_v)) / 255
        horizontal_symmetry = 1 - np.mean(np.abs(base - img_flipped_h)) / 255
        return {
            "Vertical Symmetry": round(vertical_symmetry, 2),
            "Horizontal Symmetry": round(horizontal_symmetry, 2)
        }
    except Exception as e:
        return {"Error": str(e)}
|
| 75 |
+
|
| 76 |
+
# Combine all methods
def combined_prediction(img_path):
    """Fuse CNN, metadata, artifact, noise and symmetry signals into one verdict.

    Each detector contributes a 0/1 "fake" vote, combined with fixed weights
    (CNN 0.4, metadata 0.1, artifacts 0.15, noise 0.15, symmetry 0.2).
    Returns per-detector results plus "Final Prediction" and
    "Confidence Score" (the weighted fake score; > 0.5 means "Fake").
    """
    results = {}

    cnn_prediction = predict_image(img_path)
    results["CNN Prediction"] = cnn_prediction
    cnn_score = 1 if cnn_prediction == "Fake" else 0

    metadata_result = check_metadata(img_path)
    results["Metadata Analysis"] = metadata_result
    metadata_score = 1 if "Fake" in metadata_result else 0

    artifact_result = analyze_artifacts(img_path)
    results["Artifact Analysis"] = artifact_result
    artifact_score = 1 if "Fake" in artifact_result else 0

    noise_result = detect_noise_patterns(img_path)
    results["Noise Pattern Analysis"] = noise_result
    noise_score = 1 if "Fake" in noise_result else 0

    symmetry_results = calculate_symmetry(img_path)
    results["Symmetry Analysis"] = symmetry_results
    # Defaults of 0 cover the {"Error": ...} failure dict from calculate_symmetry.
    vertical_symmetry = symmetry_results.get("Vertical Symmetry", 0)
    horizontal_symmetry = symmetry_results.get("Horizontal Symmetry", 0)
    # FIX: the old `!= "Unknown"` string check could never fire — these values
    # are floats (or the 0 default); compare numerically and treat
    # near-perfect mirror symmetry as a fake indicator.
    symmetry_score = 1 if (vertical_symmetry > 0.9 or horizontal_symmetry > 0.9) else 0

    total_score = (cnn_score * 0.4 + metadata_score * 0.1 +
                   artifact_score * 0.15 + noise_score * 0.15 +
                   symmetry_score * 0.2)
    results["Final Prediction"] = "Fake" if total_score > 0.5 else "Real"
    results["Confidence Score"] = round(total_score, 2)
    return results
|
| 105 |
+
|
| 106 |
+
# Main function
|
| 107 |
+
# if __name__ == "__main__":
|
| 108 |
+
# test_image_path = "C:/Users/ramya/OneDrive - iiit-b/Desktop/test1.jpg"
|
| 109 |
+
# if os.path.exists(test_image_path):
|
| 110 |
+
# final_results = combined_prediction(test_image_path)
|
| 111 |
+
# print("\nCombined Prediction Results:")
|
| 112 |
+
# for key, value in final_results.items():
|
| 113 |
+
# if isinstance(value, dict):
|
| 114 |
+
# print(f"{key}:")
|
| 115 |
+
# for sub_key, sub_value in value.items():
|
| 116 |
+
# print(f" {sub_key}: {sub_value}")
|
| 117 |
+
# else:
|
| 118 |
+
# print(f"{key}: {value}")
|
final.py
CHANGED
|
@@ -6,6 +6,10 @@ import pickle
|
|
| 6 |
import pandas as pd
|
| 7 |
import google.generativeai as genai
|
| 8 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Load spaCy for NER
|
| 11 |
nlp = spacy.load("en_core_web_sm")
|
|
@@ -18,7 +22,7 @@ model.eval()
|
|
| 18 |
|
| 19 |
#########################
|
| 20 |
def setup_gemini():
|
| 21 |
-
genai.configure(api_key=
|
| 22 |
model = genai.GenerativeModel('gemini-pro')
|
| 23 |
return model
|
| 24 |
#########################
|
|
@@ -114,57 +118,6 @@ def predict_news(text):
|
|
| 114 |
# You can enhance this by combining the scores from both predictions
|
| 115 |
return ml_prediction if ml_prediction == kg_prediction else "UNCERTAIN"
|
| 116 |
|
| 117 |
-
#########################
|
| 118 |
-
# def analyze_content_gemini(model, text):
|
| 119 |
-
# prompt = f"""Analyze this news text and provide results in the following JSON-like format:
|
| 120 |
-
|
| 121 |
-
# TEXT: {text}
|
| 122 |
-
|
| 123 |
-
# Please provide analysis in these specific sections:
|
| 124 |
-
|
| 125 |
-
# 1. GEMINI ANALYSIS:
|
| 126 |
-
# - Predicted Classification: [Real/Fake]
|
| 127 |
-
# - Confidence Score: [0-100%]
|
| 128 |
-
# - Reasoning: [Key points for classification]
|
| 129 |
-
|
| 130 |
-
# 2. TEXT CLASSIFICATION:
|
| 131 |
-
# - Content category/topic
|
| 132 |
-
# - Writing style: [Formal/Informal/Clickbait]
|
| 133 |
-
# - Target audience
|
| 134 |
-
# - Content type: [news/opinion/editorial]
|
| 135 |
-
|
| 136 |
-
# 3. SENTIMENT ANALYSIS:
|
| 137 |
-
# - Primary emotion
|
| 138 |
-
# - Emotional intensity (1-10)
|
| 139 |
-
# - Sensationalism Level: [High/Medium/Low]
|
| 140 |
-
# - Bias Indicators: [List if any]
|
| 141 |
-
# - Tone: (formal/informal), [Professional/Emotional/Neutral]
|
| 142 |
-
# - Key emotional triggers
|
| 143 |
-
|
| 144 |
-
# 4. ENTITY RECOGNITION:
|
| 145 |
-
# - Source Credibility: [High/Medium/Low]
|
| 146 |
-
# - People mentioned
|
| 147 |
-
# - Organizations
|
| 148 |
-
# - Locations
|
| 149 |
-
# - Dates/Time references
|
| 150 |
-
# - Key numbers/statistics
|
| 151 |
-
|
| 152 |
-
# 5. CONTEXT EXTRACTION:
|
| 153 |
-
# - Main narrative/story
|
| 154 |
-
# - Supporting elements
|
| 155 |
-
# - Key claims
|
| 156 |
-
# - Narrative structure
|
| 157 |
-
|
| 158 |
-
# 6. FACT CHECKING:
|
| 159 |
-
# - Verifiable Claims: [List main claims]
|
| 160 |
-
# - Evidence Present: [Yes/No]
|
| 161 |
-
# - Fact Check Score: [0-100%]
|
| 162 |
-
|
| 163 |
-
# Format the response clearly with distinct sections."""
|
| 164 |
-
|
| 165 |
-
# response = model.generate_content(prompt)
|
| 166 |
-
# return response.text
|
| 167 |
-
|
| 168 |
def analyze_content_gemini(model, text):
|
| 169 |
prompt = f"""Analyze this news text and return a JSON object with the following structure:
|
| 170 |
{{
|
|
|
|
| 6 |
import pandas as pd
|
| 7 |
import google.generativeai as genai
|
| 8 |
import json
|
| 9 |
+
import os
|
| 10 |
+
import dotenv
|
| 11 |
+
|
| 12 |
+
dotenv.load_dotenv()
|
| 13 |
|
| 14 |
# Load spaCy for NER
|
| 15 |
nlp = spacy.load("en_core_web_sm")
|
|
|
|
| 22 |
|
| 23 |
#########################
|
| 24 |
def setup_gemini():
    """Configure the Gemini client from the GEMINI_API env var and return a model."""
    genai.configure(api_key=os.getenv("GEMINI_API"))
    return genai.GenerativeModel('gemini-pro')
|
| 28 |
#########################
|
|
|
|
| 118 |
# You can enhance this by combining the scores from both predictions
|
| 119 |
return ml_prediction if ml_prediction == kg_prediction else "UNCERTAIN"
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
def analyze_content_gemini(model, text):
|
| 122 |
prompt = f"""Analyze this news text and return a JSON object with the following structure:
|
| 123 |
{{
|