Spaces:

Manasa1
/

tweets_clone

Sleeping

App Files Files Community

Manasa1 committed on Dec 14, 2024

Commit

c2c3e4f

verified ·

1 Parent(s): fd73a47

Update app.py

Browse files

Files changed (1) hide show

app.py +141 -46

app.py CHANGED Viewed

@@ -1,48 +1,143 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# Load pre-trained model (or fine-tuned model)
-model_name = "Manasa1/GPT_Finetuned_tweets"  # Replace with the fine-tuned model name
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
-# Function to generate tweets
-def generate_tweet(input_text):
-    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True, padding=True)
-    outputs = model.generate(
-        inputs['input_ids'],
-        attention_mask=inputs['attention_mask'],
-        max_length=150,  # Limit to 150 tokens for brevity
-        num_return_sequences=1,
-        top_p=0.9,  # Narrow focus to ensure more concise results
-        top_k=40,   # Focus on high-probability words
-        do_sample=True,
-        pad_token_id=tokenizer.pad_token_id
-    )
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Extract the tweet text (exclude prompt if included)
-    return generated_text.strip()
-# Gradio interface
-def main():
-    with gr.Blocks() as interface:
-        gr.Markdown("""
-        # Tweet Generator
-        Enter a topic or idea, and the AI will craft a concise, engaging, and impactful tweet inspired by innovative thought leadership.
-        """)
-        with gr.Row():
-            input_text = gr.Textbox(label="Enter your idea or topic:")
-            output_tweet = gr.Textbox(label="Generated Tweet:", interactive=False)
-        generate_button = gr.Button("Generate Tweet")
-        generate_button.click(generate_tweet, inputs=[input_text], outputs=[output_tweet])
-    return interface
-# Run Gradio app
 if __name__ == "__main__":
-    app = main()
-    app.launch(share=True)

 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import random
+from datetime import datetime
+from PyPDF2 import PdfReader
+import json
+from dotenv import load_dotenv
+load_dotenv()
+class TweetDatasetProcessor:
+    """Turn a PDF of tweets into prompts for a fine-tuned causal language model.
+
+    Typical flow: ``extract_text_from_pdf`` -> ``process_pdf_content`` ->
+    ``analyze_personality`` -> ``generate_tweet``.
+    """
+
+    # Upper bounds on newly generated tokens; the prompt is not counted.
+    ANALYSIS_MAX_NEW_TOKENS = 256
+    TWEET_MAX_NEW_TOKENS = 64
+    # Used only when the model config exposes no context-window size.
+    DEFAULT_CONTEXT_WINDOW = 1024
+
+    def __init__(self, fine_tuned_model_name, pdf_path):
+        """Load the model and tokenizer and remember the PDF location.
+
+        Args:
+            fine_tuned_model_name: Name or path passed to ``from_pretrained``.
+            pdf_path: Path of the PDF read by ``extract_text_from_pdf``.
+        """
+        self.tweets = []
+        # Replaced by the generated analysis text in analyze_personality().
+        self.personality_profile = {}
+        self.vectorizer = None  # No need for vectorizer here since we're not clustering
+        self.used_tweets = set()  # Track used tweets to avoid repetition
+        self.pdf_path = pdf_path
+        # Load fine-tuned model and tokenizer
+        self.model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_name)
+
+    @staticmethod
+    def _process_line(line):
+        """Convert one raw text line into a tweet record.
+
+        Returns:
+            ``None`` for blank lines and lines starting with ``http``;
+            otherwise a dict with ``content``, ``timestamp`` (the time of
+            processing, not of the tweet), ``mentions`` and ``hashtags``.
+        """
+        line = line.strip()
+        if not line or line.startswith('http'):  # Skip empty lines and URLs
+            return None
+        words = line.split()
+        return {
+            'content': line,
+            'timestamp': datetime.now(),
+            'mentions': [word for word in words if word.startswith('@')],
+            'hashtags': [word for word in words if word.startswith('#')],
+        }
+
+    def extract_text_from_pdf(self):
+        """Extract the text of every page of the PDF at ``self.pdf_path``.
+
+        Pages are joined with a newline so the last line of one page cannot
+        fuse with the first line of the next; ``process_pdf_content`` splits
+        on newlines and drops any empty lines this introduces.
+        """
+        reader = PdfReader(self.pdf_path)
+        # ``or ""`` keeps the join safe should a page yield no text.
+        return "\n".join(page.extract_text() or "" for page in reader.pages)
+
+    def process_pdf_content(self, text):
+        """Split extracted text into tweet records and store them.
+
+        Args:
+            text: Raw text, one tweet per line.
+
+        Returns:
+            The list of tweet records, also stored on ``self.tweets``.
+
+        Raises:
+            ValueError: If ``text`` is blank or no line yields a tweet.
+        """
+        if not text.strip():
+            raise ValueError("The provided PDF appears to be empty.")
+        records = (self._process_line(line) for line in text.split('\n'))
+        self.tweets = [record for record in records if record]
+        if not self.tweets:
+            raise ValueError("No tweets were extracted from the PDF. Ensure the content is properly formatted.")
+        return self.tweets
+
+    def _generate(self, prompt, max_new_tokens, temperature):
+        """Sample a continuation of ``prompt`` and return only the new text."""
+        # NOTE(review): assumes the config exposes ``max_position_embeddings``;
+        # otherwise the class-level fallback is used.
+        context_window = (
+            getattr(self.model.config, "max_position_embeddings", None)
+            or self.DEFAULT_CONTEXT_WINDOW
+        )
+        # Leave room for the continuation inside the context window.
+        prompt_budget = max(1, context_window - max_new_tokens)
+        inputs = self.tokenizer(
+            prompt, return_tensors='pt', truncation=True, max_length=prompt_budget
+        )
+        pad_token_id = self.tokenizer.pad_token_id
+        if pad_token_id is None:
+            # Tokenizers without a pad token fall back to end-of-sequence.
+            pad_token_id = self.tokenizer.eos_token_id
+        output = self.model.generate(
+            inputs['input_ids'],
+            attention_mask=inputs['attention_mask'],
+            # max_length would count the prompt tokens too.
+            max_new_tokens=max_new_tokens,
+            # temperature only has an effect when sampling is enabled.
+            do_sample=True,
+            temperature=temperature,
+            num_return_sequences=1,
+            pad_token_id=pad_token_id,
+        )
+        # Drop the echoed prompt so only the generated text is decoded.
+        prompt_length = inputs['input_ids'].shape[-1]
+        return self.tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()
+
+    def analyze_personality(self, max_tweets=50):
+        """Generate a personality analysis from the first ``max_tweets`` tweets.
+
+        Returns:
+            The generated analysis text, also stored on
+            ``self.personality_profile``.
+
+        Raises:
+            ValueError: If no tweets have been loaded.
+        """
+        if not self.tweets:
+            raise ValueError("No tweets available for personality analysis.")
+        all_tweets = [tweet['content'] for tweet in self.tweets][:max_tweets]
+        analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets:
+        Core beliefs, emotional tendencies, cognitive patterns, etc.
+        Tweets for analysis:
+        {json.dumps(all_tweets, indent=2)}
+        """
+        self.personality_profile = self._generate(
+            analysis_prompt, self.ANALYSIS_MAX_NEW_TOKENS, temperature=0.7
+        )
+        return self.personality_profile
+
+    def generate_tweet(self, context="", sample_size=3):
+        """Generate a new tweet, prompting with randomly sampled stored tweets.
+
+        Args:
+            context: Optional topic inserted into the prompt.
+            sample_size: Number of stored tweets to include as examples; capped
+                at the number of tweets available.
+
+        Returns:
+            The generated text, or an error message string when no tweets
+            are loaded.
+        """
+        if not self.tweets:
+            return "Error: No tweets available for generation."
+        # Randomly sample unique tweets
+        available_tweets = [tweet for tweet in self.tweets if tweet['content'] not in self.used_tweets]
+        if len(available_tweets) < sample_size:
+            self.used_tweets.clear()  # Reset used tweets if all have been used
+            available_tweets = self.tweets
+        # random.sample raises ValueError when asked for more items than exist.
+        sample_count = max(0, min(sample_size, len(available_tweets)))
+        sampled_tweets = random.sample(available_tweets, sample_count)
+        sampled_contents = [tweet['content'] for tweet in sampled_tweets]
+        # Update the used tweets tracker
+        self.used_tweets.update(sampled_contents)
+        # Truncate personality profile to avoid token overflow; before any
+        # analysis has run the profile is not text, so use an empty excerpt.
+        profile = self.personality_profile
+        personality_profile_excerpt = profile[:400] if isinstance(profile, str) else ""
+        # Construct the prompt
+        prompt = f"""Based on this personality profile:
+        {personality_profile_excerpt}
+        Current context or topic (if any):
+        {context}
+        Tweets for context:
+        {', '.join(sampled_contents)}
+        **Only generate the tweet. Do not include analysis, explanation, or any other content.**
+        """
+        return self._generate(prompt, self.TWEET_MAX_NEW_TOKENS, temperature=1.0)
+# Gradio Interface Function
+# Processors keyed by (model name, PDF path), so the model is loaded and the
+# PDF parsed once per process instead of on every request.
+_PROCESSOR_CACHE = {}
+
+
+def _get_processor(fine_tuned_model_name, pdf_path):
+    """Return a processor with tweets loaded, building it on first use."""
+    key = (fine_tuned_model_name, pdf_path)
+    if key not in _PROCESSOR_CACHE:
+        processor = TweetDatasetProcessor(fine_tuned_model_name, pdf_path)
+        processor.process_pdf_content(processor.extract_text_from_pdf())
+        # Stored only after the PDF parsed cleanly, so a failed load is retried.
+        _PROCESSOR_CACHE[key] = processor
+    return _PROCESSOR_CACHE[key]
+
+
+def gradio_interface():
+    """Run the personality analysis and generate one tweet.
+
+    The PDF path and model name below are placeholders to be replaced.
+    Reusing one processor across calls also lets its used-tweet tracking
+    carry over between requests.
+
+    Returns:
+        A ``(personality_analysis, generated_tweet)`` tuple.
+    """
+    # Path to the PDF with tweets
+    pdf_path = 'path_to_your_pdf.pdf'  # Replace with your PDF file path
+    fine_tuned_model_name = 'path_to_your_fine_tuned_model'  # Replace with the path to your fine-tuned model
+    processor = _get_processor(fine_tuned_model_name, pdf_path)
+    personality_analysis = processor.analyze_personality(max_tweets=50)
+    generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)
+    return personality_analysis, generated_tweet
+# Gradio app setup: an interface with no input components whose two output
+# textboxes receive the (analysis, tweet) pair returned by gradio_interface.
+iface = gr.Interface(
+    title="AI Personality and Tweet Generation",
+    description="Automatically analyze personality and generate tweets based on a provided PDF of tweets.",
+    fn=gradio_interface,
+    inputs=[],
+    outputs=[
+        gr.Textbox(label="Personality Analysis"),
+        gr.Textbox(label="Generated Tweet"),
+    ],
+    # NOTE(review): live mode with no inputs; confirm how Gradio triggers fn here.
+    live=True,
+)
+# Start the web app only when this file is executed as a script.
 if __name__ == "__main__":
+    iface.launch()