Update app.py
app.py
CHANGED
@@ -9,11 +9,11 @@ import os
 import json
 
 # Configure Gemini API with key from Hugging Face Secrets
-
 api_key = os.getenv("GEMINI_API_KEY")
+if not api_key:
+    raise ValueError("GEMINI_API_KEY environment variable not set")
 genai.configure(api_key=api_key)
 
-
 # Precomputed data and embeddings
 articles = [
     "Climate change accelerates, with 2024 as the hottest year. Rising sea levels threaten coastal cities.",
@@ -26,23 +26,43 @@ articles = [
 ]
 
 # Generate embeddings
-embedding_model = "models/
+embedding_model = "models/embedding-001" # Update to correct model name
 df = pd.DataFrame({"article": articles})
 
 @retry(tries=3, delay=2, backoff=2)
 def get_embedding(text):
     try:
         result = genai.embed_content(model=embedding_model, content=text, task_type="RETRIEVAL_DOCUMENT")
-
+        # Extract embedding correctly based on API response structure
+        embedding = result.embedding
+        return embedding
     except Exception as e:
         print(f"Embedding error: {e}")
         raise
 
-
+# Generate all embeddings first
+all_embeddings = []
+for article in articles:
+    try:
+        embedding = get_embedding(article)
+        all_embeddings.append(embedding)
+    except Exception as e:
+        print(f"Failed to embed article: {article[:30]}... Error: {e}")
+        all_embeddings.append([0] * 768) # Default embedding dimension, adjust if needed
+
+df["embedding"] = all_embeddings
 
 # Initialize ChromaDB
 client_db = chromadb.Client()
 collection = client_db.get_or_create_collection("news_articles")
+
+# Clear existing data to avoid duplicates
+try:
+    collection.delete(ids=[str(i) for i in range(len(df))])
+except:
+    pass # Collection might be empty
+
+# Add documents to collection
 for idx, row in df.iterrows():
     collection.add(
         documents=[row["article"]],
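A note on the extraction step in this hunk: depending on the installed `google-generativeai` version, `genai.embed_content` returns a plain dict (`{"embedding": [...]}`) rather than an object with an `.embedding` attribute, in which case `result.embedding` raises `AttributeError`. A minimal defensive sketch, not part of the commit, that tolerates both shapes:

```python
# Sketch: tolerate both response shapes from genai.embed_content
# across google-generativeai versions.
def extract_embedding(result):
    if isinstance(result, dict):
        return result["embedding"]  # dict-style response (older SDK releases)
    return result.embedding  # attribute-style response
```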
@@ -63,7 +83,7 @@ def search_articles(query, top_k=3):
         return []
 
 # RAG and Structured Q&A
-generation_model = genai.GenerativeModel("gemini-1.5-pro")
+generation_model = genai.GenerativeModel("gemini-1.5-pro") # Verify model name
 
 @retry(tries=3, delay=2, backoff=2)
 def generate_response(query, articles, system_message):
@@ -71,6 +91,13 @@ def generate_response(query, articles, system_message):
         return "No relevant articles found.", json.dumps({"error": "No relevant articles found."})
 
     context = "\n".join(articles)
+    safety_settings = [
+        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
+        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
+        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
+        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
+    ]
+
     prompt = f"""
 {system_message}
 Based on the following articles, provide a concise summary (under 100 words) and a structured JSON response with 'question', 'answer', and 'source'. Use only the provided context.
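The string-keyed safety settings added here are accepted by the SDK; `google.generativeai.types` also exposes typed enums for the same configuration. An equivalent sketch, if typed constants are preferred:

```python
# Equivalent safety settings using the SDK's typed enums.
from google.generativeai.types import HarmCategory, HarmBlockThreshold

safety_settings = {
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}
```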
@@ -85,7 +112,20 @@ def generate_response(query, articles, system_message):
 - JSON:
 """
     try:
-
+        generation_config = {
+            "temperature": 0.7,
+            "top_p": 0.95,
+            "top_k": 40,
+            "max_output_tokens": 1024,
+        }
+
+        response = generation_model.generate_content(
+            prompt,
+            generation_config=generation_config,
+            safety_settings=safety_settings,
+            stream=False
+        )
+
         full_text = response.text
 
         # Parse response
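One caveat about `full_text = response.text` in this hunk: in the `google-generativeai` SDK, accessing `.text` raises a `ValueError` when the candidate was blocked by safety filters, which the broad `except` below would mask as a generic failure. A hedged helper sketch, not part of the commit:

```python
# Sketch: distinguish safety blocks from other generation errors.
def safe_text(response):
    if not response.candidates:
        return None  # prompt was blocked outright
    try:
        return response.text
    except ValueError:
        return None  # candidate produced no usable text (e.g. safety block)
```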
@@ -93,26 +133,32 @@ def generate_response(query, articles, system_message):
         summary = full_text[full_text.find("- Summary:") + len("- Summary:"):summary_end].strip() if "- Summary:" in full_text else "Summary not generated."
         qa_json = full_text[summary_end + len("- JSON:"):].strip()
 
+        # Clean up the JSON string to make it parseable
+        qa_json = qa_json.replace("```json", "").replace("```", "").strip()
+
         try:
             qa = json.loads(qa_json)
         except json.JSONDecodeError:
-
+            print(f"JSON parse error. Raw string: {qa_json}")
+            qa = {"error": "Failed to parse JSON response.", "raw_text": qa_json}
 
         return summary, json.dumps(qa, indent=2)
     except Exception as e:
         print(f"RAG error: {e}")
-        return "Error generating response.", json.dumps({"error": "Failed to generate response."})
+        return "Error generating response.", json.dumps({"error": f"Failed to generate response: {str(e)}"})
 
 def respond(message, history, system_message="You are a news summarizer and Q&A assistant.", max_tokens=512, temperature=0.7, top_p=0.95):
     articles = search_articles(message)
     summary, qa = generate_response(message, articles, system_message)
-
-
+
+    # Format articles for display
+    articles_text = "\n".join([f"- {article}" for article in articles]) if articles else "None found"
+
     response = (
         "**Relevant Articles:**\n"
-        f"{articles_text}\n"
+        f"{articles_text}\n\n"
         "**Summary:**\n"
-        f"{summary}\n"
+        f"{summary}\n\n"
         "**Structured Q&A:**\n"
         f"{qa}"
     )
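The fence-stripping line added in this hunk covers the common case of the model wrapping its JSON in markdown fences. A slightly more robust variant (an illustrative sketch, not the committed code) also falls back to extracting the first brace-delimited block:

```python
import json
import re

def parse_model_json(raw: str) -> dict:
    # Remove a leading ```json / ``` fence and a trailing ``` fence, if present.
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw.strip())
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass
    # Fall back to the first {...} block anywhere in the text.
    match = re.search(r"\{.*\}", cleaned, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            pass
    return {"error": "Failed to parse JSON response.", "raw_text": raw}
```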
@@ -132,4 +178,4 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-demo.launch()
+    demo.launch()
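For quick verification after this change, a hypothetical smoke test of the retrieval path (names mirror the file above; `search_articles` itself is not shown in the diff, so the query flow here is an assumption):

```python
# Hypothetical smoke test, not part of the commit: embed a query and ask
# Chroma for the nearest stored articles.
query_result = genai.embed_content(
    model=embedding_model,
    content="rising sea levels",
    task_type="RETRIEVAL_QUERY",  # queries use RETRIEVAL_QUERY, documents RETRIEVAL_DOCUMENT
)
query_embedding = query_result["embedding"]

hits = collection.query(query_embeddings=[query_embedding], n_results=3)
print(hits["documents"][0])  # top-3 article texts for the query
```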