Kakaarot committed on
Commit
272c653
·
verified ·
1 Parent(s): 571665d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -14
app.py CHANGED
@@ -9,11 +9,11 @@ import os
9
  import json
10
 
11
  # Configure Gemini API with key from Hugging Face Secrets
12
-
13
  api_key = os.getenv("GEMINI_API_KEY")
 
 
14
  genai.configure(api_key=api_key)
15
 
16
-
17
  # Precomputed data and embeddings
18
  articles = [
19
  "Climate change accelerates, with 2024 as the hottest year. Rising sea levels threaten coastal cities.",
@@ -26,23 +26,43 @@ articles = [
26
  ]
27
 
28
  # Generate embeddings
29
- embedding_model = "models/text-embedding-004"
30
  df = pd.DataFrame({"article": articles})
31
 
32
  @retry(tries=3, delay=2, backoff=2)
33
  def get_embedding(text):
34
  try:
35
  result = genai.embed_content(model=embedding_model, content=text, task_type="RETRIEVAL_DOCUMENT")
36
- return result["embedding"]
 
 
37
  except Exception as e:
38
  print(f"Embedding error: {e}")
39
  raise
40
 
41
- df["embedding"] = df["article"].apply(get_embedding)
 
 
 
 
 
 
 
 
 
 
42
 
43
  # Initialize ChromaDB
44
  client_db = chromadb.Client()
45
  collection = client_db.get_or_create_collection("news_articles")
 
 
 
 
 
 
 
 
46
  for idx, row in df.iterrows():
47
  collection.add(
48
  documents=[row["article"]],
@@ -63,7 +83,7 @@ def search_articles(query, top_k=3):
63
  return []
64
 
65
  # RAG and Structured Q&A
66
- generation_model = genai.GenerativeModel("gemini-1.5-pro-002")
67
 
68
  @retry(tries=3, delay=2, backoff=2)
69
  def generate_response(query, articles, system_message):
@@ -71,6 +91,13 @@ def generate_response(query, articles, system_message):
71
  return "No relevant articles found.", json.dumps({"error": "No relevant articles found."})
72
 
73
  context = "\n".join(articles)
 
 
 
 
 
 
 
74
  prompt = f"""
75
  {system_message}
76
  Based on the following articles, provide a concise summary (under 100 words) and a structured JSON response with 'question', 'answer', and 'source'. Use only the provided context.
@@ -85,7 +112,20 @@ def generate_response(query, articles, system_message):
85
  - JSON:
86
  """
87
  try:
88
- response = generation_model.generate_content(prompt, stream=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  full_text = response.text
90
 
91
  # Parse response
@@ -93,26 +133,32 @@ def generate_response(query, articles, system_message):
93
  summary = full_text[full_text.find("- Summary:") + len("- Summary:"):summary_end].strip() if "- Summary:" in full_text else "Summary not generated."
94
  qa_json = full_text[summary_end + len("- JSON:"):].strip()
95
 
 
 
 
96
  try:
97
  qa = json.loads(qa_json)
98
  except json.JSONDecodeError:
99
- qa = {"error": "Failed to parse JSON response."}
 
100
 
101
  return summary, json.dumps(qa, indent=2)
102
  except Exception as e:
103
  print(f"RAG error: {e}")
104
- return "Error generating response.", json.dumps({"error": "Failed to generate response."})
105
 
106
  def respond(message, history, system_message="You are a news summarizer and Q&A assistant.", max_tokens=512, temperature=0.7, top_p=0.95):
107
  articles = search_articles(message)
108
  summary, qa = generate_response(message, articles, system_message)
109
- # Fix f-string syntax error by pre-formatting outside f-string
110
- articles_text = "\n".join(articles) if articles else "None"
 
 
111
  response = (
112
  "**Relevant Articles:**\n"
113
- f"{articles_text}\n"
114
  "**Summary:**\n"
115
- f"{summary}\n"
116
  "**Structured Q&A:**\n"
117
  f"{qa}"
118
  )
@@ -132,4 +178,4 @@ demo = gr.ChatInterface(
132
  )
133
 
134
  if __name__ == "__main__":
135
- demo.launch()
 
9
  import json
10
 
11
  # Configure Gemini API with key from Hugging Face Secrets
 
12
  api_key = os.getenv("GEMINI_API_KEY")
13
+ if not api_key:
14
+ raise ValueError("GEMINI_API_KEY environment variable not set")
15
  genai.configure(api_key=api_key)
16
 
 
17
  # Precomputed data and embeddings
18
  articles = [
19
  "Climate change accelerates, with 2024 as the hottest year. Rising sea levels threaten coastal cities.",
 
26
  ]
27
 
28
  # Generate embeddings
29
+ embedding_model = "models/embedding-001" # Update to correct model name
30
  df = pd.DataFrame({"article": articles})
31
 
32
  @retry(tries=3, delay=2, backoff=2)
33
  def get_embedding(text):
34
  try:
35
  result = genai.embed_content(model=embedding_model, content=text, task_type="RETRIEVAL_DOCUMENT")
36
+ # Extract embedding correctly based on API response structure
37
+ embedding = result.embedding
38
+ return embedding
39
  except Exception as e:
40
  print(f"Embedding error: {e}")
41
  raise
42
 
43
+ # Generate all embeddings first
44
+ all_embeddings = []
45
+ for article in articles:
46
+ try:
47
+ embedding = get_embedding(article)
48
+ all_embeddings.append(embedding)
49
+ except Exception as e:
50
+ print(f"Failed to embed article: {article[:30]}... Error: {e}")
51
+ all_embeddings.append([0] * 768) # Default embedding dimension, adjust if needed
52
+
53
+ df["embedding"] = all_embeddings
54
 
55
  # Initialize ChromaDB
56
  client_db = chromadb.Client()
57
  collection = client_db.get_or_create_collection("news_articles")
58
+
59
+ # Clear existing data to avoid duplicates
60
+ try:
61
+ collection.delete(ids=[str(i) for i in range(len(df))])
62
+ except:
63
+ pass # Collection might be empty
64
+
65
+ # Add documents to collection
66
  for idx, row in df.iterrows():
67
  collection.add(
68
  documents=[row["article"]],
 
83
  return []
84
 
85
  # RAG and Structured Q&A
86
+ generation_model = genai.GenerativeModel("gemini-1.5-pro") # Verify model name
87
 
88
  @retry(tries=3, delay=2, backoff=2)
89
  def generate_response(query, articles, system_message):
 
91
  return "No relevant articles found.", json.dumps({"error": "No relevant articles found."})
92
 
93
  context = "\n".join(articles)
94
+ safety_settings = [
95
+ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
96
+ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
97
+ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
98
+ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
99
+ ]
100
+
101
  prompt = f"""
102
  {system_message}
103
  Based on the following articles, provide a concise summary (under 100 words) and a structured JSON response with 'question', 'answer', and 'source'. Use only the provided context.
 
112
  - JSON:
113
  """
114
  try:
115
+ generation_config = {
116
+ "temperature": 0.7,
117
+ "top_p": 0.95,
118
+ "top_k": 40,
119
+ "max_output_tokens": 1024,
120
+ }
121
+
122
+ response = generation_model.generate_content(
123
+ prompt,
124
+ generation_config=generation_config,
125
+ safety_settings=safety_settings,
126
+ stream=False
127
+ )
128
+
129
  full_text = response.text
130
 
131
  # Parse response
 
133
  summary = full_text[full_text.find("- Summary:") + len("- Summary:"):summary_end].strip() if "- Summary:" in full_text else "Summary not generated."
134
  qa_json = full_text[summary_end + len("- JSON:"):].strip()
135
 
136
+ # Clean up the JSON string to make it parseable
137
+ qa_json = qa_json.replace("``````", "").strip()
138
+
139
  try:
140
  qa = json.loads(qa_json)
141
  except json.JSONDecodeError:
142
+ print(f"JSON parse error. Raw string: {qa_json}")
143
+ qa = {"error": "Failed to parse JSON response.", "raw_text": qa_json}
144
 
145
  return summary, json.dumps(qa, indent=2)
146
  except Exception as e:
147
  print(f"RAG error: {e}")
148
+ return "Error generating response.", json.dumps({"error": f"Failed to generate response: {str(e)}"})
149
 
150
  def respond(message, history, system_message="You are a news summarizer and Q&A assistant.", max_tokens=512, temperature=0.7, top_p=0.95):
151
  articles = search_articles(message)
152
  summary, qa = generate_response(message, articles, system_message)
153
+
154
+ # Format articles for display
155
+ articles_text = "\n".join([f"- {article}" for article in articles]) if articles else "None found"
156
+
157
  response = (
158
  "**Relevant Articles:**\n"
159
+ f"{articles_text}\n\n"
160
  "**Summary:**\n"
161
+ f"{summary}\n\n"
162
  "**Structured Q&A:**\n"
163
  f"{qa}"
164
  )
 
178
  )
179
 
180
  if __name__ == "__main__":
181
+ demo.launch()