Zeggai Abdellah committed on
Commit
bc69312
·
1 Parent(s): 9ec487c

Try to fix the trigger for the generation

Browse files
Files changed (2) hide show
  1. app.py +88 -59
  2. requirements.txt +0 -0
app.py CHANGED
@@ -1,15 +1,17 @@
1
- from fastapi import FastAPI
2
  import json
3
  from dotenv import load_dotenv
4
- import requests
5
  import time
6
  import uuid
7
  from typing import List, Dict
8
  from datetime import datetime
 
 
 
9
  # Load environment variables from .env file
10
  load_dotenv()
 
11
  from langchain_google_genai import GoogleGenerativeAI
12
- import os
13
 
14
  app = FastAPI()
15
 
@@ -30,20 +32,9 @@ def estimate_difficulty(question: str, q_type: str) -> str:
30
  return "medium"
31
  return "hard" # applied
32
 
33
-
34
- def generate_questions_for_chunk(chunk: str, chunk_id: int, model="gemini-2.0-flash") -> List[Dict]:
35
  """
36
  Generate French questions for a given document chunk using the Gemini API.
37
-
38
- Args:
39
- chunk (str): A chunk of text from the vaccine guide (in French).
40
- chunk_id (int): Chunk identifier.
41
- api_key (str): Gemini API key.
42
- client: Gemini API client instance.
43
- model (str): Model name for Gemini API.
44
-
45
- Returns:
46
- List[Dict]: List of questions with metadata.
47
  """
48
  prompt = f"""
49
  À partir du texte suivant d'un guide sur les vaccins en français, générez 3 questions variées (factual, conceptual, applied) qui couvrent le contenu de manière exhaustive.
@@ -71,26 +62,14 @@ def generate_questions_for_chunk(chunk: str, chunk_id: int, model="gemini-2.0-f
71
  """
72
 
73
  try:
74
- # Initialize the LLM - using GoogleGenerativeAI instead of ChatGoogleGenerativeAI
75
  llm = GoogleGenerativeAI(
76
  model=model,
77
  google_api_key=os.getenv("GOOGLE_API_KEY")
78
  )
79
 
80
- # Generate response using langchain
81
  response = llm.invoke(prompt)
82
-
83
 
84
- # Debug: Print raw response to inspect structure
85
- print(f"Raw response for chunk {chunk_id}: {response}")
86
-
87
- # Parse the response (adjust based on actual Gemini API response structure)
88
- questions_text = ""
89
- if hasattr(response, 'candidates') and response.candidates:
90
- questions_text = response.candidates[0].content.parts[0].text if response.candidates[0].content.parts else ""
91
-
92
- # Debug: Print extracted text
93
- print(f"Extracted questions_text for chunk {chunk_id}: {questions_text}")
94
 
95
  # Strip Markdown code fences
96
  if questions_text.startswith("```json\n") and questions_text.endswith("\n```"):
@@ -98,10 +77,6 @@ def generate_questions_for_chunk(chunk: str, chunk_id: int, model="gemini-2.0-f
98
  elif questions_text.startswith("```") and questions_text.endswith("```"):
99
  questions_text = questions_text[3:-3].strip()
100
 
101
- # Debug: Print cleaned text
102
- print(f"Cleaned questions_text for chunk {chunk_id}: {questions_text}")
103
-
104
- # Parse JSON
105
  if not questions_text:
106
  print(f"Erreur: Réponse vide pour le chunk {chunk_id}")
107
  return []
@@ -120,7 +95,7 @@ def generate_questions_for_chunk(chunk: str, chunk_id: int, model="gemini-2.0-f
120
  "type": q["type"],
121
  "difficulty": difficulty,
122
  "training_purpose": "Knowledge Recall" if q["type"] == "factual" else "Reasoning",
123
- "validated": False # Flag for expert review
124
  })
125
 
126
  return formatted_questions
@@ -132,16 +107,9 @@ def generate_questions_for_chunk(chunk: str, chunk_id: int, model="gemini-2.0-f
132
  print(f"Erreur de parsing de la réponse API pour le chunk {chunk_id}: {e}")
133
  return []
134
 
135
- def generate_questions_for_document(chunks: List[str],) -> Dict:
136
  """
137
  Generate questions for all document chunks and structure as a scientific dataset.
138
-
139
- Args:
140
- chunks (List[str]): List of document chunks.
141
- api_key (str): Gemini API key.
142
-
143
- Returns:
144
- Dict: Dataset with header and questions.
145
  """
146
  all_questions = []
147
 
@@ -151,7 +119,6 @@ def generate_questions_for_document(chunks: List[str],) -> Dict:
151
  all_questions.extend(questions)
152
  time.sleep(9) # Rate limiting
153
 
154
- # Create dataset with scientific structure
155
  dataset = {
156
  "dataset_info": {
157
  "title": "Vaccine Guide Question-Answer Dataset",
@@ -168,27 +135,89 @@ def generate_questions_for_document(chunks: List[str],) -> Dict:
168
 
169
  return dataset
170
 
171
- def save_dataset(dataset: Dict, output_file: str):
172
  """
173
- Save dataset to a JSON file.
174
-
175
- Args:
176
- dataset (Dict): The dataset to save.
177
- output_file (str): Path to output JSON file.
178
  """
179
- with open(output_file, 'w', encoding='utf-8') as f:
 
180
  json.dump(dataset, f, indent=4, ensure_ascii=False)
181
- print(f"Dataset saved to {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
- if __name__ == "__main__":
184
- import uvicorn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- # Load the chunks from the JSON file
187
- with open("Data/Processed_Data/chunks.json", "r", encoding="utf-8") as f:
188
- chunks_data = json.load(f)
 
 
 
 
 
 
189
 
190
- VACCINE_CHUNKS=[chunks_data[0]["text"]]
191
- dataset = generate_questions_for_document(VACCINE_CHUNKS)
192
- save_dataset(dataset, "vaccine_questions.json")
193
- # Run the FastAPI app
194
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
import json
import os
import time
import uuid
from datetime import datetime
from typing import Dict, List

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse  # not exported by the top-level fastapi package
from huggingface_hub import HfApi  # For file persistence in Spaces
10
+
11
  # Load environment variables from .env file
12
  load_dotenv()
13
+
14
  from langchain_google_genai import GoogleGenerativeAI
 
15
 
16
  app = FastAPI()
17
 
 
32
  return "medium"
33
  return "hard" # applied
34
 
35
+ def generate_questions_for_chunk(chunk: str, chunk_id: int, model="gemini-2.0-flash") -> List[Dict]:
 
36
  """
37
  Generate French questions for a given document chunk using the Gemini API.
 
 
 
 
 
 
 
 
 
 
38
  """
39
  prompt = f"""
40
  À partir du texte suivant d'un guide sur les vaccins en français, générez 3 questions variées (factual, conceptual, applied) qui couvrent le contenu de manière exhaustive.
 
62
  """
63
 
64
  try:
 
65
  llm = GoogleGenerativeAI(
66
  model=model,
67
  google_api_key=os.getenv("GOOGLE_API_KEY")
68
  )
69
 
 
70
  response = llm.invoke(prompt)
 
71
 
72
+ questions_text = str(response) # Convert response to string
 
 
 
 
 
 
 
 
 
73
 
74
  # Strip Markdown code fences
75
  if questions_text.startswith("```json\n") and questions_text.endswith("\n```"):
 
77
  elif questions_text.startswith("```") and questions_text.endswith("```"):
78
  questions_text = questions_text[3:-3].strip()
79
 
 
 
 
 
80
  if not questions_text:
81
  print(f"Erreur: Réponse vide pour le chunk {chunk_id}")
82
  return []
 
95
  "type": q["type"],
96
  "difficulty": difficulty,
97
  "training_purpose": "Knowledge Recall" if q["type"] == "factual" else "Reasoning",
98
+ "validated": False
99
  })
100
 
101
  return formatted_questions
 
107
  print(f"Erreur de parsing de la réponse API pour le chunk {chunk_id}: {e}")
108
  return []
109
 
110
+ def generate_questions_for_document(chunks: List[str]) -> Dict:
111
  """
112
  Generate questions for all document chunks and structure as a scientific dataset.
 
 
 
 
 
 
 
113
  """
114
  all_questions = []
115
 
 
119
  all_questions.extend(questions)
120
  time.sleep(9) # Rate limiting
121
 
 
122
  dataset = {
123
  "dataset_info": {
124
  "title": "Vaccine Guide Question-Answer Dataset",
 
135
 
136
  return dataset
137
 
138
def save_dataset_to_space(dataset: Dict, filename: str):
    """
    Write the dataset as JSON under /home/user/ and try to upload it to the Space repo.

    Args:
        dataset (Dict): JSON-serializable dataset to write.
        filename (str): File name, used both for the local file under
            /home/user/ and as the path inside the Space repository.

    The upload is best-effort: it is skipped when the SPACE_ID environment
    variable is not set, and any upload error is printed instead of raised.
    Errors while writing the local file are not caught and propagate to the
    caller.
    """
    # NOTE(review): whether /home/user survives a Space restart is not
    # established by this file -- confirm before relying on it as persistent.
    persistent_path = f"/home/user/{filename}"
    with open(persistent_path, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)
    print(f"Dataset saved to {persistent_path}")

    # Without a repo id there is nothing to upload to; say so explicitly
    # instead of calling the Hub with repo_id=None and masking the failure.
    space_id = os.getenv("SPACE_ID")
    if not space_id:
        print("Could not upload to Space: SPACE_ID is not set")
        return

    # Optionally upload to Space files
    try:
        api = HfApi(token=os.getenv("HF_TOKEN"))
        api.upload_file(
            path_or_fileobj=persistent_path,
            path_in_repo=filename,
            repo_id=space_id,
            repo_type="space",
        )
        print(f"File {filename} uploaded to Space")
    except Exception as e:
        # Deliberate best-effort: the local copy has already been written.
        print(f"Could not upload to Space: {e}")
159
 
160
@app.get("/generate-questions")
async def generate_questions():
    """
    Generate questions from the first vaccine-guide chunk and save the dataset.

    Looks for the chunks JSON file in a fixed list of locations, passes the
    "text" of its first entry to generate_questions_for_document, writes the
    result with save_dataset_to_space, and returns a summary containing the
    dataset info and a download URL.

    Raises:
        HTTPException: 404 when no chunks file is found in any known location;
            500 when the chunks file is empty or any other error occurs.
    """
    # NOTE(review): this coroutine does blocking work (file I/O and the
    # question generation call), which stalls the event loop while it runs.
    # Try to load chunks from different possible locations
    chunks_paths = [
        "Data/Processed_Data/chunks.json",
        "chunks.json",
        "/home/user/chunks.json",
    ]

    try:
        chunks_data = None
        for path in chunks_paths:
            try:
                with open(path, "r", encoding="utf-8") as f:
                    chunks_data = json.load(f)
                break
            except FileNotFoundError:
                continue

        if chunks_data is None:
            raise HTTPException(status_code=404, detail="Chunks file not found in any known location")

        # An empty file would otherwise surface as an opaque IndexError message.
        if not chunks_data:
            raise HTTPException(status_code=500, detail="Chunks file is empty")

        # Only the first chunk is processed.
        vaccine_chunks = [chunks_data[0]["text"]]
        dataset = generate_questions_for_document(vaccine_chunks)

        # Save to persistent storage
        filename = "vaccine_questions.json"
        save_dataset_to_space(dataset, filename)

        return {
            "status": "success",
            "message": "Questions generated successfully",
            "dataset_info": dataset["dataset_info"],
            "download_url": f"/download/{filename}",
        }
    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 404 above) intact instead of
        # letting the generic handler below rewrap them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
200
+
201
@app.get("/download/{filename}")
async def download_file(filename: str):
    """
    Return a generated file stored under /home/user/ as a JSON download.

    Args:
        filename (str): Plain file name taken from the URL path.

    Raises:
        HTTPException: 404 when the name is not a plain file name or no
            regular file with that name exists under /home/user/.
    """
    # The name comes straight from the request: refuse anything carrying a
    # directory component or a dot-entry so it cannot address other paths.
    if filename in ("", ".", "..") or os.path.basename(filename) != filename:
        raise HTTPException(status_code=404, detail="File not found")

    file_path = f"/home/user/{filename}"
    # isfile rather than exists: a directory cannot be sent as a file.
    if os.path.isfile(file_path):
        return FileResponse(file_path, media_type="application/json", filename=filename)
    raise HTTPException(status_code=404, detail="File not found")
210
 
211
@app.get("/")
async def root():
    """
    Return the API name and a short listing of the available endpoints.
    """
    return {
        "message": "Vaccine Question Generator API",
        "endpoints": {
            # The generation route is registered with @app.get, so it is
            # advertised as GET rather than POST.
            "GET /generate-questions": "Generate questions from vaccine guide",
            "GET /download/{filename}": "Download generated files",
        },
    }
220
 
221
if __name__ == "__main__":
    # Executed as a script: serve the FastAPI app on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(app, port=7860, host="0.0.0.0")
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ