garvitcpp committed on
Commit
753f6f9
·
verified ·
1 Parent(s): cf07549

Update app/services/extraction.py

Browse files
Files changed (1) hide show
  1. app/services/extraction.py +107 -61
app/services/extraction.py CHANGED
@@ -346,10 +346,11 @@ Output: {{
346
  ---
347
  Now process this input:
348
  Input: "{text}"
349
- Output: ONLY provide the JSON data with no additional formatting, markdowns, or annotations.
350
  """}
351
  ]
352
 
 
353
  prompt = ""
354
  for message in messages:
355
  if message["role"] == "system":
@@ -362,66 +363,24 @@ Output: ONLY provide the JSON data with no additional formatting, markdowns, or
362
 
363
  # Generate response
364
  response = model.generate_content(prompt,
365
- generation_config=genai.types.GenerationConfig(
366
- temperature=0,
367
- max_output_tokens=150,
368
- top_p=1
369
- ))
370
 
371
- # Process the response - handle complex responses correctly
372
- try:
373
- # Try the simple accessor first
374
- if hasattr(response, 'text'):
375
- output_text = response.text.strip()
376
- # If that fails, try accessing the parts
377
- elif hasattr(response, 'parts') and response.parts:
378
- output_text = ''.join([part.text for part in response.parts if hasattr(part, 'text')])
379
- # If that fails, try the full path
380
- elif hasattr(response, 'candidates') and response.candidates:
381
- parts = response.candidates[0].content.parts
382
- output_text = ''.join([part.text for part in parts if hasattr(part, 'text')])
383
- else:
384
- # Last resort: try to find any text in the response
385
- output_text = str(response)
386
- if not '{' in output_text: # Check if it looks like JSON
387
- # If we can't extract valid text, return a simple fallback
388
- return {
389
- "category": "",
390
- "calories": "",
391
- "time": "",
392
- "ingredients": ["ingredient from " + text],
393
- "keywords": ["keyword from " + text],
394
- "keywords_name": []
395
- }
396
- except Exception as e:
397
- print(f"Error processing Gemini response: {e}")
398
- # Log additional information about the response for debugging
399
- print(f"Response type: {type(response)}")
400
- print(f"Response attributes: {dir(response)}")
401
- return {
402
- "error": f"Failed to process Gemini response: {str(e)}",
403
- "ingredients": [text], # Include the search text as an ingredient for fallback
404
- "keywords": [text],
405
- "keywords_name": []
406
- }
407
 
408
  try:
409
- # Extract JSON from markdown code blocks if needed
410
- if output_text.strip().startswith('```') and '```' in output_text:
411
- # Extract the content between the code block markers
412
- import re
413
- match = re.search(r'```(?:json)?\n(.*?)\n```', output_text, re.DOTALL)
414
- if match:
415
- output_text = match.group(1).strip()
416
- else:
417
- # Try another pattern without the language specification
418
- match = re.search(r'```(.*?)```', output_text, re.DOTALL)
419
- if match:
420
- output_text = match.group(1).strip()
421
-
422
- # Now parse the cleaned JSON
423
  result = json.loads(output_text)
424
-
425
  # Update category with closest match from dataset
426
  original_category = result["category"]
427
  matched_category = find_closest_category(original_category)
@@ -438,11 +397,98 @@ Output: ONLY provide the JSON data with no additional formatting, markdowns, or
438
  result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "caffeinated", "coffee"]
439
  result["ingredients"] = result.get("ingredients", []) + ["coffee beans", "water"]
440
 
441
- # ... (rest of your conditional processing)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
 
443
- except json.JSONDecodeError as e:
444
- print(f"JSON parse error: {e}")
445
- print(f"Failed to parse: {output_text}")
446
  result = {"error": "Failed to parse JSON", "output": output_text}
447
 
448
  return result
 
346
  ---
347
  Now process this input:
348
  Input: "{text}"
349
+ Output:
350
  """}
351
  ]
352
 
353
+ # Build the prompt to send to the Gemini API
354
  prompt = ""
355
  for message in messages:
356
  if message["role"] == "system":
 
363
 
364
  # Generate response
365
  response = model.generate_content(prompt,
366
+ generation_config=genai.types.GenerationConfig(
367
+ temperature=0,
368
+ max_output_tokens=150,
369
+ top_p=1
370
+ ))
371
 
372
+ # Process the response
373
+ output_text = response.text.strip()
374
+
375
+ # Check if output is wrapped in markdown code blocks and extract the JSON if needed
376
+ if output_text.strip().startswith('```') and '```' in output_text:
377
+ import re
378
+ match = re.search(r'```(?:json)?\n(.*?)\n```', output_text, re.DOTALL)
379
+ if match:
380
+ output_text = match.group(1).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  result = json.loads(output_text)
 
384
  # Update category with closest match from dataset
385
  original_category = result["category"]
386
  matched_category = find_closest_category(original_category)
 
397
  result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "caffeinated", "coffee"]
398
  result["ingredients"] = result.get("ingredients", []) + ["coffee beans", "water"]
399
 
400
+ elif "smoothie bowl" in text.lower():
401
+ result["keywords"] = result.get("keywords", []) + ["beverages", "healthy", "smoothie bowl"]
402
+ result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "smoothie bowl"]
403
+ result["ingredients"] = result.get("ingredients", []) + ["fruits", "yogurt", "granola"]
404
+
405
+ elif "kombucha" in text.lower():
406
+ result["keywords"] = result.get("keywords", []) + ["beverage", "fermented", "kombucha"]
407
+ result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "kombucha"]
408
+ result["ingredients"] = result.get("ingredients", []) + ["tea", "sugar", "SCOBY"]
409
+
410
+ elif "herbal tea" in text.lower():
411
+ result["keywords"] = result.get("keywords", []) + ["beverages", "caffeine-free", "herbal tea"]
412
+ result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "herbal tea"]
413
+ result["ingredients"] = result.get("ingredients", []) + ["herbs", "water"]
414
+
415
+ elif "seaweed" in text.lower():
416
+ result["keywords"] = result.get("keywords", []) + ["ingredient", "seafood", "seaweed"]
417
+ result["keywords_name"] = result.get("keywords_name", []) + ["seaweed"]
418
+ result["ingredients"] = result.get("ingredients", []) + ["seaweed"]
419
+
420
+ elif "vegan cheese" in text.lower():
421
+ result["keywords"] = result.get("keywords", []) + ["dairy-free", "vegan", "cheese"]
422
+ result["keywords_name"] = result.get("keywords_name", []) + ["vegan cheese"]
423
+ result["ingredients"] = result.get("ingredients", []) + ["cashews", "nutritional yeast", "coconut oil"]
424
+
425
+ elif "air fryer" in text.lower():
426
+ result["keywords"] = result.get("keywords", []) + ["cooking method", "air fryer", "healthy"]
427
+ result["keywords_name"] = result.get("keywords_name", []) + ["air fryer"]
428
+ result["ingredients"] = result.get("ingredients", []) # Ingredients vary with recipe, left blank
429
+
430
+ elif "instant pot" in text.lower():
431
+ result["keywords"] = result.get("keywords", []) + ["cooking method", "instant pot", "pressure cooker"]
432
+ result["keywords_name"] = result.get("keywords_name", []) + ["instant pot"]
433
+ result["ingredients"] = result.get("ingredients", []) # Ingredients vary with recipe, left blank
434
+
435
+ elif "sous vide" in text.lower():
436
+ result["keywords"] = result.get("keywords", []) + ["cooking method", "sous vide", "precision cooking"]
437
+ result["keywords_name"] = result.get("keywords_name", []) + ["sous vide"]
438
+ result["ingredients"] = result.get("ingredients", []) # Ingredients vary with recipe, left blank
439
+
440
+ elif "paleo" in text.lower():
441
+ result["keywords"] = result.get("keywords", []) + ["diet", "paleo", "low-carb"]
442
+ result["keywords_name"] = result.get("keywords_name", []) + ["paleo"]
443
+ result["ingredients"] = result.get("ingredients", []) # Ingredients vary with recipe, left blank
444
+
445
+ elif "fodmap" in text.lower():
446
+ result["keywords"] = result.get("keywords", []) + ["diet", "fodmap", "digestive health"]
447
+ result["keywords_name"] = result.get("keywords_name", []) + ["fodmap"]
448
+ result["ingredients"] = result.get("ingredients", []) # Ingredients vary with recipe, left blank
449
+
450
+ elif "cold brew" in text.lower():
451
+ result["keywords"] = result.get("keywords", []) + ["beverages", "caffeinated", "cold coffee"]
452
+ result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "cold brew"]
453
+ result["ingredients"] = result.get("ingredients", []) + ["coffee grounds", "water"]
454
+
455
+ elif "matcha" in text.lower():
456
+ result["keywords"] = result.get("keywords", []) + ["beverages", "green tea", "matcha"]
457
+ result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "matcha"]
458
+ result["ingredients"] = result.get("ingredients", []) + ["matcha powder", "water", "milk"]
459
+
460
+ elif "smoothie" in text.lower():
461
+ result["keywords"] = result.get("keywords", []) + ["beverages", "healthy", "smoothie"]
462
+ result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "smoothie"]
463
+ result["ingredients"] = result.get("ingredients", []) + ["fruits", "milk", "yogurt"]
464
+
465
+ elif "protein shake" in text.lower():
466
+ result["keywords"] = result.get("keywords", []) + ["beverages", "high protein", "shake"]
467
+ result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "protein shake"]
468
+ result["ingredients"] = result.get("ingredients", []) + ["protein powder", "milk", "banana"]
469
+
470
+ elif "oat milk" in text.lower() or "almond milk" in text.lower():
471
+ result["keywords"] = result.get("keywords", []) + ["dairy-free", "vegan", "plant-based milk"]
472
+ result["keywords_name"] = result.get("keywords_name", []) + ["oat milk" if "oat" in text.lower() else "almond milk"]
473
+ result["ingredients"] = result.get("ingredients", []) + ["oats" if "oat" in text.lower() else "almonds", "water"]
474
+
475
+ elif "zoodles" in text.lower():
476
+ result["keywords"] = result.get("keywords", []) + ["low carb", "gluten-free", "vegetable noodles", "noodles"]
477
+ result["keywords_name"] = result.get("keywords_name", []) + ["zoodles", "noodles"]
478
+ result["ingredients"] = result.get("ingredients", []) + ["zucchini"]
479
+
480
+ elif "avocado toast" in text.lower():
481
+ result["keywords"] = result.get("keywords", []) + ["breakfast", "healthy", "avocado"]
482
+ result["keywords_name"] = result.get("keywords_name", []) + ["avocado toast"]
483
+ result["ingredients"] = result.get("ingredients", []) + ["avocado", "bread"]
484
+
485
+ elif "golden milk" in text.lower():
486
+ result["keywords"] = result.get("keywords", []) + ["beverage", "turmeric", "anti-inflammatory"]
487
+ result["keywords_name"] = result.get("keywords_name", []) + ["golden milk"]
488
+ result["ingredients"] = result.get("ingredients", []) + ["turmeric", "milk", "honey", "spices"]
489
+ # other cases...
490
 
491
+ except json.JSONDecodeError:
 
 
492
  result = {"error": "Failed to parse JSON", "output": output_text}
493
 
494
  return result