Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- README.md +8 -8
- utils/interpret_lab_pdf.py +13 -16
README.md
CHANGED
|
@@ -4,19 +4,19 @@ emoji: 🌿
|
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
tags:
|
| 12 |
-
- cannabis
|
| 13 |
-
- terpenes
|
| 14 |
-
- llm
|
| 15 |
-
- gradio
|
| 16 |
-
- health
|
| 17 |
-
- pdf
|
| 18 |
---
|
| 19 |
|
| 20 |
# 🌿 Budtender AI Assistant
|
| 21 |
|
| 22 |
-
**Budtender AI Assistant**
|
|
|
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.15.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
tags:
|
| 12 |
+
- cannabis
|
| 13 |
+
- terpenes
|
| 14 |
+
- llm
|
| 15 |
+
- gradio
|
| 16 |
+
- health
|
| 17 |
+
- pdf
|
| 18 |
---
|
| 19 |
|
| 20 |
# 🌿 Budtender AI Assistant
|
| 21 |
|
| 22 |
+
**Budtender AI Assistant** analiza certificados de análisis de cannabis, extrayendo solamente las secciones relevantes de terpenos y cannabinoides. Esto permite interpretaciones precisas, rápidas y compatibles con modelos livianos de lenguaje natural.
|
utils/interpret_lab_pdf.py
CHANGED
|
@@ -1,37 +1,34 @@
|
|
| 1 |
import fitz # PyMuPDF
|
| 2 |
-
from transformers import pipeline
|
| 3 |
-
from transformers import AutoTokenizer
|
| 4 |
|
| 5 |
generator = pipeline("text-generation", model="tiiuae/falcon-rw-1b")
|
| 6 |
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
|
| 7 |
|
| 8 |
-
def
|
| 9 |
doc = fitz.open(pdf_path)
|
| 10 |
text = ""
|
| 11 |
for page in doc:
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
return text
|
| 14 |
-
|
| 15 |
-
|
| 16 |
|
| 17 |
def analyze_pdf(pdf_path):
|
| 18 |
-
text =
|
| 19 |
|
| 20 |
-
# Prompt fijo
|
| 21 |
prefix = (
|
| 22 |
-
"Eres un experto en cannabis medicinal. Analiza
|
| 23 |
-
"
|
| 24 |
-
|
|
|
|
| 25 |
)
|
| 26 |
|
| 27 |
-
# Codificar prompt + texto completo
|
| 28 |
full_input = prefix + text
|
| 29 |
tokens = tokenizer(full_input, truncation=True, max_length=1024, return_tensors="pt")
|
| 30 |
-
|
| 31 |
-
# Decodificar tokens truncados
|
| 32 |
truncated_input = tokenizer.decode(tokens["input_ids"][0], skip_special_tokens=True)
|
| 33 |
|
| 34 |
-
# Ejecutar modelo con input seguro
|
| 35 |
result = generator(truncated_input, max_new_tokens=300, do_sample=True)
|
| 36 |
return result[0]['generated_text'].split("Interpretaci贸n:")[-1].strip()
|
| 37 |
-
|
|
|
|
| 1 |
import fitz # PyMuPDF
|
| 2 |
+
from transformers import pipeline, AutoTokenizer
|
|
|
|
| 3 |
|
| 4 |
generator = pipeline("text-generation", model="tiiuae/falcon-rw-1b")
|
| 5 |
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
|
| 6 |
|
| 7 |
+
def extract_relevant_text(pdf_path):
    """Return the lines of a lab-report PDF that mention terpenes or cannabinoids.

    Every page is read with PyMuPDF and split into lines. A line is kept when
    it contains any of the marker substrings below (section headings, units,
    or compound names). Matching is case-sensitive.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The kept lines in document order, each terminated by a newline;
        an empty string when nothing matches.
    """
    # Section headings, units and compound names that flag a relevant line.
    markers = (
        "Terpenes", "Cannabinoids",
        "mg/g", "%", "THC", "CBD", "Myrcene", "Limonene", "Caryophyllene",
        "Humulene", "Linalool", "Pinene", "Ocimene",
    )

    kept = []
    # The context manager closes the document even if text extraction fails.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            for line in page.get_text().splitlines():
                if any(marker in line for marker in markers):
                    kept.append(line)

    return "".join(f"{line}\n" for line in kept)
|
|
|
|
|
|
|
| 18 |
|
| 19 |
def analyze_pdf(pdf_path):
    """Generate a Spanish-language interpretation of a cannabis lab-report PDF.

    The relevant lines of the PDF are appended to a fixed instruction prompt,
    the combined prompt is truncated to at most 1024 tokens, and the result
    is passed to the module-level text-generation pipeline.

    Args:
        pdf_path: Path of the PDF file to analyze.

    Returns:
        The generated text after the last "Interpretación:" marker, stripped
        of surrounding whitespace. If the marker does not occur, the whole
        generated text is returned.
    """
    text = extract_relevant_text(pdf_path)

    # The prompt must end with explicit "\n\n" escapes: a raw line break
    # inside a single-quoted literal is a SyntaxError.
    prefix = (
        "Eres un experto en cannabis medicinal. Analiza los siguientes datos del análisis de una cepa de cannabis. "
        "Describe sus efectos, usos terapéuticos y el perfil del strain según sus niveles de terpenos y cannabinoides:\n\n"
    )

    full_input = prefix + text
    # Tokenize with truncation so an oversized report cannot exceed 1024
    # prompt tokens.
    tokens = tokenizer(full_input, truncation=True, max_length=1024, return_tensors="pt")
    # Decode the truncated ids back to text before handing them to the generator.
    truncated_input = tokenizer.decode(tokens["input_ids"][0], skip_special_tokens=True)

    result = generator(truncated_input, max_new_tokens=300, do_sample=True)
    # NOTE(review): the prompt above contains no "Interpretación:" marker, so
    # unless the model emits it this returns the full generated text
    # (presumably including the prompt) — confirm this is intended.
    return result[0]['generated_text'].split("Interpretación:")[-1].strip()
|
|
|