Spaces:

VicGerardoPR
/

BudtenderGuide

Sleeping

BudtenderGuide / utils /interpret_lab_pdf.py

Upload 4 files

01c821e verified 9 months ago

1.4 kB

	import fitz # PyMuPDF
	from transformers import pipeline, AutoTokenizer

	# Checkpoint identifier shared by the tokenizer and the generation pipeline,
	# kept in one place so the two can never point at different models.
	_MODEL_ID = "tiiuae/falcon-rw-1b"

	# Both objects are created once, when the module is imported, and are then
	# reused by every call made through this module.
	tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
	generator = pipeline("text-generation", model=_MODEL_ID)

	def extract_relevant_text(pdf_path):
		"""Return the lines of a lab-report PDF that mention cannabinoid or terpene data.

		Every page is read as plain text and scanned line by line. A line is kept
		when it contains at least one of the section titles, units or compound
		names listed in ``keywords`` (case-sensitive substring match).

		Args:
			pdf_path: Location of the PDF, passed unchanged to ``fitz.open``.

		Returns:
			The kept lines in document order, each terminated by ``"\\n"``.
			An empty string when no line matches.
		"""
		keywords = (
			"Terpenes", "Cannabinoids",
			"mg/g", "%", "THC", "CBD", "Myrcene", "Limonene", "Caryophyllene",
			"Humulene", "Linalool", "Pinene", "Ocimene",
		)
		kept = []
		# The context manager closes the document even if text extraction
		# raises; previously the handle was never released.
		with fitz.open(pdf_path) as doc:
			for page in doc:
				for line in page.get_text().splitlines():
					if any(keyword in line for keyword in keywords):
						kept.append(line)
		# Single join instead of repeated string concatenation.
		return "".join(line + "\n" for line in kept)

	def analyze_pdf(pdf_path):
		"""Generate a written interpretation of a cannabis lab-report PDF.

		The relevant lines of the report are extracted, placed after a Spanish
		instruction prompt, capped at 1024 tokens and handed to the
		text-generation pipeline.

		Args:
			pdf_path: Location of the PDF, forwarded to ``extract_relevant_text``.

		Returns:
			The text the model produced after the ``"Interpretación:"`` marker,
			stripped of surrounding whitespace. If the model emits the marker
			again itself, only the text after its last occurrence is returned.
		"""
		marker = "Interpretación:"
		text = extract_relevant_text(pdf_path)

		prefix = (
			"Eres un experto en cannabis medicinal. Analiza los siguientes datos del análisis de una cepa de cannabis. "
			"Describe sus efectos, usos terapéuticos y el perfil del strain según sus niveles de terpenos y cannabinoides.\n\n"
		)

		# Cap instructions + lab data at 1024 tokens, then turn the kept tokens
		# back into text so the pipeline receives a plain string.
		tokens = tokenizer(prefix + text, truncation=True, max_length=1024, return_tensors="pt")
		truncated_input = tokenizer.decode(tokens["input_ids"][0], skip_special_tokens=True)

		# The marker is appended AFTER truncation so it can never be cut off.
		# Without it the split below matched nothing, and because the pipeline
		# returns prompt + completion by default, the whole prompt and lab data
		# were returned together with the answer.
		prompt = f"{truncated_input.rstrip()}\n\n{marker}"

		result = generator(prompt, max_new_tokens=300, do_sample=True)
		return result[0]['generated_text'].split(marker)[-1].strip()