Spaces:

CwAnkit07
/

Question-textractor

Sleeping

Question-textractor / app.py

Update app.py

d7c42fc verified 12 months ago

925 Bytes

	import os

	from fastapi import FastAPI
	from pydantic import BaseModel

	# Directory the Hugging Face libraries should use for downloaded models.
	CACHE_DIR = "/app/cache"

	# Step 1: point HF_HOME at the cache directory.
	# This MUST happen before `transformers` (and, through it, huggingface_hub)
	# is imported: the cache location is resolved from the environment at import
	# time, so assigning HF_HOME after the import has no effect.
	os.environ["HF_HOME"] = CACHE_DIR

	# Step 2: make sure the cache directory exists before the first download.
	os.makedirs(CACHE_DIR, exist_ok=True)

	from transformers import pipeline  # noqa: E402  (must follow the HF_HOME setup)

	# Step 3: load the model from Hugging Face once, at import time, so every
	# request reuses the same pipeline object.
	question_extractor = pipeline("text-classification", model="textattack/bert-base-uncased-MRPC")

	app = FastAPI()

	# Request body accepted by the /extract_question endpoint.
	class OCRText(BaseModel):
	    # Raw text to analyse; the endpoint splits it on newlines and scores
	    # each line separately.
	    text: str

	@app.post("/extract_question")
	def extract_question(data: OCRText):
	    """Pick the highest-scoring lines of the submitted text and join them.

	    The text is split into lines, each line is trimmed, and lines of 10
	    characters or fewer are dropped. The remaining lines are scored with
	    ``question_extractor`` and the three best-scoring ones are joined with
	    single spaces, best first.

	    Args:
	        data: Request body carrying the raw text in ``data.text``.

	    Returns:
	        A dict ``{"extracted_question": <str>}``. The string is empty when
	        no line is long enough to be considered.
	    """
	    min_line_length = 10  # lines at or below this length are ignored
	    max_lines = 3  # number of top-ranked lines that make up the answer

	    # splitlines() also handles "\r\n" and "\r" endings, which split("\n")
	    # would leave attached to the end of each line.
	    stripped_lines = (line.strip() for line in data.text.splitlines())

	    # Filter BEFORE ranking so blank or very short lines neither cost a model
	    # call nor take one of the top slots away from a real sentence.
	    candidates = [line for line in stripped_lines if len(line) > min_line_length]
	    if not candidates:
	        return {"extracted_question": ""}

	    # NOTE(review): the score used here is the confidence of whichever label
	    # the classifier predicts for the line, not the probability of a specific
	    # "question" class -- confirm this is the intended ranking signal.
	    ranked = sorted(
	        candidates,
	        key=lambda line: question_extractor(line)[0]["score"],
	        reverse=True,
	    )

	    return {"extracted_question": " ".join(ranked[:max_lines])}