Spaces:
Sleeping
Sleeping
import os

# Step 1: Point the Hugging Face cache at a writable directory (HF_HOME only).
# The Hugging Face libraries resolve their cache paths when they are first
# imported, so this must happen BEFORE `transformers` is imported below;
# setting it afterwards leaves the default cache location in effect.
HF_CACHE_DIR = "/app/cache"
os.environ["HF_HOME"] = HF_CACHE_DIR

# Step 2: Ensure the cache directory exists before anything is downloaded.
os.makedirs(HF_CACHE_DIR, exist_ok=True)

from fastapi import FastAPI  # noqa: E402  (must follow the HF_HOME setup)
from pydantic import BaseModel  # noqa: E402
from transformers import pipeline  # noqa: E402

# Step 3: Load the model from Hugging Face once, at import time, so every
# call to extract_question reuses the same pipeline instance.
# NOTE(review): the checkpoint name indicates an MRPC (paraphrase) classifier;
# confirm this is the intended model for scoring question lines.
question_extractor = pipeline(
    "text-classification", model="textattack/bert-base-uncased-MRPC"
)

app = FastAPI()
# Pydantic model wrapping the text handed to extract_question
# (presumably raw OCR output, going by the name -- confirm with the caller).
class OCRText(BaseModel):
    # The full text to analyse; extract_question processes it line by line.
    text: str
# Lines with this many characters or fewer (after trimming) are skipped.
_MIN_LINE_CHARS = 10
# How many of the highest-scoring lines are joined into the result.
_TOP_LINES = 3


def extract_question(data: OCRText):
    """Return the highest-scoring lines of ``data.text`` joined by spaces.

    The text is split into lines, each line is trimmed, and lines of
    ``_MIN_LINE_CHARS`` characters or fewer are discarded. The remaining
    lines are scored by ``question_extractor`` and the ``_TOP_LINES`` best
    ones are joined with a single space, highest score first. Lines with
    equal scores keep their original relative order.

    Args:
        data: Model whose ``text`` attribute holds the text to analyse.

    Returns:
        A dict ``{"extracted_question": <str>}``. The string is empty when
        no line is long enough to be considered.

    NOTE(review): no route decorator is visible on this function in the
    reviewed source; confirm it is registered with ``app`` where intended.
    """
    # Filter BEFORE ranking: previously the length check ran after the
    # top-3 slice, so short or blank lines could take top slots and the
    # result could contain fewer lines than were actually available.
    candidates = [
        stripped
        for stripped in (line.strip() for line in data.text.splitlines())
        if len(stripped) > _MIN_LINE_CHARS
    ]
    if not candidates:
        return {"extracted_question": ""}

    # One pipeline call for all candidates instead of one call per line
    # from inside the sort key; a list input yields one prediction per item.
    # NOTE(review): the score used here is whatever the pipeline reports for
    # its top label -- confirm that ranking across labels is intended.
    predictions = question_extractor(candidates)

    # sorted() is stable, so ties preserve the original line order.
    ranked = sorted(
        zip(candidates, predictions),
        key=lambda pair: pair[1]["score"],
        reverse=True,
    )
    question_text = " ".join(line for line, _ in ranked[:_TOP_LINES])
    return {"extracted_question": question_text}