Spaces:

Hammad712
/

grading

Sleeping

App Files Files Community

Hammad712 commited on Mar 4, 2025

Commit

7f269b9

verified ·

1 Parent(s): afb89fd

Create main.py

Browse files

Files changed (1) hide show

main.py +75 -0

main.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import os
+import json
+from typing import List
+from fastapi import FastAPI, UploadFile, File, HTTPException
+from pydantic import BaseModel
+from langchain_groq import ChatGroq
+from langchain.document_loaders import PyPDFLoader
+# Load API key securely from environment variable
+API_KEY = os.getenv("GROQ_API_KEY")
+if not API_KEY:
+    raise ValueError("GROQ_API_KEY environment variable not set.")
+app = FastAPI(title="PDF Question Extractor", version="1.0")
+# Pydantic model for response
+class ExtractionResult(BaseModel):
+    answers: List[str]
+# Initialize LLM
+def get_llm():
+    return ChatGroq(
+        model="llama-3.3-70b-versatile",
+        temperature=0,
+        max_tokens=1024,
+        api_key=API_KEY
+    )
+llm = get_llm()
+@app.post("/extract-answers/")
+async def extract_answers(file: UploadFile = File(...)):
+    try:
+        # Save the uploaded file temporarily
+        file_path = f"./temp_{file.filename}"
+        with open(file_path, "wb") as buffer:
+            buffer.write(file.file.read())
+        # Load and extract text from PDF
+        loader = PyPDFLoader(file_path)
+        pages = loader.load_and_split()
+        all_page_content = "\n".join(page.page_content for page in pages)
+        # JSON schema definition
+        schema_dict = ExtractionResult.model_json_schema()
+        schema = json.dumps(schema_dict, indent=2)
+        # System message
+        system_message = (
+            "You are a document analysis tool that extracts the options and correct answers from the provided document content. "
+            "The output must be a JSON object that strictly follows the schema: " + schema
+        )
+        # User message
+        user_message = (
+            "Please extract the correct answers and options (A, B, C, D, E) from the following document content:\n\n"
+            + all_page_content
+        )
+        # Construct final prompt
+        prompt = system_message + "\n\n" + user_message
+        # Get LLM response
+        response = llm.invoke(prompt, response_format={"type": "json_object"})
+        # Parse and validate response
+        result = ExtractionResult.model_validate_json(response.content)
+        # Cleanup
+        os.remove(file_path)
+        return result.model_dump()
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))