Spaces:

Hammad712
/

grading

Sleeping

App Files Community

Hammad712 committed on Mar 4, 2025

Commit

16aaeed

verified ·

1 Parent(s): b7f6319

Update main.py

Browse files

Files changed (1) hide show

main.py +21 -15

main.py CHANGED Viewed

@@ -6,18 +6,18 @@ from pydantic import BaseModel
 from langchain_groq import ChatGroq
 from langchain.document_loaders import PyPDFLoader
-# Load API key securely from environment variable
 API_KEY = os.getenv("GROQ_API_KEY")
 if not API_KEY:
     raise ValueError("GROQ_API_KEY environment variable not set.")
 app = FastAPI(title="PDF Question Extractor", version="1.0")
-# Pydantic model for response
 class ExtractionResult(BaseModel):
     answers: List[str]
-# Initialize LLM
 def get_llm():
     return ChatGroq(
         model="llama-3.3-70b-versatile",
@@ -28,6 +28,15 @@ def get_llm():
 llm = get_llm()
 @app.post("/extract-answers/")
 async def extract_answers(file: UploadFile = File(...)):
     try:
@@ -36,37 +45,34 @@ async def extract_answers(file: UploadFile = File(...)):
         with open(file_path, "wb") as buffer:
             buffer.write(file.file.read())
-        # Load and extract text from PDF
         loader = PyPDFLoader(file_path)
         pages = loader.load_and_split()
         all_page_content = "\n".join(page.page_content for page in pages)
-        # JSON schema definition
         schema_dict = ExtractionResult.model_json_schema()
         schema = json.dumps(schema_dict, indent=2)
-        # System message
         system_message = (
-            "You are a document analysis tool that extracts the options and correct answers from the provided document content. "
-            "The output must be a JSON object that strictly follows the schema: " + schema
         )
-        # User message
         user_message = (
             "Please extract the correct answers and options (A, B, C, D, E) from the following document content:\n\n"
             + all_page_content
         )
-        # Construct final prompt
         prompt = system_message + "\n\n" + user_message
-        # Get LLM response
         response = llm.invoke(prompt, response_format={"type": "json_object"})
-        # Parse and validate response
         result = ExtractionResult.model_validate_json(response.content)
-        # Cleanup
         os.remove(file_path)
         return result.model_dump()

 from langchain_groq import ChatGroq
 from langchain.document_loaders import PyPDFLoader
+# Securely load your Groq API key from environment variables
 API_KEY = os.getenv("GROQ_API_KEY")
 if not API_KEY:
     raise ValueError("GROQ_API_KEY environment variable not set.")
 app = FastAPI(title="PDF Question Extractor", version="1.0")
+# Define the expected JSON response schema
 class ExtractionResult(BaseModel):
     answers: List[str]
+# Initialize the language model (LLM)
 def get_llm():
     return ChatGroq(
         model="llama-3.3-70b-versatile",
 llm = get_llm()
+# Root endpoint: Provides a welcome message and instructions
+@app.get("/")
+async def root():
+    return {
+        "message": "Welcome to the PDF Question Extractor API.",
+        "usage": "POST your PDF to /extract-answers/ to extract answers."
+    }
+# PDF extraction endpoint: Processes a PDF file upload
 @app.post("/extract-answers/")
 async def extract_answers(file: UploadFile = File(...)):
     try:
         with open(file_path, "wb") as buffer:
             buffer.write(file.file.read())
+        # Load and split the PDF into pages
         loader = PyPDFLoader(file_path)
         pages = loader.load_and_split()
         all_page_content = "\n".join(page.page_content for page in pages)
+        # Generate the JSON schema from the Pydantic model
         schema_dict = ExtractionResult.model_json_schema()
         schema = json.dumps(schema_dict, indent=2)
+        # Build the prompt with system and user messages
         system_message = (
+            "You are a document analysis tool that extracts the options and correct answers "
+            "from the provided document content. The output must be a JSON object that strictly follows the schema: "
+            + schema
         )
         user_message = (
             "Please extract the correct answers and options (A, B, C, D, E) from the following document content:\n\n"
             + all_page_content
         )
         prompt = system_message + "\n\n" + user_message
+        # Invoke the LLM and request a JSON response
         response = llm.invoke(prompt, response_format={"type": "json_object"})
+        # Validate and parse the JSON response using Pydantic
         result = ExtractionResult.model_validate_json(response.content)
+        # Cleanup the temporary file
         os.remove(file_path)
         return result.model_dump()