|
|
from langchain.prompts import PromptTemplate
|
|
|
from backend.langchain_tools import llm, deepseek_tool
|
|
|
from langchain_core.runnables import RunnableSequence
|
|
|
import json
|
|
|
import re
|
|
|
|
|
|
|
|
|
# Prompt asking the model to place a piece of text into exactly one of nine
# categories and to reply with a JSON object holding "category" and
# "reasoning".  ``{claim}`` is the only template variable; the doubled braces
# are literal braces escaped for the template's format syntax.
# The category separator is a plain ASCII hyphen so the prompt survives any
# re-encoding of this file (it previously carried a mis-encoded character).
claim_classification_prompt = PromptTemplate.from_template("""
You are an expert language analyst trained to classify text-based user claims. Your task is to analyze a given piece of text and classify it into one of the following precise categories, based on meaning, structure, and intention:

Classify the following text into one of these categories:
1. Factual Claim - A statement that can be verified or disproven using evidence.
2. Opinion - A personal belief or viewpoint that cannot be objectively proven.
3. Misleading Claim - A statement that could mislead or distort facts.
4. Exaggeration - A statement that overstates facts or makes things seem more dramatic.
5. Factoid - A trivial, unverifiable claim that seems factual but lacks evidence.
6. Question - A statement framed as a question, seeking information.
7. Joke/Hyperbole - A statement made in jest or exaggeration, not to be taken literally.
8. Testimonial/Personal Experience - A personal account or anecdote.
9. Propaganda/Manipulative Claim - A claim designed to manipulate public opinion.

Text: "{claim}"

Respond in JSON format:
{{
"category": "<one of the above categories>",
"reasoning": "<brief justification>"
}}
""")
|
|
|
|
|
|
|
|
|
# Raw template text of the classification prompt; classify_claim formats it
# directly when it has to build the prompt for the DeepSeek fallback tool.
prompt_template_str = claim_classification_prompt.template

# Primary pipeline: the rendered classification prompt is piped into ``llm``.
claim_chain: RunnableSequence = claim_classification_prompt | llm
|
|
|
|
|
|
|
|
|
def _parse_classification(raw_output, source: str) -> dict:
    """Turn a raw model reply into a validated classification dict.

    Args:
        raw_output: Either a plain string or an object exposing the reply
            text on a ``content`` attribute.
        source: Human-readable backend name used in the error message.

    Returns:
        The decoded JSON object; guaranteed to be a dict containing both
        ``"category"`` and ``"reasoning"``.

    Raises:
        ValueError: If the reply text is not a string, or the decoded JSON
            is not an object with both required keys.
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    # Accept both message-like objects (text on ``.content``) and bare
    # strings, so the same parser serves the chain and the fallback tool.
    text = getattr(raw_output, "content", raw_output)
    if not isinstance(text, str):
        raise ValueError(f"Invalid classification format received from {source}.")
    text = text.strip()

    # Models frequently wrap JSON in a Markdown code fence, sometimes with
    # prose around it; keep only the fenced payload when one is present.
    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
    if fenced:
        text = fenced.group(1)

    parsed = json.loads(text)

    # Require a real JSON object: a bare JSON string would otherwise satisfy
    # the ``in`` checks through substring matching.
    if (
        not isinstance(parsed, dict)
        or 'category' not in parsed
        or 'reasoning' not in parsed
    ):
        raise ValueError(f"Invalid classification format received from {source}.")
    return parsed


def classify_claim(claim_text: str) -> dict:
    """Classify a claim, falling back to DeepSeek-V3 if the primary chain fails.

    The claim is first sent through ``claim_chain``.  If that call raises or
    its reply cannot be parsed into a valid classification, the same prompt
    is formatted by hand and sent to ``deepseek_tool``.

    Args:
        claim_text: The text to classify.

    Returns:
        A dict with ``"category"`` and ``"reasoning"`` on success, or a dict
        with a single ``"error"`` key describing the fallback failure when
        both backends fail.  This function does not raise for backend or
        parsing errors.
    """
    try:
        primary_reply = claim_chain.invoke({"claim": claim_text})
        return _parse_classification(primary_reply, "the model")
    except Exception as e:
        # Deliberately broad: any failure of the primary path (network,
        # parsing, validation) should trigger the fallback, not a crash.
        print(f"Error with LLM: {e}. Falling back to DeepSeek-V3.")

    try:
        deepseek_prompt = prompt_template_str.format(claim=claim_text)
        deepseek_result = deepseek_tool.invoke({"input": deepseek_prompt})
        print("Raw DeepSeek Output:", deepseek_result)
        return _parse_classification(deepseek_result, "DeepSeek-V3")
    except Exception as fallback_e:
        return {"error": f"An error occurred with both LLM and DeepSeek-V3: {str(fallback_e)}"}
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: classify one sample claim and print the resulting dict.
    print(classify_claim("modi is prime minister"))
|
|
|
|