TanU21 committed on
Commit
c14ec06
·
verified ·
1 Parent(s): f5636f3

Update app.py

Browse files

Modify the model...

Files changed (1) hide show
  1. app.py +42 -38
app.py CHANGED
@@ -1,58 +1,62 @@
1
  from langchain_community.document_loaders import PDFPlumberLoader
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
- from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate
4
  import gradio as gr
5
  import os
6
  from langchain_groq import ChatGroq
 
7
 
 
 
8
  def process_pdf(file):
9
  try:
10
- loader = PDFPlumberLoader(file.name)
 
 
 
 
11
  result = loader.load()
12
- splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
13
- return splitter.split_documents(result)
14
- except Exception as e:
15
- return f"Error processing PDF: {str(e)}"
16
 
 
 
 
17
 
 
 
 
18
 
19
  def initialize_llm():
20
- groq_api_key = os.environ.get("GROQ_API_KEY")
 
21
  if not groq_api_key:
22
  raise ValueError("GROQ_API_KEY environment variable not set.")
23
 
24
  return ChatGroq(
25
- model="mixtral-8x7b-32768",
26
  temperature=0.7,
27
  api_key=groq_api_key,
28
- verbose = False
29
- )
30
- def create_prompt():
31
- examples = [
32
- {"input": "What is the main topic discussed in the document?",
33
- "output": "The document discusses the concept and details of Neural Networks."},
34
- {"input": "Explain the term 'activation function' as used in this document.",
35
- "output": "An activation function in the context of this document refers to a mathematical function applied to neurons' output to introduce non-linearity in the model."}
36
- ]
37
-
38
- example_template = PromptTemplate(
39
- input_variables=["input", "output"],
40
- template="Human: {input}\nAssistant: {output}"
41
  )
42
 
43
- return FewShotPromptTemplate(
44
- examples=examples,
45
- example_prompt=example_template,
46
- prefix="You are an AI assistant that provides specific and accurate answers based on the provided document.",
47
- suffix="Human: {input}\nAssistant:",
48
- input_variables=["input"]
 
 
 
 
49
  )
50
 
51
- def generate_answer(chain, user_input):
52
  try:
53
- response = chain.invoke({"input": user_input})
54
- answer=response.content
55
- return answer
56
  except Exception as e:
57
  return f"Error generating answer: {str(e)}"
58
 
@@ -60,18 +64,18 @@ def handle_file(file, user_input):
60
  if not file:
61
  return "Please upload a PDF document."
62
 
63
- data = process_pdf(file)
64
- if isinstance(data, str):
65
- return data
 
 
 
66
 
67
  llm = initialize_llm()
68
  prompt = create_prompt()
69
  chain = prompt | llm
70
 
71
- if not user_input.strip():
72
- return "Please enter a question."
73
-
74
- return generate_answer(chain, user_input)
75
 
76
  interface = gr.Interface(
77
  fn=handle_file,
 
1
  from langchain_community.document_loaders import PDFPlumberLoader
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
+ from langchain_core.prompts import PromptTemplate
4
  import gradio as gr
5
  import os
6
  from langchain_groq import ChatGroq
7
+ from dotenv import load_dotenv
8
 
9
+
10
+ MAX_DOC_LENGTH = 4000
11
  def process_pdf(file):
12
  try:
13
+ temp_path = file.name
14
+ if not os.path.exists(temp_path):
15
+ return "Error: Uploaded file path does not exist."
16
+
17
+ loader = PDFPlumberLoader(temp_path)
18
  result = loader.load()
19
+ splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
20
+ split_docs = splitter.split_documents(result)
 
 
21
 
22
+ # Extract text from the split documents
23
+ document_text = "\n".join([doc.page_content for doc in split_docs])
24
+ document_text = document_text[:MAX_DOC_LENGTH]
25
 
26
+ return document_text # Returning the full document text
27
+ except Exception as e:
28
+ return f"Error processing PDF: {str(e)}"
29
 
30
  def initialize_llm():
31
+ load_dotenv()
32
+ groq_api_key = os.getenv("GROQ_API_KEY")
33
  if not groq_api_key:
34
  raise ValueError("GROQ_API_KEY environment variable not set.")
35
 
36
  return ChatGroq(
37
+ model= "llama3-8b-8192",
38
  temperature=0.7,
39
  api_key=groq_api_key,
40
+ verbose=False
 
 
 
 
 
 
 
 
 
 
 
 
41
  )
42
 
43
+ def create_prompt():
44
+ return PromptTemplate(
45
+ input_variables=["document", "question"],
46
+ template=(
47
+ "You are an AI assistant that provides precise answers based on the given document. "
48
+ "Use only the information available in the document to respond.\n\n"
49
+ "Document:\n{document}\n\n"
50
+ "Question: {question}\n"
51
+ "Answer:"
52
+ )
53
  )
54
 
55
+ def generate_answer(chain, document_text, user_input):
56
  try:
57
+ response = chain.invoke({"document": document_text, "question": user_input})
58
+ answer = response.content
59
+ return str(answer)
60
  except Exception as e:
61
  return f"Error generating answer: {str(e)}"
62
 
 
64
  if not file:
65
  return "Please upload a PDF document."
66
 
67
+ document_text = process_pdf(file)
68
+ if isinstance(document_text, str) and document_text.startswith("Error"):
69
+ return document_text # Return error message if processing failed
70
+
71
+ if not user_input.strip():
72
+ return "Please enter a question."
73
 
74
  llm = initialize_llm()
75
  prompt = create_prompt()
76
  chain = prompt | llm
77
 
78
+ return generate_answer(chain, document_text, user_input)
 
 
 
79
 
80
  interface = gr.Interface(
81
  fn=handle_file,