cipherunhsiv committed on
Commit
73de916
·
verified ·
1 Parent(s): 4ca0516

create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import torch
4
+ import gradio as gr
5
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
6
+ from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex
7
+ from llama_index.llms import HuggingFaceLLM
8
+ from llama_index.prompts.prompts import SimpleInputPrompt
9
+ from llama_index.embeddings import LangchainEmbedding
10
+
11
def download_pdf_from_url(url, save_path="/content/Data/input.pdf", timeout=30):
    """Download the file at *url* and write it to *save_path*.

    Args:
        url: Address of the PDF, fetched with an HTTP GET.
        save_path: Destination file path. Its parent directory is created
            when it does not exist yet.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled host cannot block the caller indefinitely.

    Returns:
        True when the server answered with status 200 and the file was
        written; False for any other status code (nothing is written then).

    Raises:
        requests.RequestException: Connection failures, malformed URLs and
            timeouts are not caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to download PDF. Status code: {response.status_code}")
        return False

    # Make the function usable on its own, without relying on the caller
    # having created the target directory beforehand.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(save_path, 'wb') as file:
        file.write(response.content)
    print(f"PDF downloaded and saved to {save_path}")
    return True
19
+
20
# Working directory that is indexed, and the single file each request writes.
_DATA_DIR = "/content/Data/"
_PDF_PATH = "/content/Data/input.pdf"

_SYSTEM_PROMPT = """You are an expert share market document summarizer specializing in creating concise, comprehensive summaries tailored for professional audiences. Your goal is to summarize pdf which may also include tabular columns, as
accurately as possible based on the instructions and context provided."""

_SUMMARY_QUERY = """You are an expert share market document summarizer specializing in creating concise, comprehensive summaries tailored for professional audiences. Your task is to analyze the given document and generate a structured summary in approximately 500 words. Ensure the summary:
Captures all key points, including data, insights, and observations.
Clearly outlines the context, such as the purpose of the document and relevant background information.
Summarizes tabular data and numerical figures effectively, while retaining accuracy and relevance.
Highlights significant trends, comparisons, or impacts mentioned in the document.
Uses formal and precise language suitable for a corporate or academic audience.
The output should be well-organized with clear headings or bullet points where applicable. Avoid omitting any critical information, and focus on maintaining a balance between brevity and detail."""

# Built on first use and then reused by every request.
_SERVICE_CONTEXT = None


def _get_service_context():
    """Return the shared ServiceContext, loading the LLM and embedder once."""
    global _SERVICE_CONTEXT
    if _SERVICE_CONTEXT is None:
        llm = HuggingFaceLLM(
            context_window=4096,
            max_new_tokens=750,
            # NOTE(review): with do_sample=False decoding is greedy, so the
            # temperature value is presumably ignored -- confirm which of the
            # two settings was intended.
            generate_kwargs={"temperature": 0.5, "do_sample": False},
            system_prompt=_SYSTEM_PROMPT,
            query_wrapper_prompt=SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>"),
            tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
            model_name="mistralai/Mistral-7B-Instruct-v0.1",
            device_map="auto",
            model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
        )
        embed_model = LangchainEmbedding(
            HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
        )
        _SERVICE_CONTEXT = ServiceContext.from_defaults(
            chunk_size=1024,
            llm=llm,
            embed_model=embed_model,
        )
    return _SERVICE_CONTEXT


def mod(pdf_url):
    """Download the PDF at *pdf_url* and return a generated summary of it.

    The PDF is saved into the data directory, everything in that directory is
    loaded and indexed, and the index is queried with the fixed summary
    prompt.

    Args:
        pdf_url: URL of the PDF document to summarize.

    Returns:
        The text of the query engine's response, as a string.

    Raises:
        gr.Error: If the URL is empty or no PDF could be downloaded.
    """
    if not pdf_url or not pdf_url.strip():
        raise gr.Error("Please provide the URL of a PDF to summarize.")

    os.makedirs(_DATA_DIR, exist_ok=True)

    # Remove the file left by an earlier request: the downloader only prints
    # on failure, and a leftover PDF would otherwise be summarized as if it
    # were the document that was just requested.
    try:
        os.remove(_PDF_PATH)
    except FileNotFoundError:
        pass

    download_pdf_from_url(pdf_url.strip(), save_path=_PDF_PATH)
    if not os.path.isfile(_PDF_PATH):
        raise gr.Error("Could not download a PDF from the given URL.")

    documents = SimpleDirectoryReader(_DATA_DIR).load_data()
    index = VectorStoreIndex.from_documents(
        documents, service_context=_get_service_context()
    )
    response = index.as_query_engine().query(_SUMMARY_QUERY)
    return str(response.response)
61
+
62
def func(url):
    """Gradio callback: summarize the PDF found at *url* by delegating to mod."""
    summary = mod(url)
    return summary
64
+
65
# Example PDF links offered in the UI; each inner list is one example row.
_example_pdf_urls = [
    ['https://cdn-sn.samco.in/ec90fa5b637541d3c86fdb86f45d920c.pdf'],
    ['https://cdn-sn.samco.in/7c8616b72b4aa639c0eda9f44285ab1d.pdf'],
    ['https://cdn-sn.samco.in/a4b95bc0bdb8361459a8b41bfc0ff317.pdf'],
]

# Read-only output box that displays the generated summary.
_summary_box = gr.Textbox(
    label="Output Summary",
    placeholder="The summary will appear here . . .",
    lines=10,
    interactive=False,
)

# Text-in / text-out interface around func, with three flagging choices.
iface = gr.Interface(
    fn=func,
    inputs="text",
    outputs=_summary_box,
    examples=_example_pdf_urls,
    flagging_options=["Useful", "Mediocre 50-50", "Not Useful"],
    description="Flag it for every response and classify it according to what you feel!",
)

iface.launch(share=True, debug=True)