ChatBotsTA committed on
Commit
6a55d23
·
verified ·
1 Parent(s): 0aa1db8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import io
import os
import tempfile

import gradio as gr
import pdfplumber
from huggingface_hub import InferenceClient
from PIL import Image
8
+
9
# ---------------- CONFIG ----------------
LLAMA_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"
TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"
SDXL_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"

# Credentials come from the environment; nothing is required at import time,
# but without a token every inference call will fail downstream.
HF_TOKEN = os.environ.get("HF_TOKEN")
GROQ_TOKEN = os.environ.get("GROQ_TOKEN")

# Prefer the Groq provider when its token is available, otherwise fall back
# to the default Hugging Face Inference API. With no token at all, `client`
# stays None.
if GROQ_TOKEN:
    client = InferenceClient(provider="groq", api_key=GROQ_TOKEN)
elif HF_TOKEN:
    client = InferenceClient(api_key=HF_TOKEN)
else:
    client = None
22
+
23
+ # ---------------- HELPERS ----------------
24
def pdf_to_text(file):
    """Extract the plain text of every page of a PDF.

    Args:
        file: a path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        A ``(text, page_count)`` tuple where ``text`` joins the pages with
        blank lines and ``page_count`` is the number of pages in the PDF.
    """
    with pdfplumber.open(file) as pdf:
        # extract_text() may return None for image-only pages — coerce to "".
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n\n".join(page_texts), len(page_texts)
33
+
34
def llama_summarize(text):
    """Summarize *text* into six bullet points via the Llama chat model.

    Args:
        text: document text to summarize (caller is expected to truncate).

    Returns:
        The model's summary as a string.

    Raises:
        RuntimeError: if no inference client was configured (neither
            GROQ_TOKEN nor HF_TOKEN is set).
    """
    # Fail with a clear message instead of an AttributeError on None.
    if client is None:
        raise RuntimeError("No GROQ_TOKEN or HF_TOKEN configured; cannot summarize.")
    messages = [
        {"role": "system", "content": "You are a concise summarizer. Give 6 short bullet points."},
        {"role": "user", "content": f"Summarize this document in 6 concise bullet points:\n\n{text}"}
    ]
    resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
    # Attribute access works on the dataclass-style response objects.
    return resp.choices[0].message.content
41
+
42
def llama_chat(history, question):
    """Ask the Llama chat model *question* in the context of *history*.

    Args:
        history: list of ``{"role", "content"}`` message dicts (not mutated).
        question: the user's new question.

    Returns:
        The assistant's reply text.

    Raises:
        RuntimeError: if no inference client was configured (neither
            GROQ_TOKEN nor HF_TOKEN is set).
    """
    # Fail with a clear message instead of an AttributeError on None.
    if client is None:
        raise RuntimeError("No GROQ_TOKEN or HF_TOKEN configured; cannot chat.")
    messages = history + [{"role": "user", "content": question}]
    resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
    # Attribute access works on the dataclass-style response objects.
    return resp.choices[0].message.content
46
+
47
def tts_synthesize(text):
    """Synthesize *text* to speech and return the path of a temp audio file.

    Args:
        text: the text to speak.

    Returns:
        Filesystem path to a WAV file, as expected by the
        ``gr.Audio(type="filepath")`` output component.

    Raises:
        RuntimeError: if no inference client was configured.
    """
    if client is None:
        raise RuntimeError("No GROQ_TOKEN or HF_TOKEN configured; cannot synthesize speech.")
    # BUG FIX: text_to_speech takes the text as its first positional argument;
    # the original `inputs=` keyword does not exist and raised TypeError.
    audio_bytes = client.text_to_speech(text, model=TTS_MODEL)
    # BUG FIX: the Audio output is type="filepath", so raw bytes must be
    # persisted to disk and the path returned instead of the bytes.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio_bytes)
        return tmp.name
50
+
51
def generate_image(prompt_text):
    """Generate an image for *prompt_text* with the SDXL model.

    Args:
        prompt_text: text prompt for the diffusion model.

    Returns:
        A ``PIL.Image.Image`` suitable for a ``gr.Image`` output.

    Raises:
        RuntimeError: if no inference client was configured.
    """
    if client is None:
        raise RuntimeError("No GROQ_TOKEN or HF_TOKEN configured; cannot generate images.")
    result = client.text_to_image(prompt_text, model=SDXL_MODEL)
    # BUG FIX: recent huggingface_hub returns a PIL.Image directly, so the
    # original Image.open(io.BytesIO(result)) failed; handle both the modern
    # PIL return and the legacy raw-bytes return.
    if isinstance(result, Image.Image):
        return result
    return Image.open(io.BytesIO(result))
54
+
55
def ask_question_and_maybe_diagram(chat_text, question, history):
    """Answer *question* against the document and optionally draw a diagram.

    On the first turn (empty/None *history*) a system message carrying the
    first 4000 characters of the document is seeded. A question starting
    with "!diagram" additionally generates an image from the rest of the
    question text.

    Returns:
        ``(answer, image_or_None, updated_history)``.
    """
    # Seed the conversation with the document context on the first call.
    history = history or [
        {"role": "system", "content": f"Document context:\n{chat_text[:4000]}"}
    ]
    ans = llama_chat(history, question)
    history += [
        {"role": "user", "content": question},
        {"role": "assistant", "content": ans},
    ]

    diagram_img = None
    if question.strip().lower().startswith("!diagram"):
        prompt = question[len("!diagram"):].strip()
        if prompt:
            diagram_img = generate_image(prompt)
    return ans, diagram_img, history
68
+
69
+ # ---------------- GRADIO INTERFACE ----------------
70
# ---------------- GRADIO INTERFACE ----------------
def _on_pdf_upload(f):
    """Extract an uploaded PDF; returns (status_message, preview_text)."""
    if not f:
        # BUG FIX: the original no-file branch returned three values for the
        # two wired outputs.
        return "No file uploaded", ""
    text, pages = pdf_to_text(f)
    return f"Extracted {pages} page(s)", text


with gr.Blocks() as demo:
    gr.Markdown("## 📄 PDF Buddy — Summarize • Speak • Chat • Draw")

    with gr.Row():
        # BUG FIX: Gradio 4.x accepts "filepath" or "binary"; the original
        # type="file" is a removed 3.x value. pdfplumber accepts a path.
        pdf_file = gr.File(label="Upload PDF", type="filepath")
        extract_status = gr.Textbox(label="Status")

    extracted_text = gr.Textbox(label="Document Preview", lines=10)

    with gr.Row():
        summarize_btn = gr.Button("📝 Summarize")
        summary_output = gr.Textbox(label="Summary", lines=6)

    tts_btn = gr.Button("🔊 Synthesize Summary to Audio")
    audio_out = gr.Audio(label="Audio", type="filepath")

    chat_question = gr.Textbox(label="Ask a question (use !diagram for image)")
    chat_btn = gr.Button("❓ Ask")
    chat_output = gr.Textbox(label="Answer")
    diagram_out = gr.Image(label="Diagram (optional)")
    chat_history_state = gr.State()

    # ---------------- CALLBACKS ----------------
    # BUG FIX: pdf_to_text returns (text, pages) but the outputs are
    # (status, preview); the original lambda put the document text into the
    # status box and the page count into the preview. _on_pdf_upload maps
    # the values correctly.
    pdf_file.change(
        _on_pdf_upload,
        inputs=pdf_file,
        outputs=[extract_status, extracted_text],
    )

    # Truncate very large documents before sending them to the model.
    summarize_btn.click(
        lambda text: llama_summarize(text[:30000]) if text else "No text to summarize",
        inputs=extracted_text,
        outputs=summary_output,
    )

    tts_btn.click(
        lambda summary: tts_synthesize(summary) if summary else None,
        inputs=summary_output,
        outputs=audio_out,
    )

    chat_btn.click(
        ask_question_and_maybe_diagram,
        inputs=[extracted_text, chat_question, chat_history_state],
        outputs=[chat_output, diagram_out, chat_history_state],
    )

demo.launch()