randusertry committed on
Commit
8538df4
·
verified ·
1 Parent(s): 4854a04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -0
app.py CHANGED
@@ -8,6 +8,68 @@ import shutil
8
 
9
  app = FastAPI()
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  @app.post("/extract")
12
  async def extract(file: UploadFile = File(...)):
13
  pdf_bytes = await file.read()
@@ -64,6 +126,57 @@ async def convert_to_markdown(file: UploadFile = File(...)):
64
  if os.path.exists(temp_path):
65
  os.remove(temp_path)
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  @app.get("/health")
68
  async def health():
69
  return {
 
8
 
9
  app = FastAPI()
10
 
11
+
12
# Stylesheet embedded into generated EPUBs: export_epub writes this text to a
# temporary CSS file and hands it to pandoc through its --css option.
# The rules describe a dark theme (light text on a #1a1a1a background) with
# orange accents for headings, blockquotes and table headers, plus zebra-striped
# table rows.
EPUB_CSS = """
body {
font-family: serif;
line-height: 1.5;
margin: 5%;
color: #e0e0e0;
background-color: #1a1a1a;
}
h1 {
text-align: center;
color: #f4a261;
text-transform: uppercase;
margin-bottom: 0.2em;
}
h3 {
border-bottom: 1px solid #333;
padding-bottom: 5px;
margin-top: 30px;
color: #e76f51;
}
blockquote {
font-style: italic;
border-left: 3px solid #e76f51;
padding-left: 15px;
color: #b0b0b0;
margin: 1.5em 10px;
}
li {
margin-bottom: 8px;
}
table {
width: 100%;
border-collapse: separate;
border-spacing: 0;
margin: 20px 0;
border: 1px solid #333;
border-radius: 8px;
overflow: hidden;
}
th {
background-color: #2d2d2d;
color: #f4a261;
font-weight: bold;
text-align: left;
padding: 12px;
border-bottom: 2px solid #3d3d3d;
}
td {
padding: 10px 12px;
border-bottom: 1px solid #2d2d2d;
vertical-align: top;
font-size: 0.95em;
}
tr:last-child td {
border-bottom: none;
}
tr:nth-child(even) {
background-color: #222222;
}
"""
72
+
73
  @app.post("/extract")
74
  async def extract(file: UploadFile = File(...)):
75
  pdf_bytes = await file.read()
 
126
  if os.path.exists(temp_path):
127
  os.remove(temp_path)
128
 
129
@app.post("/export-epub")
async def export_epub(file: UploadFile = File(...)):
    """Convert an uploaded document to a styled EPUB and return it as a download.

    The upload is saved to disk, converted to Markdown with the module-level
    MarkItDown instance (``md``), and the Markdown is piped into the ``pandoc``
    command-line tool together with ``EPUB_CSS`` to build the EPUB.

    Every intermediate file lives in a private per-request temporary
    directory, so concurrent requests cannot overwrite or delete each other's
    files, and the directory (including the generated EPUB) is removed once
    the response has been sent.

    Args:
        file: The uploaded document. Only the final path component of its
            client-supplied filename is used, for the temporary input file
            and for the download name.

    Returns:
        A ``FileResponse`` streaming ``<base_name>.epub`` with media type
        ``application/epub+zip``.

    Raises:
        HTTPException: 500 with the error text if saving, converting or
            running pandoc fails.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import tempfile

    from fastapi import BackgroundTasks

    # The filename comes from the client: strip any directory part (both
    # separator styles) so it can never point outside the work directory.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(raw_name) or "upload"
    base_name = os.path.splitext(safe_name)[0] or "upload"
    download_name = f"{base_name}.epub"

    work_dir = tempfile.mkdtemp(prefix="epub_export_")
    temp_input = os.path.join(work_dir, f"temp_{safe_name}")
    temp_css = os.path.join(work_dir, "style.css")
    output_epub = os.path.join(work_dir, download_name)

    try:
        # 1. Save the upload
        content = await file.read()
        with open(temp_input, "wb") as buffer:
            buffer.write(content)

        # 2. Use MarkItDown to get the Markdown content
        result = md.convert(temp_input)
        markdown_content = result.text_content

        # 3. Create a temporary CSS file for Pandoc
        with open(temp_css, "w", encoding="utf-8") as f:
            f.write(EPUB_CSS)

        # 4. Call Pandoc, feeding the Markdown through stdin.
        # Arguments are passed as a list (no shell), and the pipes are forced
        # to UTF-8 because pandoc expects UTF-8 regardless of the locale.
        # NOTE: this call blocks the event loop while pandoc runs.
        completed = subprocess.run(
            [
                "pandoc", "--from=markdown", "--to=epub", "--css", temp_css,
                "--metadata", f"title={base_name}", "-o", output_epub,
            ],
            input=markdown_content,
            stderr=subprocess.PIPE,
            text=True,
            encoding="utf-8",
        )
        if completed.returncode != 0:
            raise RuntimeError(f"Pandoc Error: {completed.stderr}")

    except HTTPException:
        # Already a proper HTTP error: clean up and let it through unchanged.
        shutil.rmtree(work_dir, ignore_errors=True)
        raise
    except Exception as e:
        shutil.rmtree(work_dir, ignore_errors=True)
        raise HTTPException(status_code=500, detail=str(e)) from e

    # 5. Return the generated EPUB file.
    # FileResponse reads the file only after this function has returned, so
    # the work directory is deleted by a background task that runs once the
    # response has been sent rather than in a ``finally`` block.
    cleanup = BackgroundTasks()
    cleanup.add_task(shutil.rmtree, work_dir, ignore_errors=True)
    return FileResponse(
        path=output_epub,
        filename=download_name,
        media_type="application/epub+zip",
        background=cleanup,
    )
179
+
180
  @app.get("/health")
181
  async def health():
182
  return {