Ali Abdullah committed on
Commit
75ca6dd
·
verified ·
1 Parent(s): 38dc08b

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +136 -0
main.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form
2
+ from fastapi.responses import JSONResponse
3
+ from pydantic import BaseModel
4
+ from groq import Groq
5
+ from langchain_community.document_loaders import WebBaseLoader
6
+
7
+ import os
8
+ import io
9
+ from dotenv import load_dotenv
10
+ from PIL import Image
11
+ import pytesseract
12
+ import whisper
13
+
14
# Load environment variables (from a .env file, via python-dotenv).
load_dotenv()

# Tesseract path: override pytesseract's default binary only when
# TESSERACT_CMD is set to a non-empty value.
tesseract_cmd = os.getenv("TESSERACT_CMD")
if tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

# FFmpeg path: read from the FFMPEG_PATH environment variable when set, so the
# location is configurable per machine; otherwise fall back to the original
# hard-coded Windows location.
ffmpeg_path = os.getenv(
    "FFMPEG_PATH",
    r"C:\Users\aliab\Downloads\ffmpeg-7.1.1-essentials_build\ffmpeg-7.1.1-essentials_build\bin",
)
if os.path.exists(ffmpeg_path):
    # Append (not prepend) so anything already on PATH keeps precedence, and
    # tolerate an unset/empty PATH instead of raising KeyError.
    os.environ["PATH"] = os.pathsep.join(
        part for part in (os.environ.get("PATH"), ffmpeg_path) if part
    )
26
+
27
+ # File reading libraries
28
+ from docx import Document
29
+ import pandas as pd
30
+ import PyPDF2
31
+
32
# FastAPI application object; the route decorators in this file register on it.
app = FastAPI()

# Use Groq API key from .env (read through the GROQ_API_KEY environment variable).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Directory that uploaded files are written to; created at import time if missing.
UPLOAD_DIR = "uploaded_files"
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Upload size cap, in megabytes.
MAX_FILE_SIZE_MB = 10
41
+
42
+ # ---------- File Text Extraction ----------
43
def extract_text_from_file(file_path):
    """Return the text content of a file, dispatching on its extension.

    Supported extensions (matched case-insensitively): ``.txt``, ``.docx``,
    ``.csv`` and ``.pdf``.

    Args:
        file_path: Path of the file on disk.

    Returns:
        The extracted text. For any other extension the literal string
        "❌ Unsupported file type." is returned instead of raising.
    """
    ext = os.path.splitext(file_path)[-1].lower()

    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    if ext == ".docx":
        doc = Document(file_path)
        return "\n".join(para.text for para in doc.paragraphs)

    if ext == ".csv":
        df = pd.read_csv(file_path)
        return df.to_string(index=False)

    if ext == ".pdf":
        with open(file_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # Extract each page exactly once (the text is both the filter
            # condition and the value), skipping pages that yield no text.
            page_texts = (page.extract_text() for page in reader.pages)
            return "\n".join(text for text in page_texts if text)

    return "❌ Unsupported file type."
60
+
61
+ # ---------- Chat with File ----------
62
@app.post("/chat-with-file")
async def chat_with_file(file: UploadFile = File(...), question: str = Form(...)):
    """Answer a question about an uploaded file using the Groq chat model.

    The upload is size-checked, saved under UPLOAD_DIR, converted to text with
    extract_text_from_file, and sent together with the question to the
    "llama3-8b-8192" model.

    Args:
        file: The uploaded file (multipart form field).
        question: The question to ask about the file (form field).

    Returns:
        ``{"answer": ...}`` on success; a JSONResponse with an ``error`` key
        and status 400 (file too large / invalid file name) or 500 (any other
        failure).
    """
    try:
        contents = await file.read()

        if len(contents) > MAX_FILE_SIZE_MB * 1024 * 1024:
            return JSONResponse(
                status_code=400,
                content={"error": f"❌ File too large. Max size is {MAX_FILE_SIZE_MB}MB."},
            )

        # file.filename is client-controlled: keep only the final path
        # component (treating backslashes as separators too) so a name such
        # as "../../x" cannot write outside UPLOAD_DIR.
        safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            return JSONResponse(status_code=400, content={"error": "❌ Invalid file name."})

        file_path = os.path.join(UPLOAD_DIR, safe_name)
        with open(file_path, "wb") as f:
            f.write(contents)

        file_content = extract_text_from_file(file_path)

        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant. Use the uploaded file content to answer questions."},
                {"role": "user", "content": f"{file_content}\n\nQuestion: {question}"},
            ],
        )

        return {"answer": response.choices[0].message.content}
    except Exception as e:
        # Endpoint boundary: report any failure as a JSON 500 with its message.
        return JSONResponse(status_code=500, content={"error": str(e)})
87
+
88
+ # ---------- Chat with URL ----------
89
class URLQuery(BaseModel):
    # JSON request body accepted by the /chat-with-url endpoint.

    # Address of the web page whose content should be loaded.
    url: str

    # Question to answer from that page's content.
    question: str
92
+
93
@app.post("/chat-with-url")
async def chat_with_url(data: URLQuery):
    """Answer a question about a web page using the Groq chat model.

    The page at ``data.url`` is loaded with WebBaseLoader, its text is joined
    and sent with ``data.question`` to the "llama3-8b-8192" model.

    Args:
        data: Request body carrying ``url`` and ``question``.

    Returns:
        ``{"answer": ...}`` on success; a JSONResponse with an ``error`` key
        and status 400 (URL is not a well-formed http/https URL) or 500 (any
        other failure).
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import urlparse

    # Reject anything that is not a well-formed http(s) URL up front, rather
    # than letting the loader fail with an opaque 500.
    try:
        parsed = urlparse(data.url)
    except ValueError:
        parsed = None
    if parsed is None or parsed.scheme not in ("http", "https") or not parsed.netloc:
        return JSONResponse(
            status_code=400,
            content={"error": "❌ Invalid URL. Only http(s) URLs are supported."},
        )

    # NOTE(review): the server fetches a caller-supplied URL, so internal or
    # private hosts are reachable through this endpoint (SSRF). Consider an
    # allow-list or private-address filtering before exposing this publicly.
    try:
        os.environ["USER_AGENT"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        loader = WebBaseLoader(data.url)
        documents = loader.load()
        web_content = "\n".join(doc.page_content for doc in documents)

        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant. Use the website content to answer the user's question."},
                {"role": "user", "content": f"Website Content:\n{web_content}\n\nNow answer this question:\n{data.question}"},
            ],
        )
        return {"answer": response.choices[0].message.content}
    except Exception as e:
        # Endpoint boundary: report any failure as a JSON 500 with its message.
        return JSONResponse(status_code=500, content={"error": str(e)})
111
+
112
+ # ---------- Extract Text from Image ----------
113
@app.post("/extract-text-from-image")
async def extract_text_from_image(file: UploadFile = File(...)):
    """Run OCR (pytesseract) on an uploaded image and return the text.

    Args:
        file: The uploaded image (multipart form field).

    Returns:
        ``{"answer": <text>}`` where the text is stripped, or
        "⚠️ No text extracted." when OCR yields nothing; a JSONResponse with
        an ``error`` key and status 400 (file too large) or 500 (any other
        failure, e.g. the bytes are not a readable image).
    """
    try:
        contents = await file.read()

        # Same upload cap as /chat-with-file, so this endpoint cannot be used
        # to push arbitrarily large payloads through OCR.
        if len(contents) > MAX_FILE_SIZE_MB * 1024 * 1024:
            return JSONResponse(
                status_code=400,
                content={"error": f"❌ File too large. Max size is {MAX_FILE_SIZE_MB}MB."},
            )

        # Context manager releases the image's resources once OCR is done.
        with Image.open(io.BytesIO(contents)) as image:
            text = pytesseract.image_to_string(image)

        return {"answer": text.strip() or "⚠️ No text extracted."}
    except Exception as e:
        # Endpoint boundary: report any failure as a JSON 500 with its message.
        return JSONResponse(status_code=500, content={"error": str(e)})
122
+
123
+ # ---------- Transcribe Audio ----------
124
# Shared Whisper model, loaded on first use so that each request does not pay
# the cost of whisper.load_model again.
_whisper_model = None


def _get_whisper_model():
    """Return the shared Whisper "base" model, loading it on the first call."""
    global _whisper_model
    if _whisper_model is None:
        _whisper_model = whisper.load_model("base")
    return _whisper_model


@app.post("/transcribe-audio")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the Whisper "base" model.

    The upload is saved under UPLOAD_DIR and that path is passed to Whisper.

    Args:
        file: The uploaded audio file (multipart form field).

    Returns:
        ``{"answer": <transcript>}``, or "⚠️ No transcript returned." when the
        result has no text; a JSONResponse with an ``error`` key and status
        400 (invalid file name) or 500 (any other failure).
    """
    try:
        contents = await file.read()

        # file.filename is client-controlled: keep only the final path
        # component (treating backslashes as separators too) so the upload
        # cannot be written outside UPLOAD_DIR.
        safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            return JSONResponse(status_code=400, content={"error": "❌ Invalid file name."})

        audio_path = os.path.join(UPLOAD_DIR, safe_name)
        with open(audio_path, "wb") as f:
            f.write(contents)

        result = _get_whisper_model().transcribe(audio_path)
        return {"answer": result["text"] if result.get("text") else "⚠️ No transcript returned."}
    except Exception as e:
        # Endpoint boundary: report any failure as a JSON 500 with its message.
        return JSONResponse(status_code=500, content={"error": str(e)})