Ali Abdullah committed on
Commit
7d8c76e
·
verified ·
1 Parent(s): 2032f5f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +15 -13
main.py CHANGED
@@ -1,35 +1,33 @@
1
  from fastapi import FastAPI, UploadFile, File, Form
2
  from fastapi.responses import JSONResponse
3
  from pydantic import BaseModel
4
- from groq import Groq
5
- from langchain_community.document_loaders import WebBaseLoader
6
-
7
  import os
8
  import io
9
  from dotenv import load_dotenv
10
  from PIL import Image
11
  import pytesseract
12
  import whisper
13
-
 
14
  from docx import Document
15
  import pandas as pd
16
  import PyPDF2
 
17
 
 
18
  load_dotenv()
19
-
20
  pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
21
  ffmpeg_path = os.getenv("FFMPEG_PATH", "/usr/bin")
22
  os.environ["PATH"] += os.pathsep + ffmpeg_path
23
 
24
  app = FastAPI()
25
-
26
  client = Groq(api_key=os.getenv("GROQ_API_KEY"))
27
 
28
  UPLOAD_DIR = "uploaded_files"
29
  os.makedirs(UPLOAD_DIR, exist_ok=True)
30
-
31
  MAX_FILE_SIZE_MB = 10
32
 
 
33
  def extract_text_from_file(file_path):
34
  ext = os.path.splitext(file_path)[-1].lower()
35
  if ext == ".txt":
@@ -48,6 +46,7 @@ def extract_text_from_file(file_path):
48
  else:
49
  return "❌ Unsupported file type."
50
 
 
51
  @app.post("/chat-with-file")
52
  async def chat_with_file(file: UploadFile = File(...), question: str = Form(...)):
53
  try:
@@ -72,6 +71,7 @@ async def chat_with_file(file: UploadFile = File(...), question: str = Form(...)
72
  except Exception as e:
73
  return JSONResponse(status_code=500, content={"error": str(e)})
74
 
 
75
  class URLQuery(BaseModel):
76
  url: str
77
  question: str
@@ -79,22 +79,24 @@ class URLQuery(BaseModel):
79
  @app.post("/chat-with-url")
80
  async def chat_with_url(data: URLQuery):
81
  try:
82
- loader = WebBaseLoader(data.url, header_template={"User-Agent": "Mozilla/5.0"})
83
- documents = loader.load()
84
- web_content = "\n".join([doc.page_content for doc in documents])
 
 
85
 
86
  response = client.chat.completions.create(
87
  model="llama3-8b-8192",
88
  messages=[
89
  {"role": "system", "content": "You are a helpful assistant. Use the website content to answer the user's question."},
90
- {"role": "user", "content": f"Website Content:\n{web_content}\n\nNow answer this question:\n{data.question}"}
91
  ]
92
  )
93
  return {"answer": response.choices[0].message.content}
94
  except Exception as e:
95
  return JSONResponse(status_code=500, content={"error": str(e)})
96
-
97
 
 
98
  @app.post("/extract-text-from-image")
99
  async def extract_text_from_image(file: UploadFile = File(...)):
100
  try:
@@ -104,8 +106,8 @@ async def extract_text_from_image(file: UploadFile = File(...)):
104
  return {"answer": text.strip() or "⚠️ No text extracted."}
105
  except Exception as e:
106
  return JSONResponse(status_code=500, content={"error": str(e)})
107
-
108
 
 
109
  @app.post("/transcribe-audio")
110
  async def transcribe_audio(file: UploadFile = File(...)):
111
  try:
 
1
  from fastapi import FastAPI, UploadFile, File, Form
2
  from fastapi.responses import JSONResponse
3
  from pydantic import BaseModel
 
 
 
4
  import os
5
  import io
6
  from dotenv import load_dotenv
7
  from PIL import Image
8
  import pytesseract
9
  import whisper
10
+ import requests
11
+ from bs4 import BeautifulSoup
12
  from docx import Document
13
  import pandas as pd
14
  import PyPDF2
15
+ from groq import Groq
16
 
17
+ # Load environment variables
18
  load_dotenv()
 
19
  pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
20
  ffmpeg_path = os.getenv("FFMPEG_PATH", "/usr/bin")
21
  os.environ["PATH"] += os.pathsep + ffmpeg_path
22
 
23
  app = FastAPI()
 
24
  client = Groq(api_key=os.getenv("GROQ_API_KEY"))
25
 
26
  UPLOAD_DIR = "uploaded_files"
27
  os.makedirs(UPLOAD_DIR, exist_ok=True)
 
28
  MAX_FILE_SIZE_MB = 10
29
 
30
+ # ========== File Text Extraction ==========
31
  def extract_text_from_file(file_path):
32
  ext = os.path.splitext(file_path)[-1].lower()
33
  if ext == ".txt":
 
46
  else:
47
  return "❌ Unsupported file type."
48
 
49
+ # ========== Chat with File ==========
50
  @app.post("/chat-with-file")
51
  async def chat_with_file(file: UploadFile = File(...), question: str = Form(...)):
52
  try:
 
71
  except Exception as e:
72
  return JSONResponse(status_code=500, content={"error": str(e)})
73
 
74
+ # ========== Chat with URL ==========
75
  class URLQuery(BaseModel):
76
  url: str
77
  question: str
 
79
  @app.post("/chat-with-url")
80
  async def chat_with_url(data: URLQuery):
81
  try:
82
+ headers = {"User-Agent": "Mozilla/5.0"}
83
+ res = requests.get(data.url, headers=headers, timeout=10)
84
+ soup = BeautifulSoup(res.text, "html.parser")
85
+ web_content = soup.get_text(separator="\n")
86
+ trimmed_content = web_content[:8000] # limit for Groq
87
 
88
  response = client.chat.completions.create(
89
  model="llama3-8b-8192",
90
  messages=[
91
  {"role": "system", "content": "You are a helpful assistant. Use the website content to answer the user's question."},
92
+ {"role": "user", "content": f"{trimmed_content}\n\nNow answer this question:\n{data.question}"}
93
  ]
94
  )
95
  return {"answer": response.choices[0].message.content}
96
  except Exception as e:
97
  return JSONResponse(status_code=500, content={"error": str(e)})
 
98
 
99
+ # ========== Extract Text from Image ==========
100
  @app.post("/extract-text-from-image")
101
  async def extract_text_from_image(file: UploadFile = File(...)):
102
  try:
 
106
  return {"answer": text.strip() or "⚠️ No text extracted."}
107
  except Exception as e:
108
  return JSONResponse(status_code=500, content={"error": str(e)})
 
109
 
110
+ # ========== Transcribe Audio ==========
111
  @app.post("/transcribe-audio")
112
  async def transcribe_audio(file: UploadFile = File(...)):
113
  try: