Spaces:

Al1Abdullah
/

AI_Chatbot_File_Web_Image_Audio

Sleeping

App Files Files Community

Ali Abdullah committed on Jun 26, 2025

Commit

7d8c76e

verified ·

1 Parent(s): 2032f5f

Update main.py

Browse files

Files changed (1) hide show

main.py +15 -13

main.py CHANGED Viewed

@@ -1,35 +1,33 @@
 from fastapi import FastAPI, UploadFile, File, Form
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-from groq import Groq
-from langchain_community.document_loaders import WebBaseLoader
 import os
 import io
 from dotenv import load_dotenv
 from PIL import Image
 import pytesseract
 import whisper
 from docx import Document
 import pandas as pd
 import PyPDF2
 load_dotenv()
 pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
 ffmpeg_path = os.getenv("FFMPEG_PATH", "/usr/bin")
 os.environ["PATH"] += os.pathsep + ffmpeg_path
 app = FastAPI()
 client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 UPLOAD_DIR = "uploaded_files"
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 MAX_FILE_SIZE_MB = 10
 def extract_text_from_file(file_path):
     ext = os.path.splitext(file_path)[-1].lower()
     if ext == ".txt":
@@ -48,6 +46,7 @@ def extract_text_from_file(file_path):
     else:
         return "❌ Unsupported file type."
 @app.post("/chat-with-file")
 async def chat_with_file(file: UploadFile = File(...), question: str = Form(...)):
     try:
@@ -72,6 +71,7 @@ async def chat_with_file(file: UploadFile = File(...), question: str = Form(...)
     except Exception as e:
         return JSONResponse(status_code=500, content={"error": str(e)})
 class URLQuery(BaseModel):
     url: str
     question: str
@@ -79,22 +79,24 @@ class URLQuery(BaseModel):
 @app.post("/chat-with-url")
 async def chat_with_url(data: URLQuery):
     try:
-        loader = WebBaseLoader(data.url, header_template={"User-Agent": "Mozilla/5.0"})
-        documents = loader.load()
-        web_content = "\n".join([doc.page_content for doc in documents])
         response = client.chat.completions.create(
             model="llama3-8b-8192",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant. Use the website content to answer the user's question."},
-                {"role": "user", "content": f"Website Content:\n{web_content}\n\nNow answer this question:\n{data.question}"}
             ]
         )
         return {"answer": response.choices[0].message.content}
     except Exception as e:
         return JSONResponse(status_code=500, content={"error": str(e)})
 @app.post("/extract-text-from-image")
 async def extract_text_from_image(file: UploadFile = File(...)):
     try:
@@ -104,8 +106,8 @@ async def extract_text_from_image(file: UploadFile = File(...)):
         return {"answer": text.strip() or "⚠️ No text extracted."}
     except Exception as e:
         return JSONResponse(status_code=500, content={"error": str(e)})
 @app.post("/transcribe-audio")
 async def transcribe_audio(file: UploadFile = File(...)):
     try:

 from fastapi import FastAPI, UploadFile, File, Form
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 import os
 import io
 from dotenv import load_dotenv
 from PIL import Image
 import pytesseract
 import whisper
+import requests
+from bs4 import BeautifulSoup
 from docx import Document
 import pandas as pd
 import PyPDF2
+from groq import Groq
+# Load environment variables
 load_dotenv()
 pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract")
 ffmpeg_path = os.getenv("FFMPEG_PATH", "/usr/bin")
 os.environ["PATH"] += os.pathsep + ffmpeg_path
 app = FastAPI()
 client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 UPLOAD_DIR = "uploaded_files"
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 MAX_FILE_SIZE_MB = 10
+# ========== File Text Extraction ==========
 def extract_text_from_file(file_path):
     ext = os.path.splitext(file_path)[-1].lower()
     if ext == ".txt":
     else:
         return "❌ Unsupported file type."
+# ========== Chat with File ==========
 @app.post("/chat-with-file")
 async def chat_with_file(file: UploadFile = File(...), question: str = Form(...)):
     try:
     except Exception as e:
         return JSONResponse(status_code=500, content={"error": str(e)})
+# ========== Chat with URL ==========
 # Request body for the /chat-with-url endpoint.
 class URLQuery(BaseModel):
     # Address of the web page that chat_with_url downloads.
     url: str
     # Question inserted into the prompt after the page text.
     question: str
 @app.post("/chat-with-url")
 async def chat_with_url(data: URLQuery):
     # Download the page at data.url, reduce it to plain text, and ask the
     # Groq chat model to answer data.question using that text.
     # Returns {"answer": <model reply>} on success; any exception raised
     # inside the try block is returned as a 500 JSON body {"error": str(e)}.
     try:
         # Send a browser-style User-Agent header with the request.
+        headers = {"User-Agent": "Mozilla/5.0"}
         # NOTE(review): requests.get is a synchronous call inside an async
         # handler, and the response status is not checked before parsing -
         # confirm both are intended.
+        res = requests.get(data.url, headers=headers, timeout=10)
         # Strip the markup and keep the text nodes, joined with newlines.
+        soup = BeautifulSoup(res.text, "html.parser")
+        web_content = soup.get_text(separator="\n")
         # Keep only the first 8000 characters of the extracted text.
+        trimmed_content = web_content[:8000]  # limit for Groq
         response = client.chat.completions.create(
             model="llama3-8b-8192",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant. Use the website content to answer the user's question."},
                 # The user message is the trimmed page text followed by the question.
+                {"role": "user", "content": f"{trimmed_content}\n\nNow answer this question:\n{data.question}"}
             ]
         )
         # Only the first choice's message content is returned to the caller.
         return {"answer": response.choices[0].message.content}
     except Exception as e:
         return JSONResponse(status_code=500, content={"error": str(e)})
+# ========== Extract Text from Image ==========
 @app.post("/extract-text-from-image")
 async def extract_text_from_image(file: UploadFile = File(...)):
     try:
         return {"answer": text.strip() or "⚠️ No text extracted."}
     except Exception as e:
         return JSONResponse(status_code=500, content={"error": str(e)})
+# ========== Transcribe Audio ==========
 @app.post("/transcribe-audio")
 async def transcribe_audio(file: UploadFile = File(...)):
     try: