cryogenic22 committed on
Commit
cb2cc09
·
verified ·
1 Parent(s): 2afa289

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -3
app.py CHANGED
@@ -1,12 +1,17 @@
1
- # app.py
2
- from fastapi import FastAPI, UploadFile
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from fastapi.staticfiles import StaticFiles
5
- import os
 
 
 
 
6
  from anthropic import Anthropic
 
7
 
8
  app = FastAPI()
9
  anthropic = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
 
10
 
11
  app.add_middleware(
12
  CORSMiddleware,
@@ -15,4 +20,47 @@ app.add_middleware(
15
  allow_headers=["*"],
16
  )
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
1
+ from fastapi import FastAPI, UploadFile, File
 
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.staticfiles import StaticFiles
4
+ import pandas as pd
5
+ from sentence_transformers import SentenceTransformer
6
+ import numpy as np
7
+ from typing import List, Dict
8
+ import json
9
  from anthropic import Anthropic
10
+ import os
11
 
12
  app = FastAPI()
13
  anthropic = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
14
+ model = SentenceTransformer('all-MiniLM-L6-v2')
15
 
16
  app.add_middleware(
17
  CORSMiddleware,
 
20
  allow_headers=["*"],
21
  )
22
 
23
@app.post("/api/process-file")
async def process_file(file: UploadFile = File(...)):
    """Parse an uploaded spreadsheet and return row embeddings plus metadata.

    The upload is read as an Excel workbook when its filename ends in
    ``.xlsx`` (case-insensitive) and as CSV otherwise. Each row is flattened
    into a ``"col: value"`` string and encoded with the module-level
    ``model``.

    Args:
        file: The uploaded file from the multipart form.

    Returns:
        A dict with:
          * ``embeddings``: one embedding (list of floats) per row,
          * ``metadata``: column names, row count, and the numeric /
            object-typed column names,
          * ``raw_data``: the rows as a list of dicts, with missing cells
            reported as ``None``.
    """
    import io  # local import so this block does not rely on a file-level one

    content = await file.read()

    # Parse straight from memory instead of writing "temp_<filename>" to disk.
    # The client controls the filename, so using it in a path allowed writes
    # outside the working directory, collisions between concurrent uploads,
    # and left the temp file behind whenever parsing raised.
    buffer = io.BytesIO(content)
    filename = (file.filename or "").lower()  # filename may be absent
    if filename.endswith('.xlsx'):
        df = pd.read_excel(buffer)
    else:
        df = pd.read_csv(buffer)

    # Create text representations: one "col: value ..." string per row.
    text_reps = [
        " ".join([f"{col}: {val}" for col, val in row.items()])
        for _, row in df.iterrows()
    ]

    # Generate embeddings
    embeddings = model.encode(text_reps)

    metadata = {
        'columns': list(df.columns),
        'row_count': len(df),
        'numerical_cols': list(df.select_dtypes(include=[np.number]).columns),
        'categorical_cols': list(df.select_dtypes(include=['object']).columns)
    }

    # NaN is not valid JSON and makes the response fail to serialize, so
    # missing cells are mapped to None. The cast to object keeps pandas from
    # coercing None back to NaN in numeric columns.
    records = df.astype(object).where(df.notna(), None).to_dict('records')

    return {
        'embeddings': embeddings.tolist(),
        'metadata': metadata,
        'raw_data': records
    }
57
+
58
@app.post("/api/query")
async def query_data(query: str, embeddings: List[List[float]], k: int = 5):
    """Return the indices of the ``k`` embeddings most similar to ``query``.

    The query is encoded with the module-level ``model`` and scored against
    every row of ``embeddings`` by dot product.

    Args:
        query: Free-text query to encode.
        embeddings: Row embeddings to search, one list of floats per row.
        k: Maximum number of indices to return.

    Returns:
        ``{"similar_indices": [...]}`` with indices ordered from highest to
        lowest score. The list is empty when ``k`` is not positive or
        ``embeddings`` is empty.
    """
    # With k == 0 the slice [-k:] below would select every index rather than
    # none, and a negative k would select the wrong end of the ranking. An
    # empty embeddings list would make np.dot raise a shape error.
    if k <= 0 or not embeddings:
        return {"similar_indices": []}

    query_embedding = model.encode([query])[0]
    similarities = np.dot(np.asarray(embeddings), query_embedding)
    # argsort is ascending: take the last k entries, then reverse for
    # best-first order.
    indices = np.argsort(similarities)[-k:][::-1].tolist()
    return {"similar_indices": indices}
64
+
65
+ # Mount static files at root
66
  app.mount("/", StaticFiles(directory="static", html=True), name="static")